class Collection(db.Model):
    """A titled collection, linked to its items through ``tiid_links`` rows."""

    cid = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    last_modified = db.Column(db.DateTime())
    ip_address = db.Column(db.Text)
    title = db.Column(db.Text)
    refset_metadata = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    tiid_links = db.relationship('CollectionTiid', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("collection", lazy="subquery"))
    # added_items = db.relationship('AddedItem', lazy='subquery', cascade="all, delete-orphan",
    #     backref=db.backref("collection", lazy="subquery"))

    def __init__(self, collection_id=None, **kwargs):
        """Create a collection.

        collection_id: value for ``cid``; when None a fresh id comes from
            ``_make_id()``.
        kwargs: column values forwarded to the base model constructor.
            ``created`` and ``last_modified`` default to the current UTC time.
        """
        logger.debug(u"new Collection {kwargs}".format(kwargs=kwargs))

        if collection_id is None:
            collection_id = _make_id()
        self.cid = collection_id

        # One clock reading so both defaults agree.
        now = datetime.datetime.utcnow()
        self.created = kwargs.get("created", now)
        self.last_modified = kwargs.get("last_modified", now)

        super(Collection, self).__init__(**kwargs)

    @property
    def tiids(self):
        """Return the tiid of every row in ``tiid_links``."""
        return [tiid_link.tiid for tiid_link in self.tiid_links]

    # @property
    # def added_aliases(self):
    #     return [added_item.alias_tuple for added_item in self.added_items]

    def __repr__(self):
        return '<Collection {cid}, {title}>'.format(
            cid=self.cid,
            title=self.title)

    @classmethod
    def create_from_old_doc(cls, doc):
        """Build a collection from an old-style document dict.

        The document's ``_id`` becomes ``cid``; every key that is not a
        column of this model is dropped. ``doc`` itself is not modified.
        Raises KeyError when ``doc`` has no ``_id``.
        """
        kept_keys = (
            "cid",
            "created",
            "last_modified",
            "ip_address",
            "title",
            "refset_metadata",
        )
        doc_copy = copy.deepcopy(doc)
        doc_copy["cid"] = doc_copy["_id"]
        # Snapshot the keys first: deleting while iterating a live key view
        # raises RuntimeError on Python 3.
        for key in list(doc_copy.keys()):
            if key not in kept_keys:
                del doc_copy[key]
        return cls(**doc_copy)
class ApiUser(db.Model):
    """A holder of an API key, with contact details and intended use."""

    api_key = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    planned_use = db.Column(db.Text)
    example_url = db.Column(db.Text)
    notes = db.Column(db.Text)
    # Declared once, at the position of the declaration that previously took
    # effect (an earlier duplicate was shadowed by this one).
    api_key_owner = db.Column(db.Text)
    email = db.Column(db.Text)
    organization = db.Column(db.Text)
    max_registered_items = db.Column(db.Numeric)  # should be Integer, is Numeric to keep consistent with previous table

    def __init__(self, **kwargs):
        """Create an API user with a freshly generated key.

        kwargs must contain ``prefix`` (KeyError otherwise); it is used to
        build the key and is not forwarded to the base constructor. The
        remaining kwargs are column values.
        """
        prefix = kwargs.pop("prefix")
        self.api_key = self.make_api_key(prefix)
        self.created = datetime.datetime.utcnow()
        super(ApiUser, self).__init__(**kwargs)

    def __repr__(self):
        return '<ApiUser {api_key}, {email}, {api_key_owner}>'.format(
            api_key=self.api_key,
            api_key_owner=self.api_key_owner,
            email=self.email)

    def make_api_key(self, prefix):
        """Return ``prefix``, a dash and six shortuuid characters, lower-cased."""
        new_api_key = prefix + "-" + shortuuid.uuid()[0:6]
        return new_api_key.lower()
class RegisteredItem(db.Model):
    """An alias (namespace, nid) registered under an API user's key."""

    api_key = db.Column(db.Text, db.ForeignKey('api_user.api_key'), primary_key=True)
    api_user = db.relationship('ApiUser', backref=db.backref('registered_items', lazy='dynamic'))
    namespace = db.Column(db.Text, primary_key=True)
    nid = db.Column(db.Text, primary_key=True)
    registered_date = db.Column(db.DateTime())

    def __init__(self, alias, api_user):
        """Register ``alias`` for ``api_user``.

        The alias is passed through ``item.canonical_alias_tuple`` and the
        resulting pair is stored as namespace and nid; the registration date
        is the current UTC time.
        """
        canonical_namespace, canonical_nid = item.canonical_alias_tuple(alias)
        self.namespace = canonical_namespace
        self.nid = canonical_nid
        self.api_user = api_user
        self.registered_date = datetime.datetime.utcnow()
        super(RegisteredItem, self).__init__()

    @property
    def alias(self):
        """The stored alias as a ``(namespace, nid)`` tuple."""
        return (self.namespace, self.nid)

    def __repr__(self):
        template = '<RegisteredItem {api_key}, {alias}>'
        return template.format(api_key=self.api_user.api_key, alias=self.alias)
class IncomingEmail(db.Model):
    """An inbound email whose payload is stored as a JSON string."""

    id = db.Column(db.Integer, primary_key=True)
    created = db.Column(db.DateTime())
    payload = db.Column(db.Text)

    def __init__(self, payload):
        """Store ``payload`` serialized with ``json.dumps`` and stamp the UTC creation time."""
        self.payload = json.dumps(payload)
        self.created = datetime.datetime.utcnow()
        super(IncomingEmail, self).__init__()

    @property
    def subject(self):
        """The ``Subject`` entry of the decoded payload's ``headers``."""
        decoded = json.loads(self.payload)
        headers = decoded["headers"]
        return headers["Subject"]

    @property
    def email_body(self):
        """The ``plain`` entry of the decoded payload."""
        return json.loads(self.payload)["plain"]

    def __repr__(self):
        template = '<IncomingEmail {id}, {created}, {payload_start}>'
        return template.format(
            id=self.id,
            created=self.created,
            payload_start=self.payload[0:100])

    def log_if_google_scholar_notification_confirmation(self):
        """Log the confirmation link when the body is a Google Scholar alert confirmation.

        Returns ``(name, url)`` as captured from the body; both are None when
        the body does not match or the payload lacks the expected fields.
        """
        confirm_regex = re.compile(
            """for the query:\nNew articles in (?P<name>.*)'s profile\n\nClick to confirm this request:\n(?P<url>.*)\n\n""")
        name, url = None, None
        try:
            found = confirm_regex.search(self.email_body)
            if found:
                url = found.group("url")
                name = found.group("name")
                message = u"Google Scholar notification confirmation for {name} is at {url}".format(
                    name=name, url=url)
                logger.info(message)
        except (KeyError, TypeError):
            # Best effort: an unexpected payload shape means "not a confirmation".
            pass
        return (name, url)

    def log_if_google_scholar_new_articles(self):
        """Log when the subject is a Google Scholar "new articles" alert.

        Returns the name captured from the subject, or None when the subject
        does not match or the payload lacks the expected fields.
        """
        new_articles_regex = re.compile(
            """Scholar Alert - (?P<name>.*) - new articles""")
        name = None
        try:
            found = new_articles_regex.search(self.subject)
            if found:
                name = found.group("name")
                message = u"Just received Google Scholar alert: new articles for {name}, saved at {id}".format(
                    name=name, id=self.id)
                logger.info(message)
        except (KeyError, TypeError):
            # Best effort: an unexpected payload shape means "not an alert".
            pass
        return name
class Snap(db.Model):
    """One collected value of a provider interaction for an item."""

    snap_id = db.Column(db.Text, primary_key=True)
    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'))
    provider = db.Column(db.Text)
    interaction = db.Column(db.Text)
    last_collected_date = db.Column(db.DateTime())
    raw_value = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    drilldown_url = db.Column(db.Text)
    first_collected_date = db.Column(db.DateTime())
    number_times_collected = db.Column(db.Integer)
    # Plain class attribute, not a mapped column.
    query_type = None

    def __init__(self, **kwargs):
        """Create a snap from column values given as kwargs.

        Missing ``last_collected_date`` / ``first_collected_date`` default to
        the current UTC time, and a missing ``snap_id`` to a new shortuuid.
        """
        # One clock reading so a brand-new snap has identical first and last
        # collection dates.
        now = datetime.datetime.utcnow()
        if "last_collected_date" not in kwargs:
            self.last_collected_date = now
        if "first_collected_date" not in kwargs:
            self.first_collected_date = now
        if "snap_id" not in kwargs:
            self.snap_id = shortuuid.uuid()
        if "query_type" in kwargs:
            self.query_type = kwargs["query_type"]
        super(Snap, self).__init__(**kwargs)

    # remove after migration complete
    @property
    def metric_name(self):
        """Alias for ``interaction``."""
        return self.interaction

    @property
    def collected_date(self):
        """Alias for ``last_collected_date``."""
        return self.last_collected_date

    @property
    def fully_qualified_name(self):
        """Return ``"<provider>:<interaction>"``."""
        return "{provider}:{interaction}".format(
            provider=self.provider,
            interaction=self.interaction)

    def __repr__(self):
        return '<Snap {tiid} {provider}:{interaction}={raw_value} on {last_collected_date} via {query_type}>'.format(
            provider=self.provider,
            interaction=self.interaction,
            raw_value=self.raw_value,
            last_collected_date=self.last_collected_date,
            query_type=self.query_type,
            tiid=self.tiid)
class ProviderBatchData(db.Model):
    """A batch of raw provider data, keyed by provider and minimum event date."""

    provider = db.Column(db.Text, primary_key=True)
    min_event_date = db.Column(db.DateTime(), primary_key=True)
    max_event_date = db.Column(db.DateTime())
    raw = db.Column(db.Text)
    aliases = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    provider_raw_version = db.Column(db.Numeric)
    created = db.Column(db.DateTime())

    def __init__(self, **kwargs):
        """Stamp ``created`` with the current UTC time, then apply the column kwargs."""
        self.created = datetime.datetime.utcnow()
        super(ProviderBatchData, self).__init__(**kwargs)

    def __repr__(self):
        # aliases may still be None (e.g. not passed to the constructor);
        # repr must not raise in that case, so count it as zero aliases.
        aliases = self.aliases or {}
        len_aliases = sum(len(aliases[namespace]) for namespace in aliases)
        return '<ProviderBatchData {provider}, {min_event_date}, {len_aliases} aliases>'.format(
            provider=self.provider,
            min_event_date=self.min_event_date,
            len_aliases=len_aliases)
class Alias(db.Model):
    """A (namespace, nid) identifier attached to an item."""

    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'), primary_key=True, index=True)
    namespace = db.Column(db.Text, primary_key=True)
    nid = db.Column(db.Text, primary_key=True)
    collected_date = db.Column(db.DateTime())

    def __init__(self, **kwargs):
        """Create an alias from column kwargs and/or an ``alias_tuple`` kwarg.

        An ``alias_tuple`` is passed through ``canonical_alias_tuple`` before
        being stored. ``collected_date`` defaults to the current UTC time.
        """
        # logger.debug(u"new Alias {kwargs}".format(
        #     kwargs=kwargs))

        if "alias_tuple" in kwargs:
            # Replace the raw tuple in kwargs with the canonical one: the base
            # constructor setattr()s every kwarg, so forwarding the raw tuple
            # would run the alias_tuple setter again and overwrite the
            # canonical namespace/nid assigned here.
            alias_tuple = canonical_alias_tuple(kwargs["alias_tuple"])
            kwargs["alias_tuple"] = alias_tuple
            (namespace, nid) = alias_tuple
            self.namespace = namespace
            self.nid = nid

        if "collected_date" in kwargs:
            self.collected_date = kwargs["collected_date"]
        else:
            self.collected_date = datetime.datetime.utcnow()

        super(Alias, self).__init__(**kwargs)

    @hybrid_property
    def alias_tuple(self):
        """The alias as a ``(namespace, nid)`` tuple."""
        return (self.namespace, self.nid)

    @alias_tuple.setter
    def alias_tuple(self, alias_tuple):
        """Set namespace and nid from a 2-item tuple; logs and re-raises ValueError otherwise."""
        try:
            (namespace, nid) = alias_tuple
        except ValueError:
            logger.debug("could not separate alias tuple {alias_tuple}".format(
                alias_tuple=alias_tuple))
            raise
        self.namespace = namespace
        self.nid = nid

    def __repr__(self):
        return '<Alias {item}, {alias_tuple}>'.format(
            item=self.item,
            alias_tuple=self.alias_tuple)

    @classmethod
    def filter_by_alias(cls, alias_tuple):
        """Return a query for rows whose namespace and nid equal the canonical form of ``alias_tuple``."""
        (namespace, nid) = canonical_alias_tuple(alias_tuple)
        return cls.query.filter_by(namespace=namespace, nid=nid)
class Metric(db.Model):
    """A provider metric value for an item, collected at a given time."""

    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'), primary_key=True, index=True)
    provider = db.Column(db.Text, primary_key=True)
    metric_name = db.Column(db.Text, primary_key=True)
    collected_date = db.Column(db.DateTime(), primary_key=True)
    raw_value = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    drilldown_url = db.Column(db.Text)

    def __init__(self, **kwargs):
        """Apply the column kwargs; ``collected_date`` defaults to the current UTC time."""
        if "collected_date" not in kwargs:
            self.collected_date = datetime.datetime.utcnow()
        else:
            self.collected_date = kwargs["collected_date"]
        super(Metric, self).__init__(**kwargs)

    def __repr__(self):
        template = '<Metric {tiid} {provider}:{metric_name}>'
        return template.format(
            provider=self.provider,
            metric_name=self.metric_name,
            tiid=self.tiid)
class Biblio(db.Model):
    """One named bibliographic value for an item, as reported by a provider."""

    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'), primary_key=True, index=True)
    provider = db.Column(db.Text, primary_key=True)
    biblio_name = db.Column(db.Text, primary_key=True)
    biblio_value = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    collected_date = db.Column(db.DateTime())

    def __init__(self, **kwargs):
        """Apply the column kwargs.

        ``collected_date`` defaults to the current UTC time and ``provider``
        to ``"unknown"``.
        """
        # logger.debug(u"new Biblio {kwargs}".format(
        #     kwargs=kwargs))

        if "collected_date" not in kwargs:
            self.collected_date = datetime.datetime.utcnow()
        else:
            self.collected_date = kwargs["collected_date"]

        if "provider" not in kwargs:
            self.provider = "unknown"

        super(Biblio, self).__init__(**kwargs)

    def __repr__(self):
        template = '<Biblio {biblio_name}, {item}>'
        return template.format(biblio_name=self.biblio_name, item=self.item)

    @classmethod
    def filter_by_tiid(cls, tiid):
        """Return the list of all biblio rows for ``tiid``."""
        return cls.query.filter_by(tiid=tiid).all()

    @classmethod
    def as_dict_by_tiid(cls, tiid):
        """Return ``{biblio_name: biblio_value}`` over all biblio rows for ``tiid``."""
        rows = cls.query.filter_by(tiid=tiid).all()
        # When several rows share a name, the last one returned wins.
        return dict((row.biblio_name, row.biblio_value) for row in rows)
class Item(db.Model):
    """An item identified by tiid, with its aliases, biblio entries and metrics."""

    tiid = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    last_modified = db.Column(db.DateTime())
    last_update_run = db.Column(db.DateTime())
    aliases = db.relationship('Alias', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    biblios = db.relationship('Biblio', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    # metrics is declared lazy='noload'; from_tiid() fills it explicitly from
    # metrics_query.
    metrics = db.relationship('Metric', lazy='noload', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="noload"))
    metrics_query = db.relationship('Metric', lazy='dynamic')

    def __init__(self, **kwargs):
        """Create an item from column kwargs.

        A missing ``tiid`` defaults to the first 24 characters of a new
        shortuuid; missing ``created``, ``last_modified`` and
        ``last_update_run`` default to the current UTC time.
        """
        # logger.debug(u"new Item {kwargs}".format(
        #     kwargs=kwargs))

        if "tiid" in kwargs:
            self.tiid = kwargs["tiid"]
        else:
            self.tiid = shortuuid.uuid()[0:24]

        # One clock reading so all defaulted timestamps agree.
        now = datetime.datetime.utcnow()
        self.created = kwargs.get("created", now)
        self.last_modified = kwargs.get("last_modified", now)
        self.last_update_run = kwargs.get("last_update_run", now)

        super(Item, self).__init__(**kwargs)

    def __repr__(self):
        return '<Item {tiid}>'.format(
            tiid=self.tiid)

    @classmethod
    def from_tiid(cls, tiid):
        """Return the item with primary key ``tiid`` with its metrics loaded, or None if absent."""
        item = cls.query.get(tiid)
        if not item:
            return None
        item.metrics = item.metrics_query.all()
        return item

    @property
    def alias_tuples(self):
        """The ``alias_tuple`` of every alias of this item."""
        return [alias.alias_tuple for alias in self.aliases]

    @property
    def publication_date(self):
        """Publication date as an ISO-format string.

        Taken from the "date" biblio, or Dec 31 of a non-empty "year" biblio
        (whichever is seen last); falls back to ``created``.
        """
        # NOTE(review): a "year" biblio iterated after a "date" biblio
        # replaces the date -- confirm that is intended.
        publication_date = None
        for biblio in self.biblios:
            if biblio.biblio_name == "date":
                publication_date = biblio.biblio_value
                continue
            if (biblio.biblio_name == "year") and biblio.biblio_value:
                publication_date = datetime.datetime(int(biblio.biblio_value), 12, 31)

        if not publication_date:
            publication_date = self.created
        return publication_date.isoformat()

    @hybrid_method
    def published_before(self, mydate):
        """Compare the ISO publication date string with ``mydate.isoformat()``."""
        return (self.publication_date < mydate.isoformat())

    @classmethod
    def create_from_old_doc(cls, doc):
        """Build an item from an old-style document dict.

        The document's ``_id`` becomes ``tiid``; every key that is not one of
        tiid/created/last_modified/last_update_run is dropped. ``doc`` itself
        is not modified. Raises KeyError when ``doc`` has no ``_id``.
        """
        logger.debug(u"in create_from_old_doc for {tiid}".format(
            tiid=doc["_id"]))

        kept_keys = ("tiid", "created", "last_modified", "last_update_run")
        doc_copy = copy.deepcopy(doc)
        doc_copy["tiid"] = doc_copy["_id"]
        # Snapshot the keys first: deleting while iterating a live key view
        # raises RuntimeError on Python 3.
        for key in list(doc_copy.keys()):
            if key not in kept_keys:
                del doc_copy[key]
        return cls(**doc_copy)

    def as_old_doc(self):
        """Return this item as an old-style document dict.

        The dict has ``_id``, ISO-format ``last_modified`` and ``created``,
        ``type``, ``biblio``, ``aliases`` and ``metrics`` entries; metrics are
        accumulated through ``add_metrics_data``.
        """
        # logger.debug(u"in as_old_doc for {tiid}".format(
        #     tiid=self.tiid))

        item_doc = {}
        item_doc["_id"] = self.tiid
        item_doc["last_modified"] = self.last_modified.isoformat()
        item_doc["created"] = self.created.isoformat()
        item_doc["type"] = "item"

        item_doc["biblio"] = {}
        for biblio in self.biblios:
            item_doc["biblio"][biblio.biblio_name] = biblio.biblio_value

        item_doc["aliases"] = alias_dict_from_tuples(self.alias_tuples)
        if item_doc["biblio"]:
            item_doc["aliases"]["biblio"] = [item_doc["biblio"]]

        item_doc["metrics"] = {}
        for metric in self.metrics:
            metric_name = metric.provider + ":" + metric.metric_name
            metrics_method_response = (metric.raw_value, metric.drilldown_url)
            item_doc = add_metrics_data(
                metric_name,
                metrics_method_response,
                item_doc,
                metric.collected_date.isoformat())

        # Point "raw" at the value stored under the latest raw_history date
        # (dates are compared as strings).
        for full_metric_name in item_doc["metrics"]:
            values = item_doc["metrics"][full_metric_name]["values"]
            most_recent_date_so_far = "1900"
            for this_date in values["raw_history"]:
                if this_date > most_recent_date_so_far:
                    most_recent_date_so_far = this_date
                    values["raw"] = values["raw_history"][this_date]
        return item_doc
class Item(db.Model):
    """An item identified by tiid, with aliases, biblio entries, snaps and refresh bookkeeping."""

    profile_id = db.Column(db.Integer)
    tiid = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    last_modified = db.Column(db.DateTime())
    last_update_run = db.Column(db.DateTime())
    last_refresh_started = db.Column(db.DateTime())  #ALTER TABLE item ADD last_refresh_started timestamp
    last_refresh_finished = db.Column(db.DateTime())  #ALTER TABLE item ADD last_refresh_finished timestamp
    last_refresh_status = db.Column(db.Text)  #ALTER TABLE item ADD last_refresh_status text
    last_refresh_failure_message = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))  #ALTER TABLE item ADD last_refresh_failure_message text
    embed_markup = db.Column(db.Text)
    aliases = db.relationship('Alias', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    biblios = db.relationship('Biblio', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    # metrics is declared lazy='noload'; from_tiid() fills it explicitly from
    # metrics_query.
    metrics = db.relationship('Snap', lazy='noload', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="noload"))
    metrics_query = db.relationship('Snap', lazy='dynamic')

    def __init__(self, **kwargs):
        """Create an item from column kwargs.

        A missing ``tiid`` defaults to the first 24 characters of a new
        shortuuid drawn from a lowercase-alphanumeric alphabet; missing
        ``created``, ``last_modified`` and ``last_update_run`` default to the
        current UTC time.
        """
        # logger.debug(u"new Item {kwargs}".format(
        #     kwargs=kwargs))

        if "tiid" in kwargs:
            self.tiid = kwargs["tiid"]
        else:
            # Module-level call: it sets the shortuuid alphabet, not just
            # the alphabet for this one id.
            shortuuid.set_alphabet('abcdefghijklmnopqrstuvwxyz1234567890')
            self.tiid = shortuuid.uuid()[0:24]

        # One clock reading so all defaulted timestamps agree.
        now = datetime.datetime.utcnow()
        self.created = kwargs.get("created", now)
        self.last_modified = kwargs.get("last_modified", now)
        self.last_update_run = kwargs.get("last_update_run", now)

        super(Item, self).__init__(**kwargs)

    def __repr__(self):
        return '<Item {tiid}>'.format(
            tiid=self.tiid)

    @classmethod
    def from_tiid(cls, tiid, with_metrics=True):
        """Return the item with primary key ``tiid``, or None if absent.

        When ``with_metrics`` is true, ``metrics`` is filled from
        ``metrics_query`` before returning.
        """
        item = cls.query.get(tiid)
        if not item:
            return None
        if with_metrics:
            item.metrics = item.metrics_query.all()
        return item

    @property
    def alias_tuples(self):
        """The ``alias_tuple`` of every alias of this item."""
        return [alias.alias_tuple for alias in self.aliases]

    @property
    def biblio_dict(self):
        """Return ``{biblio_name: biblio_value}`` for this item.

        For a name reported more than once, the first value seen is kept
        unless a later entry comes from the "user_provided" provider, which
        always overwrites.
        """
        # This is the only definition: the class body used to define
        # biblio_dict twice and the later one (this logic) was the one in
        # effect.
        biblio_dict = {}
        for biblio_obj in self.biblios:
            if (biblio_obj.biblio_name not in biblio_dict) or (biblio_obj.provider == "user_provided"):
                biblio_dict[biblio_obj.biblio_name] = biblio_obj.biblio_value
        return biblio_dict

    @property
    def biblio_dicts_per_provider(self):
        """Return ``{provider: {biblio_name: biblio_value}}`` as a defaultdict."""
        response = defaultdict(dict)
        for biblio in self.biblios:
            response[biblio.provider][biblio.biblio_name] = biblio.biblio_value
        return response

    @property
    def publication_date(self):
        """Publication date as an ISO-format string.

        Taken from the "date" biblio, or Dec 31 of a non-empty "year" biblio
        (whichever is seen last); falls back to ``created``.
        """
        # NOTE(review): a "year" biblio iterated after a "date" biblio
        # replaces the date -- confirm that is intended.
        publication_date = None
        for biblio in self.biblios:
            if biblio.biblio_name == "date":
                publication_date = biblio.biblio_value
                continue
            if (biblio.biblio_name == "year") and biblio.biblio_value:
                publication_date = datetime.datetime(int(biblio.biblio_value), 12, 31)

        if not publication_date:
            publication_date = self.created
        return publication_date.isoformat()

    @hybrid_method
    def published_before(self, mydate):
        """Compare the ISO publication date string with ``mydate.isoformat()``."""
        return (self.publication_date < mydate.isoformat())

    def has_user_provided_biblio(self):
        """True when any biblio entry has provider "user_provided"."""
        return any(biblio.provider == 'user_provided' for biblio in self.biblios)

    def has_free_fulltext_url(self):
        """True when any biblio entry is named "free_fulltext_url"."""
        return any(biblio.biblio_name == 'free_fulltext_url' for biblio in self.biblios)

    def set_last_refresh_start(self):
        """Mark a refresh as started now (UTC) and clear the previous outcome."""
        self.last_refresh_started = datetime.datetime.utcnow()
        self.last_refresh_finished = None
        self.last_refresh_status = u"STARTED"
        self.last_refresh_failure_message = None

    def set_last_refresh_finished(self, myredis):
        """Record the refresh outcome reported by ``refresh_status(self.tiid, myredis)``.

        The "short" status is always stored; the "long" text is stored as the
        failure message only when the short status does not start with
        "SUCCESS". The finish time is the current UTC time.
        """
        redis_refresh_status = refresh_status(self.tiid, myredis)
        if not redis_refresh_status["short"].startswith(u"SUCCESS"):
            self.last_refresh_failure_message = redis_refresh_status["long"]
        self.last_refresh_status = redis_refresh_status["short"]
        self.last_refresh_finished = datetime.datetime.utcnow()

    @classmethod
    def create_from_old_doc(cls, doc):
        """Build an item from an old-style document dict.

        The document's ``_id`` becomes ``tiid``; every key that is not one of
        tiid/created/last_modified/last_update_run is dropped. ``doc`` itself
        is not modified. Raises KeyError when ``doc`` has no ``_id``.
        """
        # logger.debug(u"in create_from_old_doc for {tiid}".format(
        #     tiid=doc["_id"]))

        kept_keys = ("tiid", "created", "last_modified", "last_update_run")
        doc_copy = copy.deepcopy(doc)
        doc_copy["tiid"] = doc_copy["_id"]
        # Snapshot the keys first: deleting while iterating a live key view
        # raises RuntimeError on Python 3.
        for key in list(doc_copy.keys()):
            if key not in kept_keys:
                del doc_copy[key]
        return cls(**doc_copy)

    def as_old_doc(self):
        """Return this item as an old-style document dict.

        The dict has ``_id``, ISO-format ``last_modified``, ``created`` and
        ``last_update_run``, ``type``, ``biblio``, ``aliases`` and ``metrics``
        entries; metrics are accumulated through ``add_metrics_data``.
        """
        # logger.debug(u"in as_old_doc for {tiid}".format(
        #     tiid=self.tiid))

        item_doc = {}
        item_doc["_id"] = self.tiid
        item_doc["last_modified"] = self.last_modified.isoformat()
        item_doc["created"] = self.created.isoformat()
        item_doc["last_update_run"] = self.last_update_run.isoformat()
        item_doc["type"] = "item"
        item_doc["biblio"] = self.biblio_dict

        item_doc["aliases"] = alias_dict_from_tuples(self.alias_tuples)
        if item_doc["biblio"]:
            item_doc["aliases"]["biblio"] = [item_doc["biblio"]]

        item_doc["metrics"] = {}
        for metric in self.metrics:
            metric_name = metric.provider + ":" + metric.metric_name
            metrics_method_response = (metric.raw_value, metric.drilldown_url)
            item_doc = add_metrics_data(
                metric_name,
                metrics_method_response,
                item_doc,
                metric.collected_date.isoformat())

        # Point "raw" at the value stored under the latest raw_history date
        # (dates are compared as strings).
        for full_metric_name in item_doc["metrics"]:
            values = item_doc["metrics"][full_metric_name]["values"]
            most_recent_date_so_far = "1900"
            for this_date in values["raw_history"]:
                if this_date > most_recent_date_so_far:
                    most_recent_date_so_far = this_date
                    values["raw"] = values["raw_history"][this_date]
        return item_doc