Example #1
0
class ApiUser(db.Model):
    """A registered API user, keyed by a generated API key.

    The key is built in ``__init__`` from a caller-supplied ``prefix`` plus a
    short ``shortuuid`` suffix, and ``created`` is stamped with the current
    UTC time.
    """

    api_key = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    planned_use = db.Column(db.Text)
    example_url = db.Column(db.Text)
    notes = db.Column(db.Text)
    api_key_owner = db.Column(db.Text)
    email = db.Column(db.Text)
    organization = db.Column(db.Text)
    max_registered_items = db.Column(
        db.Numeric
    )  # should be Integer, is Numeric to keep consistent with previous table

    def __init__(self, **kwargs):
        """Create a user with a freshly generated API key.

        ``prefix`` is a required keyword argument; it is consumed here and
        not forwarded to the base constructor. All remaining keyword
        arguments are passed through to the base constructor.

        Raises:
            KeyError: if ``prefix`` is not supplied.
        """
        prefix = kwargs.pop("prefix")
        self.api_key = self.make_api_key(prefix)
        self.created = datetime.datetime.utcnow()
        super(ApiUser, self).__init__(**kwargs)

    def __repr__(self):
        return '<ApiUser {api_key}, {email}, {api_key_owner}>'.format(
            api_key=self.api_key,
            api_key_owner=self.api_key_owner,
            email=self.email)

    def make_api_key(self, prefix):
        """Return ``prefix``, a dash and six shortuuid characters, lowercased."""
        new_api_key = prefix + "-" + shortuuid.uuid()[0:6]
        return new_api_key.lower()
Example #2
0
class RegisteredItem(db.Model):
    """An alias registered by an API user.

    Rows are keyed by the owning user's API key together with the alias
    ``(namespace, nid)`` pair.
    """

    api_key = db.Column(
        db.Text, db.ForeignKey('api_user.api_key'), primary_key=True)
    api_user = db.relationship(
        'ApiUser', backref=db.backref('registered_items', lazy='dynamic'))
    namespace = db.Column(db.Text, primary_key=True)
    nid = db.Column(db.Text, primary_key=True)
    registered_date = db.Column(db.DateTime())

    def __init__(self, alias, api_user):
        """Register ``alias`` for ``api_user``, stamped with the current UTC time.

        ``alias`` is passed through ``item.canonical_alias_tuple`` and the
        resulting pair is stored as ``namespace`` and ``nid``.
        """
        canonical = item.canonical_alias_tuple(alias)
        self.namespace, self.nid = canonical
        self.api_user = api_user
        self.registered_date = datetime.datetime.utcnow()
        super(RegisteredItem, self).__init__()

    @property
    def alias(self):
        """The ``(namespace, nid)`` pair for this registration."""
        return (self.namespace, self.nid)

    def __repr__(self):
        template = '<RegisteredItem {api_key}, {alias}>'
        return template.format(
            alias=self.alias, api_key=self.api_user.api_key)
Example #3
0
class IncomingEmail(db.Model):
    """An inbound email whose payload is stored as a JSON string."""

    id = db.Column(db.Integer, primary_key=True)
    created = db.Column(db.DateTime())
    payload = db.Column(db.Text)

    def __init__(self, payload):
        """Serialise ``payload`` to JSON and stamp the creation time (UTC)."""
        self.payload = json.dumps(payload)
        self.created = datetime.datetime.utcnow()
        super(IncomingEmail, self).__init__()

    @property
    def subject(self):
        """The ``Subject`` entry of the decoded payload's ``headers``."""
        return json.loads(self.payload)["headers"]["Subject"]

    @property
    def email_body(self):
        """The ``plain`` entry of the decoded payload."""
        return json.loads(self.payload)["plain"]

    def __repr__(self):
        template = '<IncomingEmail {id}, {created}, {payload_start}>'
        return template.format(
            id=self.id,
            created=self.created,
            payload_start=self.payload[:100])

    def log_if_google_scholar_notification_confirmation(self):
        """Log and return ``(name, url)`` if the body is a Scholar confirmation.

        Returns ``(None, None)`` when the body does not match, or when a
        ``KeyError`` / ``TypeError`` is raised while reading it.
        """
        confirm_re = re.compile(
            """for the query:\nNew articles in (?P<name>.*)'s profile\n\nClick to confirm this request:\n(?P<url>.*)\n\n"""
        )
        result = (None, None)
        try:
            found = confirm_re.search(self.email_body)
            if found is not None:
                result = found.group("name", "url")
                logger.info(
                    u"Google Scholar notification confirmation for {name} is at {url}"
                    .format(name=result[0], url=result[1]))
        except (KeyError, TypeError):
            # Best effort: a payload without the expected fields is ignored.
            pass
        return result

    def log_if_google_scholar_new_articles(self):
        """Log and return the profile name if the subject is a Scholar alert.

        Returns ``None`` when the subject does not match, or when a
        ``KeyError`` / ``TypeError`` is raised while reading it.
        """
        alert_re = re.compile(
            """Scholar Alert - (?P<name>.*) - new articles""")
        profile_name = None
        try:
            found = alert_re.search(self.subject)
            if found is not None:
                profile_name = found.group("name")
                logger.info(
                    u"Just received Google Scholar alert: new articles for {name}, saved at {id}"
                    .format(name=profile_name, id=self.id))
        except (KeyError, TypeError):
            # Best effort: a payload without the expected fields is ignored.
            pass
        return profile_name
class CollectionTiid(db.Model):
    """Link row pairing a collection id (``cid``) with an item id (``tiid``)."""

    cid = db.Column(
        db.Text,
        db.ForeignKey('collection.cid'),
        primary_key=True,
        index=True,
    )
    tiid = db.Column(db.Text, primary_key=True)

    def __init__(self, **kwargs):
        """Log the keyword arguments, then hand them to the base constructor."""
        message = u"new CollectionTiid {kwargs}".format(kwargs=kwargs)
        logger.debug(message)
        super(CollectionTiid, self).__init__(**kwargs)

    def __repr__(self):
        template = '<CollectionTiid {collection} {tiid}>'
        return template.format(tiid=self.tiid, collection=self.collection)
Example #5
0
class Alias(db.Model):
    """An alias ``(namespace, nid)`` attached to an item (``tiid``)."""

    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'), primary_key=True, index=True)
    namespace = db.Column(db.Text, primary_key=True)
    nid = db.Column(db.Text, primary_key=True)
    collected_date = db.Column(db.DateTime())

    def __init__(self, **kwargs):
        """Build an alias from keyword arguments.

        ``alias_tuple``, when given, is run through ``canonical_alias_tuple``
        and stored as ``namespace`` / ``nid``. ``collected_date`` defaults to
        the current UTC time. Remaining keyword arguments go to the base
        constructor.
        """
        if "alias_tuple" in kwargs:
            # Consume the raw tuple here. If it were forwarded, the base
            # constructor would assign it through the ``alias_tuple`` setter
            # and overwrite the canonical namespace/nid with the raw input.
            (namespace, nid) = canonical_alias_tuple(kwargs.pop("alias_tuple"))
            self.namespace = namespace
            self.nid = nid
        if "collected_date" in kwargs:
            self.collected_date = kwargs["collected_date"]
        else:
            self.collected_date = datetime.datetime.utcnow()

        super(Alias, self).__init__(**kwargs)

    @hybrid_property
    def alias_tuple(self):
        """The ``(namespace, nid)`` pair."""
        return (self.namespace, self.nid)

    @alias_tuple.setter
    def alias_tuple(self, alias_tuple):
        """Split a two-element ``alias_tuple`` into ``namespace`` and ``nid``.

        Raises:
            ValueError: if ``alias_tuple`` cannot be unpacked into two parts
                (logged at debug level before re-raising).
        """
        try:
            (namespace, nid) = alias_tuple
        except ValueError:
            logger.debug("could not separate alias tuple {alias_tuple}".format(
                alias_tuple=alias_tuple))
            raise
        self.namespace = namespace
        self.nid = nid

    def __repr__(self):
        return '<Alias {item}, {alias_tuple}>'.format(
            item=self.item,
            alias_tuple=self.alias_tuple)

    @classmethod
    def filter_by_alias(cls, alias_tuple):
        """Return a query for rows matching the canonical form of ``alias_tuple``."""
        (namespace, nid) = canonical_alias_tuple(alias_tuple)
        return cls.query.filter_by(namespace=namespace, nid=nid)
Example #6
0
class Collection(db.Model):
    """A collection of items, linked to its tiids through ``CollectionTiid``."""

    cid = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    last_modified = db.Column(db.DateTime())
    ip_address = db.Column(db.Text)
    title = db.Column(db.Text)
    refset_metadata = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    tiid_links = db.relationship('CollectionTiid',
                                 lazy='subquery',
                                 cascade="all, delete-orphan",
                                 backref=db.backref("collection",
                                                    lazy="subquery"))

    # added_items = db.relationship('AddedItem', lazy='subquery', cascade="all, delete-orphan",
    #     backref=db.backref("collection", lazy="subquery"))

    def __init__(self, collection_id=None, **kwargs):
        """Create a collection.

        Args:
            collection_id: value for ``cid``; when ``None`` one is generated
                with ``_make_id()``.
            **kwargs: forwarded to the base constructor. ``created`` and
                ``last_modified`` default to the current UTC time when absent.
        """
        logger.debug(u"new Collection {kwargs}".format(kwargs=kwargs))

        if collection_id is None:
            collection_id = _make_id()
        self.cid = collection_id

        now = datetime.datetime.utcnow()
        if "created" in kwargs:
            self.created = kwargs["created"]
        else:
            self.created = now

        if "last_modified" in kwargs:
            self.last_modified = kwargs["last_modified"]
        else:
            self.last_modified = now

        super(Collection, self).__init__(**kwargs)

    @property
    def tiids(self):
        """The ``tiid`` of every linked ``CollectionTiid`` row."""
        return [tiid_link.tiid for tiid_link in self.tiid_links]

    # @property
    # def added_aliases(self):
    #     return [added_item.alias_tuple for added_item in self.added_items]

    def __repr__(self):
        return '<Collection {cid}, {title}>'.format(cid=self.cid,
                                                    title=self.title)

    @classmethod
    def create_from_old_doc(cls, doc):
        """Build a collection from a legacy document dict.

        ``doc["_id"]`` becomes ``cid``; only the known column keys are kept.
        ``doc`` itself is not modified.

        Raises:
            KeyError: if ``doc`` has no ``"_id"`` entry.
        """
        kept_keys = ("cid", "created", "last_modified", "ip_address", "title",
                     "refset_metadata")
        doc_copy = copy.deepcopy(doc)
        doc_copy["cid"] = doc_copy["_id"]
        # Build a filtered dict rather than deleting keys while iterating:
        # on Python 3, removing entries during iteration of ``keys()``
        # raises RuntimeError.
        fields = dict(
            (key, value) for key, value in doc_copy.items()
            if key in kept_keys)
        return cls(**fields)
Example #7
0
class Snap(db.Model):
    """A collected value of one provider interaction for an item."""

    snap_id = db.Column(db.Text, primary_key=True)
    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'))
    provider = db.Column(db.Text)
    interaction = db.Column(db.Text)
    last_collected_date = db.Column(db.DateTime())
    raw_value = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    drilldown_url = db.Column(db.Text)
    first_collected_date = db.Column(db.DateTime())
    number_times_collected = db.Column(db.Integer)
    # Plain class attribute, not a mapped column.
    query_type = None

    def __init__(self, **kwargs):
        """Fill in defaults for missing fields, then delegate to the base.

        Both collected dates default to the current UTC time and ``snap_id``
        defaults to a new ``shortuuid`` value.
        """
        supplied = kwargs.keys()
        if "last_collected_date" not in supplied:
            self.last_collected_date = datetime.datetime.utcnow()
        if "first_collected_date" not in supplied:
            self.first_collected_date = datetime.datetime.utcnow()
        if "snap_id" not in supplied:
            self.snap_id = shortuuid.uuid()
        if "query_type" in supplied:
            self.query_type = kwargs["query_type"]
        super(Snap, self).__init__(**kwargs)

    #remove after migration complete
    @property
    def metric_name(self):
        """Alias for ``interaction``."""
        return self.interaction

    @property
    def collected_date(self):
        """Alias for ``last_collected_date``."""
        return self.last_collected_date

    @property
    def fully_qualified_name(self):
        """``provider`` and ``interaction`` joined by a colon."""
        template = "{provider}:{interaction}"
        return template.format(
            interaction=self.interaction, provider=self.provider)

    def __repr__(self):
        template = '<Snap {tiid} {provider}:{interaction}={raw_value} on {last_collected_date} via {query_type}>'
        return template.format(
            tiid=self.tiid,
            provider=self.provider,
            interaction=self.interaction,
            raw_value=self.raw_value,
            last_collected_date=self.last_collected_date,
            query_type=self.query_type)
Example #8
0
class Biblio(db.Model):
    """One bibliographic name/value pair for an item, per provider."""

    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'), primary_key=True, index=True)
    provider = db.Column(db.Text, primary_key=True)
    biblio_name = db.Column(db.Text, primary_key=True)
    biblio_value = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    collected_date = db.Column(db.DateTime())

    def __init__(self, **kwargs):
        """Apply defaults, then delegate to the base constructor.

        ``collected_date`` defaults to the current UTC time and ``provider``
        defaults to ``"unknown"``.
        """
        # logger.debug(u"new Biblio {kwargs}".format(
        #     kwargs=kwargs))

        if "collected_date" not in kwargs:
            self.collected_date = datetime.datetime.utcnow()
        else:
            self.collected_date = kwargs["collected_date"]
        if "provider" not in kwargs:
            self.provider = "unknown"

        super(Biblio, self).__init__(**kwargs)

    def __repr__(self):
        template = '<Biblio {biblio_name}, {item}>'
        return template.format(item=self.item, biblio_name=self.biblio_name)

    @classmethod
    def filter_by_tiid(cls, tiid):
        """Return a list of all rows whose ``tiid`` matches."""
        return cls.query.filter_by(tiid=tiid).all()

    @classmethod
    def as_dict_by_tiid(cls, tiid):
        """Return ``{biblio_name: biblio_value}`` for the rows of ``tiid``.

        When several rows share a name, the last one returned by the query
        wins.
        """
        rows = cls.query.filter_by(tiid=tiid).all()
        return dict((row.biblio_name, row.biblio_value) for row in rows)
class ProviderBatchData(db.Model):
    """A raw batch of provider data covering a range of event dates."""

    provider = db.Column(db.Text, primary_key=True)
    min_event_date = db.Column(db.DateTime(), primary_key=True)
    max_event_date = db.Column(db.DateTime())
    raw = db.Column(db.Text)
    aliases = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    provider_raw_version = db.Column(db.Numeric)
    created = db.Column(db.DateTime())

    def __init__(self, **kwargs):
        """Stamp ``created`` with the current UTC time, then delegate to the base."""
        self.created = datetime.datetime.utcnow()
        super(ProviderBatchData, self).__init__(**kwargs)

    def __repr__(self):
        # ``aliases`` may be unset (None); treat that as no aliases so that
        # repr() never raises on a partially populated row.
        aliases = self.aliases or {}
        alias_count = sum(len(aliases[namespace]) for namespace in aliases)
        return '<ProviderBatchData {provider}, {min_event_date}, {len_aliases} aliases>'.format(
            provider=self.provider,
            min_event_date=self.min_event_date,
            len_aliases=alias_count)
Example #10
0
class Metric(db.Model):
    """A metric value for an item, keyed by provider, name and collection date."""

    tiid = db.Column(db.Text, db.ForeignKey('item.tiid'), primary_key=True, index=True)
    provider = db.Column(db.Text, primary_key=True)
    metric_name = db.Column(db.Text, primary_key=True)
    collected_date = db.Column(db.DateTime(), primary_key=True)
    raw_value = db.Column(json_sqlalchemy.JSONAlchemy(db.Text))
    drilldown_url = db.Column(db.Text)

    def __init__(self, **kwargs):
        """Default ``collected_date`` to the current UTC time, then delegate."""
        if "collected_date" not in kwargs:
            self.collected_date = datetime.datetime.utcnow()
        else:
            self.collected_date = kwargs["collected_date"]
        super(Metric, self).__init__(**kwargs)

    def __repr__(self):
        template = '<Metric {tiid} {provider}:{metric_name}>'
        return template.format(
            tiid=self.tiid,
            provider=self.provider,
            metric_name=self.metric_name)
Example #11
0
class Item(db.Model):
    """An item identified by ``tiid``, with its aliases, biblio and metrics.

    ``metrics`` is declared ``lazy='noload'``; ``from_tiid`` fills it
    explicitly from the dynamic ``metrics_query`` relationship.
    """

    tiid = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    last_modified = db.Column(db.DateTime())
    last_update_run = db.Column(db.DateTime())
    aliases = db.relationship('Alias', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    biblios = db.relationship('Biblio', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    metrics = db.relationship('Metric', lazy='noload', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="noload"))
    metrics_query = db.relationship('Metric', lazy='dynamic')

    def __init__(self, **kwargs):
        """Create an item, filling in defaults for missing fields.

        ``tiid`` defaults to the first 24 characters of a new ``shortuuid``
        value. ``created``, ``last_modified`` and ``last_update_run`` default
        to one shared current-UTC timestamp.
        """
        # logger.debug(u"new Item {kwargs}".format(
        #     kwargs=kwargs))

        if "tiid" in kwargs:
            self.tiid = kwargs["tiid"]
        else:
            self.tiid = shortuuid.uuid()[0:24]

        now = datetime.datetime.utcnow()
        if "created" in kwargs:
            self.created = kwargs["created"]
        else:
            self.created = now
        if "last_modified" in kwargs:
            self.last_modified = kwargs["last_modified"]
        else:
            self.last_modified = now
        if "last_update_run" in kwargs:
            self.last_update_run = kwargs["last_update_run"]
        else:
            self.last_update_run = now

        super(Item, self).__init__(**kwargs)

    def __repr__(self):
        return '<Item {tiid}>'.format(
            tiid=self.tiid)

    @classmethod
    def from_tiid(cls, tiid):
        """Return the item with primary key ``tiid`` with metrics loaded, or ``None``."""
        item = cls.query.get(tiid)
        if not item:
            return None
        item.metrics = item.metrics_query.all()
        return item

    @property
    def alias_tuples(self):
        """The ``alias_tuple`` of every alias on this item."""
        return [alias.alias_tuple for alias in self.aliases]

    @property
    def publication_date(self):
        """ISO-formatted publication date, falling back to ``created``.

        Taken from the item's ``"date"`` biblio, or 31 December of its
        ``"year"`` biblio.
        """
        # NOTE(review): the loop never stops early, so whichever of "date" /
        # "year" appears last in ``self.biblios`` wins -- confirm that is the
        # intended precedence.
        # NOTE(review): a "date" value must expose ``isoformat()``; confirm
        # the stored type.
        publication_date = None
        for biblio in self.biblios:
            if biblio.biblio_name == "date":
                publication_date = biblio.biblio_value
                continue
            if (biblio.biblio_name == "year") and biblio.biblio_value:
                publication_date = datetime.datetime(int(biblio.biblio_value), 12, 31)

        if not publication_date:
            publication_date = self.created
        return publication_date.isoformat()

    @hybrid_method
    def published_before(self, mydate):
        """Whether ``publication_date`` sorts before ``mydate.isoformat()``."""
        return (self.publication_date < mydate.isoformat())

    @classmethod
    def create_from_old_doc(cls, doc):
        """Build an item from a legacy document dict.

        ``doc["_id"]`` becomes ``tiid``; only ``tiid``, ``created``,
        ``last_modified`` and ``last_update_run`` are kept. ``doc`` itself is
        not modified.

        Raises:
            KeyError: if ``doc`` has no ``"_id"`` entry.
        """
        logger.debug(u"in create_from_old_doc for {tiid}".format(
            tiid=doc["_id"]))

        kept_keys = ("tiid", "created", "last_modified", "last_update_run")
        doc_copy = copy.deepcopy(doc)
        doc_copy["tiid"] = doc_copy["_id"]
        # Build a filtered dict rather than deleting keys while iterating:
        # on Python 3, removing entries during iteration of ``keys()``
        # raises RuntimeError.
        fields = dict(
            (key, value) for key, value in doc_copy.items()
            if key in kept_keys)
        return cls(**fields)

    def as_old_doc(self):
        """Return this item as a dict in the legacy document layout.

        The dict has ``_id``, ``last_modified``, ``created``, ``type``,
        ``biblio``, ``aliases`` and ``metrics`` entries.
        """
        # logger.debug(u"in as_old_doc for {tiid}".format(
        #     tiid=self.tiid))

        item_doc = {}
        item_doc["_id"] = self.tiid
        item_doc["last_modified"] = self.last_modified.isoformat()
        item_doc["created"] = self.created.isoformat()
        item_doc["type"] = "item"

        item_doc["biblio"] = {}
        for biblio in self.biblios:
            item_doc["biblio"][biblio.biblio_name] = biblio.biblio_value

        item_doc["aliases"] = alias_dict_from_tuples(self.alias_tuples)
        if item_doc["biblio"]:
            item_doc["aliases"]["biblio"] = [item_doc["biblio"]]

        item_doc["metrics"] = {}
        for metric in self.metrics:
            metric_name = metric.provider + ":" + metric.metric_name
            metrics_method_response = (metric.raw_value, metric.drilldown_url)
            item_doc = add_metrics_data(metric_name, metrics_method_response, item_doc, metric.collected_date.isoformat())

        # For each metric, set "raw" to the history entry with the latest
        # date key (dates are compared as strings).
        for full_metric_name in item_doc["metrics"]:
            values = item_doc["metrics"][full_metric_name]["values"]
            most_recent_date_so_far = "1900"
            for this_date in values["raw_history"]:
                if this_date > most_recent_date_so_far:
                    most_recent_date_so_far = this_date
                    values["raw"] = values["raw_history"][this_date]

        return item_doc
Example #12
0
class Item(db.Model):
    """An item identified by ``tiid``, with aliases, biblio, snaps and refresh state.

    ``metrics`` is declared ``lazy='noload'``; ``from_tiid`` fills it
    explicitly from the dynamic ``metrics_query`` relationship.
    """

    profile_id = db.Column(db.Integer)
    tiid = db.Column(db.Text, primary_key=True)
    created = db.Column(db.DateTime())
    last_modified = db.Column(db.DateTime())
    last_update_run = db.Column(db.DateTime())
    last_refresh_started = db.Column(db.DateTime())  #ALTER TABLE item ADD last_refresh_started timestamp
    last_refresh_finished = db.Column(db.DateTime()) #ALTER TABLE item ADD last_refresh_finished timestamp
    last_refresh_status = db.Column(db.Text) #ALTER TABLE item ADD last_refresh_status text
    last_refresh_failure_message = db.Column(json_sqlalchemy.JSONAlchemy(db.Text)) #ALTER TABLE item ADD last_refresh_failure_message text
    embed_markup = db.Column(db.Text)

    aliases = db.relationship('Alias', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    biblios = db.relationship('Biblio', lazy='subquery', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="subquery"))
    metrics = db.relationship('Snap', lazy='noload', cascade="all, delete-orphan",
        backref=db.backref("item", lazy="noload"))
    metrics_query = db.relationship('Snap', lazy='dynamic')

    def __init__(self, **kwargs):
        """Create an item, filling in defaults for missing fields.

        ``tiid`` defaults to the first 24 characters of a new ``shortuuid``
        value, generated after setting the shortuuid alphabet to lowercase
        letters and digits. ``created``, ``last_modified`` and
        ``last_update_run`` default to one shared current-UTC timestamp.
        """
        # logger.debug(u"new Item {kwargs}".format(
        #     kwargs=kwargs))

        if "tiid" in kwargs:
            self.tiid = kwargs["tiid"]
        else:
            # Module-level shortuuid setting, applied before generating.
            shortuuid.set_alphabet('abcdefghijklmnopqrstuvwxyz1234567890')
            self.tiid = shortuuid.uuid()[0:24]

        now = datetime.datetime.utcnow()
        if "created" in kwargs:
            self.created = kwargs["created"]
        else:
            self.created = now
        if "last_modified" in kwargs:
            self.last_modified = kwargs["last_modified"]
        else:
            self.last_modified = now
        if "last_update_run" in kwargs:
            self.last_update_run = kwargs["last_update_run"]
        else:
            self.last_update_run = now

        super(Item, self).__init__(**kwargs)

    def __repr__(self):
        return '<Item {tiid}>'.format(
            tiid=self.tiid)

    @classmethod
    def from_tiid(cls, tiid, with_metrics=True):
        """Return the item with primary key ``tiid``, or ``None``.

        When ``with_metrics`` is true, ``metrics`` is filled from
        ``metrics_query``.
        """
        item = cls.query.get(tiid)
        if not item:
            return None
        if with_metrics:
            item.metrics = item.metrics_query.all()
        return item

    @property
    def alias_tuples(self):
        """The ``alias_tuple`` of every alias on this item."""
        return [alias.alias_tuple for alias in self.aliases]

    @property
    def biblio_dict(self):
        """Map of biblio name to value, merged across providers.

        The first value seen for a name is kept, except that a
        ``"user_provided"`` row always overrides.
        """
        biblio_dict = {}
        for biblio_obj in self.biblios:
            if (biblio_obj.biblio_name not in biblio_dict) or (biblio_obj.provider == "user_provided"):
                biblio_dict[biblio_obj.biblio_name] = biblio_obj.biblio_value
        return biblio_dict

    @property
    def biblio_dicts_per_provider(self):
        """Map of provider to ``{biblio_name: biblio_value}``."""
        response = defaultdict(dict)
        for biblio in self.biblios:
            response[biblio.provider][biblio.biblio_name] = biblio.biblio_value
        return response

    @property
    def publication_date(self):
        """ISO-formatted publication date, falling back to ``created``.

        Taken from the item's ``"date"`` biblio, or 31 December of its
        ``"year"`` biblio.
        """
        # NOTE(review): the loop never stops early, so whichever of "date" /
        # "year" appears last in ``self.biblios`` wins -- confirm that is the
        # intended precedence.
        # NOTE(review): a "date" value must expose ``isoformat()``; confirm
        # the stored type.
        publication_date = None
        for biblio in self.biblios:
            if biblio.biblio_name == "date":
                publication_date = biblio.biblio_value
                continue
            if (biblio.biblio_name == "year") and biblio.biblio_value:
                publication_date = datetime.datetime(int(biblio.biblio_value), 12, 31)

        if not publication_date:
            publication_date = self.created
        return publication_date.isoformat()

    @hybrid_method
    def published_before(self, mydate):
        """Whether ``publication_date`` sorts before ``mydate.isoformat()``."""
        return (self.publication_date < mydate.isoformat())

    def has_user_provided_biblio(self):
        """Whether any biblio row has provider ``'user_provided'``."""
        return any(biblio.provider == 'user_provided' for biblio in self.biblios)

    def has_free_fulltext_url(self):
        """Whether any biblio row is named ``'free_fulltext_url'``."""
        return any(biblio.biblio_name == 'free_fulltext_url' for biblio in self.biblios)

    def set_last_refresh_start(self):
        """Mark a refresh as started now (UTC) and clear the previous outcome."""
        self.last_refresh_started = datetime.datetime.utcnow()
        self.last_refresh_finished = None
        self.last_refresh_status = u"STARTED"
        self.last_refresh_failure_message = None

    def set_last_refresh_finished(self, myredis):
        """Record the refresh outcome reported by ``refresh_status``.

        The ``"short"`` status is always stored. The ``"long"`` message is
        stored as the failure message only when the short status does not
        start with ``SUCCESS``.
        """
        redis_refresh_status = refresh_status(self.tiid, myredis)
        if not redis_refresh_status["short"].startswith(u"SUCCESS"):
            self.last_refresh_failure_message = redis_refresh_status["long"]
        self.last_refresh_status = redis_refresh_status["short"]
        self.last_refresh_finished = datetime.datetime.utcnow()

    @classmethod
    def create_from_old_doc(cls, doc):
        """Build an item from a legacy document dict.

        ``doc["_id"]`` becomes ``tiid``; only ``tiid``, ``created``,
        ``last_modified`` and ``last_update_run`` are kept. ``doc`` itself is
        not modified.

        Raises:
            KeyError: if ``doc`` has no ``"_id"`` entry.
        """
        # logger.debug(u"in create_from_old_doc for {tiid}".format(
        #     tiid=doc["_id"]))

        kept_keys = ("tiid", "created", "last_modified", "last_update_run")
        doc_copy = copy.deepcopy(doc)
        doc_copy["tiid"] = doc_copy["_id"]
        # Build a filtered dict rather than deleting keys while iterating:
        # on Python 3, removing entries during iteration of ``keys()``
        # raises RuntimeError.
        fields = dict(
            (key, value) for key, value in doc_copy.items()
            if key in kept_keys)
        return cls(**fields)

    def as_old_doc(self):
        """Return this item as a dict in the legacy document layout.

        The dict has ``_id``, ``last_modified``, ``created``,
        ``last_update_run``, ``type``, ``biblio``, ``aliases`` and
        ``metrics`` entries.
        """
        # logger.debug(u"in as_old_doc for {tiid}".format(
        #     tiid=self.tiid))

        item_doc = {}
        item_doc["_id"] = self.tiid
        item_doc["last_modified"] = self.last_modified.isoformat()
        item_doc["created"] = self.created.isoformat()
        item_doc["last_update_run"] = self.last_update_run.isoformat()
        item_doc["type"] = "item"

        item_doc["biblio"] = self.biblio_dict

        item_doc["aliases"] = alias_dict_from_tuples(self.alias_tuples)
        if item_doc["biblio"]:
            item_doc["aliases"]["biblio"] = [item_doc["biblio"]]

        item_doc["metrics"] = {}
        for metric in self.metrics:
            metric_name = metric.provider + ":" + metric.metric_name
            metrics_method_response = (metric.raw_value, metric.drilldown_url)
            item_doc = add_metrics_data(metric_name, metrics_method_response, item_doc, metric.collected_date.isoformat())

        # For each metric, set "raw" to the history entry with the latest
        # date key (dates are compared as strings).
        for full_metric_name in item_doc["metrics"]:
            values = item_doc["metrics"][full_metric_name]["values"]
            most_recent_date_so_far = "1900"
            for this_date in values["raw_history"]:
                if this_date > most_recent_date_so_far:
                    most_recent_date_so_far = this_date
                    values["raw"] = values["raw_history"][this_date]

        return item_doc