예제 #1
0
    def metrics(
            self,
            aliases,
            provider_url_template=None,  # ignore this because multiple url steps
            cache_enabled=True):
        """Return metrics for the item's first pmid using the batch data.

        ``aliases`` is converted with ``item.alias_dict_from_tuples``.  The
        module-level ``batch_data`` is built with ``build_batch_data_dict``
        the first time it is found empty.  An empty dict is returned when
        the item has no pmid alias or when ``batch_data`` holds no pages for
        that pmid.  ``provider_url_template`` and ``cache_enabled`` are
        accepted but not used in this method.
        """
        # Build the shared batch data lazily, on first use.
        global batch_data
        if not batch_data:
            batch_data = self.build_batch_data_dict()

        # Only lookup metrics for items with appropriate ids
        from totalimpact import item
        aliases_dict = item.alias_dict_from_tuples(aliases)
        try:
            pmid = aliases_dict["pmid"][0]
        except KeyError:
            return {}

        pmid_alias = ("pmid", pmid)
        if pmid_alias not in batch_data:
            return {}

        # Decide on the collected pages themselves.  The comprehension's loop
        # variable is only visible afterwards on Python 2, so it must not be
        # used as the "did we find anything" flag.
        pages = [entry["raw"] for entry in batch_data[pmid_alias]]
        if not pages:
            return {}

        return self._get_metrics_and_drilldown(pages, pmid)
예제 #2
0
    def metrics(self,
            aliases,
            provider_url_template=None, # ignore this because multiple url steps
            cache_enabled=True):
        """Return metrics for the item's first pmid from ``batch_data``.

        ``aliases`` is converted with ``item.alias_dict_from_tuples``.  The
        result is an empty dict when there is no pmid alias or when the
        module-level ``batch_data`` has no pages for that pmid.
        ``provider_url_template`` and ``cache_enabled`` are not used here.
        """
        global batch_data

        # Only lookup metrics for items with appropriate ids
        from totalimpact import item
        aliases_dict = item.alias_dict_from_tuples(aliases)
        try:
            pmid = aliases_dict["pmid"][0]
        except KeyError:
            return {}

        pmid_alias = ("pmid", pmid)

        # Start from an empty list so the check below never depends on a
        # comprehension loop variable (which does not leak on Python 3).
        pages = []
        if pmid_alias in batch_data:
            pages = [entry["raw"] for entry in batch_data[pmid_alias]]

        metrics_and_drilldown = {}
        if pages:
            metrics_and_drilldown = self._get_metrics_and_drilldown(pages, pmid)

        return metrics_and_drilldown
예제 #3
0
def provider_method_wrapper(tiid, input_aliases_dict, provider, method_name):
    """Call ``method_name`` on ``provider`` with the item's aliases.

    ``input_aliases_dict`` may be an alias dict or a list of alias tuples; a
    list is converted to a dict first.  A ``ProviderError`` raised by the
    provider method is logged and ``method_response`` is set to ``None``.
    """
    provider_name = provider.provider_name
    worker_name = provider_name + "_worker"

    if isinstance(input_aliases_dict, list):
        input_aliases_dict = item_module.alias_dict_from_tuples(input_aliases_dict)

    input_alias_tuples = item_module.alias_tuples_from_dict(input_aliases_dict)
    method = getattr(provider, method_name)

    try:
        method_response = method(input_alias_tuples)
    except ProviderError as e:
        # "as" binds the exception on Python 2.6+ and Python 3 alike; the
        # older comma form is a syntax error on Python 3.
        method_response = None

        logger.info(u"{:20}: **ProviderError {tiid} {method_name} {provider_name}, Exception type {exception_type} {exception_arguments}".format(
            worker_name,
            tiid=tiid,
            provider_name=provider_name.upper(),
            method_name=method_name.upper(),
            exception_type=type(e).__name__,
            exception_arguments=e.args))
예제 #4
0
def add_to_database_if_nonzero(tiid, new_content, method_name, provider_name):
    """Apply non-empty provider output to the item stored under ``tiid``.

    Nothing happens when ``new_content`` is empty or no item is found.
    Otherwise the content is handed to the ``item_module`` helper matching
    ``method_name`` ("aliases", "biblio" or "metrics"); any other method
    name is logged as a warning.  Always returns ``None``.
    """
    if not new_content:
        return

    # don't need item with metrics for this purpose, so don't bother getting metrics from db
    stored_item = item_module.Item.query.get(tiid)
    if not stored_item:
        return

    if method_name == "aliases":
        alias_content = new_content
        if isinstance(alias_content, list):
            # Alias tuples are converted to the dict form first.
            alias_content = item_module.alias_dict_from_tuples(alias_content)
        item_module.add_aliases_to_item_object(alias_content, stored_item)
    elif method_name == "biblio":
        item_module.update_item_with_new_biblio(new_content, stored_item, provider_name)
    elif method_name == "metrics":
        # Each call receives the object returned by the previous one.
        for metric_name in new_content:
            stored_item = item_module.add_metric_to_item_object(
                metric_name, new_content[metric_name], stored_item)
    else:
        logger.warning(u"ack, supposed to save something i don't know about: " + str(new_content))

    return
예제 #5
0
    def wrapper(cls, tiid, input_aliases_dict, provider, method_name,
                aliases_providers_run, callback):
        """Run one provider method for an item and pass the result to ``callback``.

        The provider method named ``method_name`` is called with the alias
        tuples built from ``input_aliases_dict``.  A ``ProviderError`` is
        logged and treated as an empty response.  For the "aliases" method
        the provider name is added to ``aliases_providers_run`` and the
        response is the merge of the canonicalised new aliases with the input
        aliases (or the input aliases alone when nothing came back).  The
        response is logged, handed to ``callback`` and returned.
        """
        provider_name = provider.provider_name
        worker_name = provider_name + "_worker"

        alias_tuples = item_module.alias_tuples_from_dict(input_aliases_dict)
        provider_method = getattr(provider, method_name)

        method_response = None
        try:
            method_response = provider_method(alias_tuples)
        except ProviderError:
            error_template = u"{:20}: **ProviderError {tiid} {method_name} {provider_name} "
            logger.info(error_template.format(
                worker_name,
                tiid=tiid,
                provider_name=provider_name.upper(),
                method_name=method_name.upper()))

        response = method_response
        if method_name == "aliases":
            # update aliases to include the old ones too
            aliases_providers_run += [provider_name]
            if method_response:
                found_aliases = item_module.alias_dict_from_tuples(method_response)
                canonical_aliases = item_module.canonical_aliases(found_aliases)
                response = item_module.merge_alias_dicts(
                    canonical_aliases, input_aliases_dict)
            else:
                response = input_aliases_dict

        returned_template = u"{:20}: /biblio_print, RETURNED {tiid} {method_name} {provider_name} : {response}"
        logger.info(returned_template.format(
            worker_name,
            tiid=tiid,
            method_name=method_name.upper(),
            provider_name=provider_name.upper(),
            response=response))

        callback(tiid, response, method_name, aliases_providers_run)

        # thread isn't there when we call wrapper in unit tests
        try:
            del thread_count[provider_name][tiid + method_name]
        except KeyError:
            pass

        return response
예제 #6
0
    def metrics(
            self,
            aliases,
            provider_url_template=None,  # ignore this because multiple url steps
            cache_enabled=True):
        """Return Mendeley metrics for an item, trying doi, pmid, then title.

        ``aliases`` is converted with ``item.alias_dict_from_tuples``.  The
        metrics page is looked up by the first doi, then by the first pmid,
        and finally through a uuid found from the biblio title.  Returns an
        empty dict when no lookup yields a page.  ``cache_enabled`` is passed
        to every lookup; ``provider_url_template`` is not used here.
        """
        # Only lookup metrics for items with appropriate ids
        from totalimpact import item
        aliases_dict = item.alias_dict_from_tuples(aliases)

        metrics_page = None
        # try lookup by doi
        try:
            metrics_page = self._get_metrics_lookup_page(
                self.metrics_from_doi_template, aliases_dict["doi"][0],
                cache_enabled)
        except KeyError:
            pass
        # try lookup by pmid
        if not metrics_page:
            try:
                metrics_page = self._get_metrics_lookup_page(
                    self.metrics_from_pmid_template, aliases_dict["pmid"][0],
                    cache_enabled)
            except KeyError:
                pass
        # try lookup by title
        if not metrics_page:
            try:
                title = aliases_dict["biblio"][0]["title"]
                page = self._get_uuid_lookup_page(title, cache_enabled)
                if page:
                    uuid = self._get_uuid_from_title(aliases_dict,
                                                     page)["uuid"]
                    if uuid:
                        logger.debug(
                            u"Mendeley: uuid is %s for %s" % (uuid, title))
                        # Honour cache_enabled here too, as the doi and pmid
                        # lookups above already do.
                        metrics_page = self._get_metrics_lookup_page(
                            self.metrics_from_uuid_template, uuid,
                            cache_enabled)
                    else:
                        logger.debug(
                            u"Mendeley: couldn't find uuid for %s" % (title))
            except (KeyError, TypeError):
                pass
        # give up!
        if not metrics_page:
            return {}

        return self._get_metrics_and_drilldown_from_metrics_page(metrics_page)
예제 #7
0
    def wrapper(cls, tiid, input_aliases_dict, provider, method_name, aliases_providers_run, callback):
        """Invoke a provider method for ``tiid`` and report the outcome.

        Calls ``getattr(provider, method_name)`` with alias tuples derived
        from ``input_aliases_dict``; a ``ProviderError`` is logged and the
        response treated as ``None``.  When ``method_name`` is "aliases" the
        provider is recorded in ``aliases_providers_run`` and any new aliases
        are canonicalised and merged with the input aliases.  The final
        response is logged, passed to ``callback`` and returned.
        """
        provider_name = provider.provider_name
        worker_name = provider_name + "_worker"

        tuples_for_provider = item_module.alias_tuples_from_dict(input_aliases_dict)
        call_provider = getattr(provider, method_name)

        try:
            method_response = call_provider(tuples_for_provider)
        except ProviderError:
            method_response = None
            failure_line = u"{:20}: **ProviderError {tiid} {method_name} {provider_name} ".format(
                worker_name, tiid=tiid, provider_name=provider_name.upper(), method_name=method_name.upper()
            )
            logger.info(failure_line)

        is_alias_run = method_name == "aliases"
        if is_alias_run:
            # update aliases to include the old ones too
            aliases_providers_run += [provider_name]

        if is_alias_run and method_response:
            fresh_aliases = item_module.alias_dict_from_tuples(method_response)
            fresh_canonical = item_module.canonical_aliases(fresh_aliases)
            response = item_module.merge_alias_dicts(fresh_canonical, input_aliases_dict)
        elif is_alias_run:
            response = input_aliases_dict
        else:
            response = method_response

        summary_line = u"{:20}: /biblio_print, RETURNED {tiid} {method_name} {provider_name} : {response}".format(
            worker_name,
            tiid=tiid,
            method_name=method_name.upper(),
            provider_name=provider_name.upper(),
            response=response,
        )
        logger.info(summary_line)

        callback(tiid, response, method_name, aliases_providers_run)

        try:
            del thread_count[provider_name][tiid + method_name]
        except KeyError:
            # thread isn't there when we call wrapper in unit tests
            pass

        return response
예제 #8
0
    def get_best_url(self, aliases):
        """Return the preferred URL for an item, or ``None``.

        Nothing is returned unless ``relevant_aliases`` finds something in
        ``aliases``.  Otherwise the first alias of the first namespace
        present, checked in the order doi, pmid, pmc, url, decides the
        result; doi, pmid and pmc ids are appended to a fixed URL prefix.
        """
        if not self.relevant_aliases(aliases):
            return None

        from totalimpact import item
        by_namespace = item.alias_dict_from_tuples(aliases)

        # Checked in priority order; the first namespace present wins.
        url_prefixes = (
            ("doi", u"http://doi.org/"),
            ("pmid", u"http://www.ncbi.nlm.nih.gov/pubmed/"),
            ("pmc", u"http://www.ncbi.nlm.nih.gov/pmc/articles/"),
        )
        for namespace, prefix in url_prefixes:
            if namespace in by_namespace:
                return prefix + by_namespace[namespace][0]

        # A url alias is already a full URL and is returned untouched.
        if "url" in by_namespace:
            return by_namespace["url"][0]
        return None
예제 #9
0
    def _get_doc(self, aliases):
        """Fetch the document matching ``aliases``, by id first, then by title.

        ``aliases`` is converted with ``item.alias_dict_from_tuples``.  A
        session is created with ``_connect`` if none exists yet.  The lookup
        uses the first namespace present among doi, pmid and arxiv (in that
        order); if that finds nothing and a biblio alias exists, a title
        lookup is tried.  Returns whatever the lookup helper returned, or
        ``None`` when no lookup was possible.
        """
        from totalimpact import item
        aliases_dict = item.alias_dict_from_tuples(aliases)

        if not self.session:
            self.session = self._connect()
        doc = None

        # Use a fixed priority instead of dict iteration order, so the same
        # aliases always trigger the same lookup.
        for namespace in ("doi", "pmid", "arxiv"):
            if namespace in aliases_dict:
                doc = self._get_doc_by_id(namespace, aliases_dict)
                break

        if not doc and ("biblio" in aliases_dict):
            doc = self._get_doc_by_title(aliases_dict)

        return doc
예제 #10
0
    def get_best_url(self, aliases):
        """Pick the best URL for an item from its aliases.

        Returns ``None`` when ``relevant_aliases`` yields nothing for
        ``aliases`` or when none of the doi, pmid, pmc or url namespaces is
        present.  Namespaces are tried in that order and the first alias of
        the winning namespace is used.
        """
        best_url = None

        if self.relevant_aliases(aliases):
            from totalimpact import item
            lookup = item.alias_dict_from_tuples(aliases)

            if "doi" in lookup:
                best_url = u"http://doi.org/" + lookup["doi"][0]
            elif "pmid" in lookup:
                best_url = u"http://www.ncbi.nlm.nih.gov/pubmed/" + lookup["pmid"][0]
            elif "pmc" in lookup:
                best_url = u"http://www.ncbi.nlm.nih.gov/pmc/articles/" + lookup["pmc"][0]
            elif "url" in lookup:
                best_url = lookup["url"][0]

        return best_url
예제 #11
0
    def metrics(
        self, aliases, provider_url_template=None, cache_enabled=True  # ignore this because multiple url steps
    ):
        """Look up Mendeley metrics by doi, then pmid, then title.

        ``aliases`` is converted with ``item.alias_dict_from_tuples``.  Each
        lookup is only attempted when the previous one produced no page; the
        title route first resolves a uuid and then fetches the metrics page
        for it.  ``cache_enabled`` is forwarded to every lookup.  Returns an
        empty dict when no page is found.  ``provider_url_template`` is not
        used here.
        """
        # Only lookup metrics for items with appropriate ids
        from totalimpact import item

        aliases_dict = item.alias_dict_from_tuples(aliases)

        metrics_page = None
        # try lookup by doi
        try:
            metrics_page = self._get_metrics_lookup_page(
                self.metrics_from_doi_template, aliases_dict["doi"][0], cache_enabled
            )
        except KeyError:
            pass
        # try lookup by pmid
        if not metrics_page:
            try:
                metrics_page = self._get_metrics_lookup_page(
                    self.metrics_from_pmid_template, aliases_dict["pmid"][0], cache_enabled
                )
            except KeyError:
                pass
        # try lookup by title
        if not metrics_page:
            try:
                title = aliases_dict["biblio"][0]["title"]
                page = self._get_uuid_lookup_page(title, cache_enabled)
                if page:
                    uuid = self._get_uuid_from_title(aliases_dict, page)["uuid"]
                    if uuid:
                        logger.debug(u"Mendeley: uuid is %s for %s" % (uuid, title))
                        # Pass cache_enabled along, matching the doi and pmid
                        # lookups, so a caller's cache choice is respected.
                        metrics_page = self._get_metrics_lookup_page(
                            self.metrics_from_uuid_template, uuid, cache_enabled
                        )
                    else:
                        logger.debug(u"Mendeley: couldn't find uuid for %s" % (title))
            except (KeyError, TypeError):
                pass
        # give up!
        if not metrics_page:
            return {}

        metrics_and_drilldown = self._get_metrics_and_drilldown_from_metrics_page(metrics_page)

        return metrics_and_drilldown
예제 #12
0
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Mark the items for ``tiids`` as refreshing and queue them for update.

    Every item found for ``tiids`` has ``set_last_refresh_start`` called and
    is added to the db session; the session is committed once afterwards.
    The collected tiid/alias dicts are passed to
    ``item_module.start_item_update`` with ``priority`` and ``myredis``.
    Returns ``tiids`` unchanged.  ``analytics_credentials`` is not used in
    this function.
    """
    item_objects = item_module.Item.query.filter(item_module.Item.tiid.in_(tiids)).all()
    dicts_to_refresh = []

    for item_obj in item_objects:
        try:
            tiid = item_obj.tiid
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(item_obj.alias_tuples)
            dicts_to_refresh.append({"tiid": tiid, "aliases_dict": alias_dict})
        except AttributeError:
            # Read the tiid defensively: the local name may be unbound, or
            # left over from a previous iteration, if the failure was the
            # ``item_obj.tiid`` access itself.
            logger.debug(u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                tiid=getattr(item_obj, "tiid", None)))

    db.session.commit()

    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
예제 #13
0
    def aliases(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True):
        """Return alias tuples found on the matching document.

        The document comes from ``_get_doc``; when there is none an empty
        list is returned.  Identifiers in the doi, arxiv, pmid and scopus
        namespaces are added unless the item already has that namespace, then
        the document's link ("url") and id ("mendeley_uuid") are added.
        Tuples already present in ``aliases`` are left out of the result.
        ``provider_url_template`` and ``cache_enabled`` are not used here.
        """
        doc = self._get_doc(aliases)
        new_aliases = []
        if doc:
            from totalimpact import item
            aliases_dict = item.alias_dict_from_tuples(aliases)

            for namespace in doc.identifiers:
                if namespace in ["doi", "arxiv", "pmid", "scopus"] and (namespace not in aliases_dict):
                    new_aliases.append((namespace, doc.identifiers[namespace]))

            new_aliases.append(("url", doc.link))
            new_aliases.append(("mendeley_uuid", doc.id))
            # Report only what the caller does not already have.
            new_aliases = [alias for alias in new_aliases if alias not in aliases]

        return new_aliases
예제 #14
0
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Start a refresh for the items identified by ``tiids``.

    Items found for ``tiids`` get ``set_last_refresh_start`` called and are
    added to the db session, which is committed once after the loop.  Their
    tiid and alias dict are then handed to ``item_module.start_item_update``
    together with ``priority`` and ``myredis``.  Returns ``tiids`` as given.
    ``analytics_credentials`` is not used in this function.
    """
    item_objects = item_module.Item.query.filter(
        item_module.Item.tiid.in_(tiids)).all()
    dicts_to_refresh = []

    for item_obj in item_objects:
        # Reset per item so the log below can never report a tiid left over
        # from an earlier iteration, or hit an unbound name.
        tiid = None
        try:
            tiid = item_obj.tiid
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(
                item_obj.alias_tuples)
            dicts_to_refresh.append({"tiid": tiid, "aliases_dict": alias_dict})
        except AttributeError:
            logger.debug(
                u"couldn't find tiid {tiid} so not refreshing its metrics".
                format(tiid=tiid))

    db.session.commit()

    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
예제 #15
0
 def test_alias_dict_from_tuples(self):
     """A single alias tuple becomes a one-entry dict of lists."""
     expected = {'unknown_namespace': ['myname']}
     observed = item_module.alias_dict_from_tuples(
         [('unknown_namespace', 'myname')])
     assert_equals(observed, expected)
예제 #16
0
 def test_alias_dict_from_tuples(self):
     """An alias tuple is grouped under its namespace as a list of ids."""
     alias_tuples = [('unknown_namespace', 'myname')]
     assert_equals(
         item_module.alias_dict_from_tuples(alias_tuples),
         {'unknown_namespace': ['myname']},
     )
예제 #17
0
        method_response = None

        logger.info(u"{:20}: **ProviderError {tiid} {method_name} {provider_name}, Exception type {exception_type} {exception_arguments}".format(
            worker_name, 
            tiid=tiid, 
            provider_name=provider_name.upper(), 
            method_name=method_name.upper(), 
            exception_type=type(e).__name__, 
            exception_arguments=e.args))

    logger.info(u"{:20}: /biblio_print, RETURNED {tiid} {method_name} {provider_name} : {method_response}".format(
        worker_name, tiid=tiid, method_name=method_name.upper(), 
        provider_name=provider_name.upper(), method_response=method_response))

    if method_name == "aliases" and method_response:
        initial_alias_dict = item_module.alias_dict_from_tuples(method_response)
        new_canonical_aliases_dict = item_module.canonical_aliases(initial_alias_dict)
        full_aliases_dict = item_module.merge_alias_dicts(new_canonical_aliases_dict, input_aliases_dict)
    else:
        full_aliases_dict = input_aliases_dict

    add_to_database_if_nonzero(tiid, method_response, method_name, provider_name)

    return full_aliases_dict




# last variable is an artifact so it has same call signature as other callbacks
def add_to_database_if_nonzero( 
        tiid,