Ejemplo n.º 1
0
 def process_doi(self, doi, doi_curator, doi_source_provider):
     """Resolve ``doi`` to a resource, reusing a known one when available.

     The DOI is first looked up with ``self.rf.retrieve_from_doi``. A hit is
     passed to ``process_existing_by_id`` together with ``self.id``. On a
     miss, the item obtained through ``get_crossref_item`` is forwarded to
     ``process_crossref_json``.

     :param doi: The DOI to look up.
     :param doi_curator: Forwarded unchanged to ``process_crossref_json``.
     :param doi_source_provider: Forwarded unchanged to
         ``process_crossref_json``.
     :return: Whatever ``process_existing_by_id`` or
         ``process_crossref_json`` returns, or ``None`` when
         ``get_crossref_item`` yields nothing.
     """
     known = self.rf.retrieve_from_doi(doi)
     if known is not None:
         return self.process_existing_by_id(known, self.id)

     record = self.get_crossref_item(
         self.__process_entity(doi, self.crossref_api_works))
     if record is None:
         return None

     # The source URL is the works endpoint followed by the encoded DOI.
     source_url = self.crossref_api_works + encode_url(doi)
     return self.process_crossref_json(
         record, source_url, doi_curator, doi_source_provider, self.source)
Ejemplo n.º 2
0
    def process_article(self,
                        cur_id,
                        cur_source,
                        cur_doi,
                        cur_pmid,
                        cur_pmcid,
                        oa=False,
                        intext_refs=False):
        """Collect and store the reference list of one article.

        Nothing is processed when ``self.rs.is_any_stored`` reports that one
        of the article identifiers (DOI, PMID, PMCID or local id) is already
        stored. Otherwise the reference list URL is obtained from
        ``process_xml_source`` (when ``oa`` is true) or from
        ``process_references``, and handed to ``self.rs.store``. Progress
        and failures are reported through ``self.repok`` / ``self.reper``.

        :param cur_id: Identifier of the article within ``cur_source``.
        :param cur_source: Source label; the local id is
            ``cur_source + "-" + cur_id``.
        :param cur_doi: DOI of the article or ``None``. When ``None`` and a
            PMID is given, it is looked up via
            ``__get_doi_from_xml_source``.
        :param cur_pmid: PMID of the article or ``None``.
        :param cur_pmcid: PMCID of the article or ``None``.
        :param oa: When true, references come from ``process_xml_source``.
        :param intext_refs: Forwarded (as a bool) to ``process_xml_source``;
            only used when ``oa`` is true.
        :return: ``None`` in every case.
        """
        if cur_doi is None and cur_pmid is not None:
            cur_doi = self.__get_doi_from_xml_source(cur_pmid)
        cur_localid = cur_source + "-" + cur_id

        # Fixed: the PMCID used to be missing here (the PMID was listed
        # twice), so an article known only by its PMCID was never detected
        # as already stored.
        id_list = [cur_doi, cur_pmid, cur_pmcid, cur_localid]

        if self.rs.is_any_stored(id_list):
            self.repok.add_sentence(
                "The article '%s' has been already stored." % cur_localid)
            return

        self.repok.new_article()
        self.repok.add_sentence("Processing article with local id '%s'." %
                                cur_localid)

        if oa:
            ref_list_url = self.process_xml_source(
                cur_pmid, cur_doi, intext_refs=bool(intext_refs))
        else:
            ref_list_url = self.process_references(cur_source, cur_id)

        if ref_list_url is None:
            self.reper.add_sentence(
                "The article '%s' has no references or its PubMed Central "
                "ID is not defined." % cur_localid)
            return

        # The first available identifier is used as primary id; the local
        # id is never None, so the search always succeeds. As a consequence
        # of the id_list fix the PMCID is now preferred over the local id
        # when both DOI and PMID are missing.
        primary_id = next(item for item in id_list if item is not None)
        stored = self.rs.store(
            primary_id, cur_localid, cur_doi, cur_pmid, cur_pmcid, self.name,
            self.provider, encode_url(ref_list_url), True)
        if stored:
            self.repok.add_sentence(
                "References of '%s' have been stored." % cur_localid)
        else:
            self.repok.add_sentence(
                "Something went wrong in storing the references of '%s'."
                % cur_localid)
Ejemplo n.º 3
0
    def process_doi(self,
                    doi: str,
                    doi_curator: str,
                    doi_source_provider: str,
                    check=False,
                    result=None,
                    typ='both'):
        """
        Resolve a DOI, reusing a stored resource or falling back to Crossref.

        The DOI is first looked up with ``self.rf.retrieve_from_doi``; a hit
        is passed to ``process_existing_by_id``. On a miss the Crossref data
        is taken from ``result`` when given, otherwise fetched through
        ``self.query_interface.get_data_crossref_doi``.

        Parameters
        ----------
        :param doi: The DOI to be searched.
        :param doi_curator: The curator (URL), e.g.: https://api.crossref.org/works/
        :param doi_source_provider: The source provider, e.g.: Europe PubMed Central
        :param check: Set it to True only in tests: the raw Crossref JSON is
                      then returned instead of being processed. It has no
                      effect when the DOI is already known.
        :param result: A result already retrieved with the query_interface
                       during the Bibentry creation process; when given, no
                       new query is issued.
        :param typ: A string that can be 'both', 'only_local', or
                    'only_blazegraph'; forwarded to the lookup to restrict it
                    to a specific kind of store.
        :return: The outcome of ``process_existing_by_id`` or
                 ``process_crossref_json``, the raw JSON when ``check`` is
                 set, or None when no Crossref data is available.
        """
        # A resource we already hold wins over any new query.
        known = self.rf.retrieve_from_doi(doi, typ=typ)
        if known is not None:
            return self.process_existing_by_id(known, self.id)

        # Prefer the caller-supplied result; query only when there is none.
        crossref_data = (result if result is not None else
                         self.query_interface.get_data_crossref_doi(doi))
        if crossref_data is None:
            return None
        if check:
            return crossref_data

        source_url = self.crossref_api_works + encode_url(doi)
        return self.process_crossref_json(
            crossref_data, source_url, doi_curator, doi_source_provider,
            self.source)
Ejemplo n.º 4
0
 def create_url(self, string):
     """Associate the normalized form of ``string`` with the URL scheme.

     ``string`` is lower-cased and passed through ``encode_url``; the result
     is handed to ``_associate_identifier_with_scheme`` together with
     ``GraphEntity.url``, whose return value is returned unchanged.
     """
     normalized = encode_url(string.lower())
     return self._associate_identifier_with_scheme(normalized, GraphEntity.url)
Ejemplo n.º 5
0
    def process_article(self, paper, oa=False, intext_refs=False):
        """Extract the references of ``paper`` and dump them to a JSON file.

        ``paper`` is read through the keys ``"cur_doi"``, ``"cur_pmid"``
        (converted with ``int``), ``"cur_pmcid"``, ``"cur_name"`` and
        ``"references"`` (a JSON-encoded string). The local id is built from
        the first usable identifier: PMID, then PMCID, then DOI.

        Nothing is written when one of the identifiers is already known to
        ``self.rs``. Otherwise the references are obtained from
        ``process_xml_source`` (only when ``oa`` is true) and written, with
        the identifiers, to a new JSON file under ``self.rs.ref_dir``; the
        local id is then appended to ``self.rs.csv_file``.

        :param paper: Mapping describing the article (keys listed above).
        :param oa: When true, references come from ``process_xml_source``.
            When false there is no reference source, and the article is
            reported through ``self.reper`` as having no references.
        :param intext_refs: Forwarded (as a bool) to ``process_xml_source``.
        :return: ``True`` when the JSON file was written and the local id was
            newly recorded in the CSV file; ``None`` in every other case.
        """
        cur_source = "MED"
        cur_doi = paper["cur_doi"]
        cur_pmid = int(paper["cur_pmid"])
        cur_pmcid = paper["cur_pmcid"]
        cur_name = paper["cur_name"]

        if cur_pmid != 0:
            cur_id = "PMID{}".format(cur_pmid)
        elif cur_pmcid != 0:
            cur_id = "{}".format(cur_pmcid)
        elif cur_doi is not None and cur_doi != "":
            cur_id = "DOI{}".format(cur_doi)
        else:
            self.repok.add_sentence("No id for this paper")
            return

        references = json.loads(paper["references"])

        cur_localid = "{}-{}".format(cur_source, cur_id)
        # Fixed: the PMCID used to be missing here (the PMID was listed
        # twice), so it never took part in the "already stored" check.
        id_list = [str(cur_doi), str(cur_pmid), str(cur_pmcid), cur_localid]

        if self.rs.is_any_stored(id_list):
            self.repok.add_sentence(
                "The article '%s' has been already stored." % cur_localid)
            return

        self.repok.new_article()
        self.repok.add_sentence(
            "Processing article with local id {}".format(cur_localid))

        # Default to "no references": without this, a call with oa=False
        # reached the check below with unbound names (UnboundLocalError).
        ref_list_url, ref_list, ref_pointer_list = None, [], []
        if oa:
            ref_list_url, ref_list, ref_pointer_list = self.process_xml_source(
                cur_pmid, cur_name, cur_doi, references,
                intext_refs=bool(intext_refs))

        if ref_list_url is None:
            self.reper.add_sentence(
                "The article '%s' has no references or its PubMed Central "
                "ID is not defined." % cur_localid)
            return

        # Normalize missing identifiers to "" so they are skipped below.
        if cur_pmid == 0:
            cur_pmid = ""
        # Fixed: the None test used to look at cur_pmid instead of cur_pmcid.
        if cur_pmcid == 0 or cur_pmcid is None:
            cur_pmcid = ""

        # Note: "localid" holds cur_id, i.e. the id without the source prefix.
        json_item = {"references": ref_list, "localid": cur_id}
        if cur_doi not in ("", "nan", None):
            json_item["doi"] = str(cur_doi)
        if cur_pmid not in ("", "0", None):
            json_item["pmid"] = str(cur_pmid)
        if cur_pmcid not in ("", "0", None):
            json_item["pmcid"] = str(cur_pmcid)
        if self.name is not None:
            json_item["curator"] = str(self.name)
        if self.provider is not None:
            json_item["source_provider"] = str(self.provider)
        encoded_url = encode_url(ref_list_url)
        if encoded_url is not None:
            json_item["source"] = str(encoded_url)
        if ref_pointer_list:
            json_item["reference_pointers"] = ref_pointer_list

        # Directory name: the value of new_supplier() followed by the current
        # date and hour, so files are grouped per hour.
        cur_hour = datetime.now().strftime('%Y-%m-%d-%H')
        local_file_name = str(uuid.uuid4()) + ".json"
        local_dir_name = self.rs.new_supplier() + cur_hour

        new_dir_path = self.rs.ref_dir + os.sep + local_dir_name
        new_file_path = new_dir_path + os.sep + local_file_name

        os.makedirs(new_dir_path, exist_ok=True)
        try:
            # ensure_ascii=False emits raw non-ASCII characters, so the file
            # encoding must not depend on the platform default.
            with open(new_file_path, "w", encoding="utf-8") as f:
                json.dump(json_item, f, indent=4, ensure_ascii=False)
            if cur_localid not in self.rs.stored:
                with open(self.rs.csv_file, "a") as name_f:
                    name_f.write(cur_localid + "\n")
                self.rs.stored.add(cur_localid)
                return True
        except Exception as e:
            # Best effort: a failure on one article is reported and must not
            # stop the caller from processing the next one.
            print(e, "\n\n", json_item, "\n\n")