Example #1
0
 def test_load_csv_column_as_set(self):
     """Load the "oci" column of ``self.citation_path`` and compare it to the expected OCIs.

     The loader is called with ``4`` as its third positional argument; the
     returned collection must be set-equal to the six OCIs listed below.
     """
     loaded_ocis = CSVManager.load_csv_column_as_set(
         self.citation_path, "oci", 4)
     expected_ocis = {
         "02001000308362819371213133704040001020809-020010009063615193700006300030306151914",
         "02001000002361927283705040000-02001000002361927283705030002",
         "02001000002361927283705040000-020010003093612062710020603000720",
         "02001000308362819371213133704040001020804-02001000308362819371213133704040000030707",
         "020010000023625242110370100030001-02001010009361222251430273701090809370903040403",
         "020010103003602000105370205010358000059-02001010304362801000208030304330009000400020107",
     }
     self.assertSetEqual(loaded_ocis, expected_ocis)
Example #2
0
def extract_citations(idbaseurl,
                      baseurl,
                      python,
                      pclass,
                      input,
                      lookup,
                      data,
                      prefix,
                      agent,
                      source,
                      service,
                      verbose,
                      doi_manager,
                      rf_handler,
                      oci_to_do=None):
    """Pull citations from a citation source and store the ones not seen yet.

    Every record returned by the citation source is turned into an OCI. A
    record is processed only if its OCI (without the ``"oci:"`` prefix) is not
    among the already stored OCIs and, when ``oci_to_do`` is given, is listed
    in it. Processed records whose two DOIs are both valid are wrapped in a
    ``Citation`` and handed to a ``CitationStorer``.

    Args:
        idbaseurl: string prepended to the URL-quoted citing/cited DOIs.
        baseurl: dataset base URL given to ``CitationStorer``; a trailing
            ``"/"`` is appended when missing.
        python, pclass, input: forwarded unchanged to
            ``import_citation_source`` to obtain the citation source.
        lookup: passed as ``lookup_file`` to ``OCIManager``.
        data: directory given to ``CitationStorer``; its ``data``
            sub-directory is read to collect the OCIs already stored.
        prefix: forwarded to ``oci_manager.get_oci``.
        agent, source, service: forwarded to the ``Citation`` constructor.
        verbose: when truthy, progress and warning messages are printed.
        doi_manager: object exposing ``is_valid(doi)``.
        rf_handler: object exposing ``get_date``, ``share_issn`` and
            ``share_orcid``.
        oci_to_do: optional set of prefix-less OCIs to restrict the run to.
            It is modified in place: already stored OCIs are dropped up
            front and each processed OCI is removed afterwards.

    Returns:
        A tuple ``(new_citations_added, citations_already_present,
        error_in_dois_existence)``. The second counter also includes records
        skipped because their OCI is not in ``oci_to_do``.
    """
    id_base = idbaseurl
    dataset_url = baseurl if baseurl.endswith("/") else baseurl + "/"

    oci_manager = OCIManager(lookup_file=lookup)
    # TODO: the directory must be specified carefully, e.g. by adding a flag
    # that distinguishes the files belonging to a particular process, and it
    # should be aligned with the storer.
    known_ocis = CSVManager.load_csv_column_as_set(data + sep + "data", "oci")
    if oci_to_do is not None:
        oci_to_do.difference_update(known_ocis)
    storer = CitationStorer(data, dataset_url)

    added = 0
    skipped = 0
    invalid_dois = 0

    citation_source = import_citation_source(python, pclass, input)

    while True:
        record = citation_source.get_next_citation_data()
        if record is None:
            break

        citing, cited, created, timespan, journal_sc, author_sc = record
        oci = oci_manager.get_oci(citing, cited, prefix)
        oci_noprefix = oci.replace("oci:", "")

        # Skip anything already stored or, when a to-do set is supplied,
        # anything that is not part of it.
        if oci_noprefix in known_ocis or (
                oci_to_do is not None and oci_noprefix not in oci_to_do):
            if verbose:
                print(
                    "WARNING: the citation between DOI '%s' and DOI '%s' has been already processed"
                    % (citing, cited))
            skipped += 1
            continue

        if doi_manager.is_valid(citing) and doi_manager.is_valid(cited):
            # NOTE(review): these lookups run even when both `created` and
            # `timespan` are supplied, in which case neither date reaches the
            # Citation below. Kept as-is to preserve the calls on rf_handler.
            citing_date = (rf_handler.get_date(citing)
                           if created is None else created)
            cited_date = rf_handler.get_date(cited)

            # Self-citation flags are recomputed unless they are real booleans
            # (None is not a bool, so it is covered by the same test).
            if type(journal_sc) is not bool:
                journal_sc = rf_handler.share_issn(citing, cited)
            if type(author_sc) is not bool:
                author_sc = rf_handler.share_orcid(citing, cited)

            # Either pass creation date + timespan, or the two publication
            # dates -- never both groups at once.
            if created is not None and timespan is not None:
                citing_pub, cited_pub = None, None
                creation, duration = created, timespan
            else:
                citing_pub, cited_pub = citing_date, cited_date
                creation, duration = None, None

            citation = Citation(
                oci,
                id_base + quote(citing), citing_pub,
                id_base + quote(cited), cited_pub,
                creation, duration, 1,
                agent, source,
                datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), service,
                "doi", id_base + "([[XXX__decode]])", "reference",
                journal_sc, author_sc, None,
                "Creation of the citation", None)
            storer.store_citation(citation)

            if verbose:
                print(
                    "Create citation data for '%s' between DOI '%s' and DOI '%s'"
                    % (oci, citing, cited))
            added += 1
            known_ocis.add(oci_noprefix)
        else:
            if verbose:
                print(
                    "WARNING: some DOIs, among '%s' and '%s', do not exist"
                    % (citing, cited))
            invalid_dois += 1

        # The OCI has been handled (stored or rejected): take it off the list.
        if oci_to_do is not None:
            oci_to_do.remove(oci_noprefix)

    return added, skipped, invalid_dois