Example #1
0
    def get_owner(self):
        """
        Work out who owns this article, via the journals that share its ISSNs

        Each print and electronic ISSN on the article is looked up with
        Journal.find_by_issn, and the owner of every matching journal is collected.

        :return: the single owner common to all of the matching journals
        :raises NoValidOwnerException: if no matching journal is found, or if the
            matching journals do not all have the same owner
        """
        b = self.bibjson()

        # build a new list, rather than extending in place whatever list get_identifiers() handed back
        article_issns = list(b.get_identifiers(b.P_ISSN)) + list(b.get_identifiers(b.E_ISSN))

        # collecting into a set deduplicates the owners as we go
        owners = set()
        for issn in article_issns:
            journals = Journal.find_by_issn(issn)
            if journals is None:
                continue
            for j in journals:
                owners.add(j.owner)

        # no owner means we can't confirm
        if len(owners) == 0:
            raise NoValidOwnerException

        # multiple owners means ownership of this article is confused.  This must be
        # raised, not returned, otherwise the caller receives the exception class as
        # though it were the owner
        if len(owners) > 1:
            raise NoValidOwnerException

        return next(iter(owners))
Example #2
0
def _get_journal_id_from_issn(issn):
    """
    Resolve an ISSN to the id of a journal which carries it

    The ISSN is passed through _normalise_issn and then looked up with
    Journal.find_by_issn.  A warning is printed if the lookup finds no journal,
    or more than one.

    :param issn: the ISSN to look up
    :return: the id of the first matching journal, or None if nothing matches
    """
    issn = _normalise_issn(issn)
    journals = Journal.find_by_issn(issn)

    # each message is formatted into a single string so that the output is the same
    # whether print is the Python 2 statement or the Python 3 function
    if not journals:
        print("WARN: issn %s does not map to any journals" % (issn,))
        return None

    if len(journals) > 1:
        print("WARN: issn %s maps to multiple journals: %s" % (issn, ", ".join([j.id for j in journals])))

    # when the ISSN is ambiguous we still report the first match
    return journals[0].id
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    """
    Scroll through every application and write two CSV reports of inconsistencies

    1. outfile_later lists applications whose latest provenance record is more than
       THRESHOLD seconds newer than the application's (adjusted) last updated timestamp
    2. outfile_missing lists accepted applications for which no matching journal
       (looked up by ISSN, then by title) reports is_in_doaj()

    Progress is printed to stdout as each application is processed.

    :param outfile_later: path of the CSV file for the "later provenance" report
    :param outfile_missing: path of the CSV file for the "missing journal" report
    :param conn: connection passed to esprit.tasks.scroll to iterate the "suggestion" type
    """
    with codecs.open(outfile_later, "wb", "utf-8") as f, codecs.open(outfile_missing, "wb", "utf-8") as g:

        # NOTE(review): this report uses the plain csv writer while the other uses UnicodeWriter,
        # presumably because only the second one carries free-text titles - confirm
        out_later = csv.writer(f)
        out_later.writerow(["Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference"])

        out_missing = UnicodeWriter(g)
        out_missing.writerow(["Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            # a single pre-formatted argument prints identically under Python 2 and Python 3
            print("%s %s" % (counter, application.id))

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
                diff = (latest_prov.created_timestamp - lustamp).total_seconds()

                if diff > THRESHOLD:
                    out_later.writerow([application.id, application.last_updated, latest_prov.created_date, diff])

            # Part 2 - missing journals (only accepted applications are expected to have one)
            if application.application_status != constants.APPLICATION_STATUS_ACCEPTED:
                continue

            # read the bibjson once, rather than re-reading it for every field
            bibjson = application.bibjson()
            issns = bibjson.issns()

            # find the matching journals by issn or, failing that, by title
            matching_journals = Journal.find_by_issn(issns)
            if len(matching_journals) == 0:
                matching_journals = Journal.find_by_title(bibjson.title)

            # the journal is missing unless at least one match is in the doaj; this also
            # covers the case where there are no matches at all
            if any(j.is_in_doaj() for j in matching_journals):
                continue

            created = "" if latest_prov is None else latest_prov.created_date
            out_missing.writerow([application.id, application.last_manual_update, created, " ".join(issns), bibjson.title])

        print("processed %s suggestions" % (counter,))
Example #4
0
def lookup_subject_categories(issns):
    """
    Build a column of subject classifications, one cell per entry in issns.

    Each entry is a comma-separated string of ISSNs, which is split and looked up
    with Journal.find_by_issn(..., in_doaj=False).  A single match yields its
    subjects formatted as "scheme:code - term" and joined with ", "; no match or
    several matches yield an error message in that cell instead.

    :param issns: iterable of comma-separated ISSN strings
    :return: list of strings, in the same order as issns
    """
    column = []

    for raw in issns:
        candidates = [part.strip() for part in raw.split(',')]
        matches = Journal.find_by_issn(candidates, in_doaj=False)
        match_count = len(matches)

        if match_count == 1:
            subjects = matches[0].bibjson().subjects()
            formatted = [f"{s['scheme']}:{s['code']} - {s['term']}" for s in subjects]
            cell = ', '.join(formatted)
        elif match_count == 0:
            cell = 'Error: not found'
        else:
            cell = 'Error: multiple records found for that ISSN'

        column.append(cell)

    return column
Example #5
0
    def get_journal(self):
        """
        Find the journal that this article belongs to
        :return: the first Journal found for any of the article's ISSNs, or None if no ISSN matches
        """
        bj = self.bibjson()

        # gather the print and electronic ISSNs, with duplicates removed
        print_issns = bj.get_identifiers(bj.P_ISSN)
        electronic_issns = bj.get_identifiers(bj.E_ISSN)
        candidates = set(print_issns + electronic_issns)

        # look each ISSN up in the index and stop at the first one that matches
        for candidate in candidates:
            matches = Journal.find_by_issn(candidate)
            if len(matches) == 0:
                continue
            # only one journal is expected per ISSN, so the first match is used
            return matches[0]

        return None
Example #6
0
File: article.py Project: DOAJ/doaj
    def get_journal(self):
        """
        Look up the journal record associated with this article
        :return: the matching Journal, or None when none of the article's ISSNs is found
        """
        record = self.bibjson()

        # the article's ISSNs: print first, then electronic, then deduplicated
        issn_pool = record.get_identifiers(record.P_ISSN)
        issn_pool = issn_pool + record.get_identifiers(record.E_ISSN)
        unique_issns = list(set(issn_pool))

        # search the index one ISSN at a time
        found = None
        for value in unique_issns:
            hits = Journal.find_by_issn(value)
            if len(hits) > 0:
                # an ISSN should only map to one journal, so the first hit is used
                found = hits[0]
                break

        return found
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    """
    Report on applications whose records look inconsistent, as two CSV files

    For every record scrolled from the "suggestion" type:

    * if its latest provenance record was created more than THRESHOLD seconds after
      the application's adjusted last updated timestamp, a row goes to outfile_later
    * if it is an accepted application and none of the journals found for it (by ISSN
      first, by title as a fallback) reports is_in_doaj(), a row goes to outfile_missing

    A progress line is printed for each application, and a total at the end.

    :param outfile_later: path to write the "later provenance" CSV to
    :param outfile_missing: path to write the "missing journal" CSV to
    :param conn: connection handed to esprit.tasks.scroll
    """
    with codecs.open(outfile_later, "wb",
                     "utf-8") as f, codecs.open(outfile_missing, "wb",
                                                "utf-8") as g:

        out_later = csv.writer(f)
        out_later.writerow([
            "Application ID", "Application Last Updated",
            "Latest Provenance Recorded", "Difference"
        ])

        out_missing = UnicodeWriter(g)
        out_missing.writerow([
            "Application ID", "Application Last Manual Update",
            "Latest Provenance Record", "ISSNs", "Title"
        ])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            # formatted into one string first, so that this line is valid (and prints
            # the same text) under both Python 2 and Python 3
            print("%s %s" % (counter, application.id))

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp,
                                           APP_TIMEZONE_CUTOFF)
                created = latest_prov.created_date
                td = latest_prov.created_timestamp - lustamp
                diff = td.total_seconds()

                if diff > THRESHOLD:
                    out_later.writerow([
                        application.id, application.last_updated, created, diff
                    ])

            # Part 2 - missing journals
            if application.application_status == constants.APPLICATION_STATUS_ACCEPTED:
                # fetch the bibjson a single time and reuse it below
                app_bibjson = application.bibjson()
                app_issns = app_bibjson.issns()
                app_title = app_bibjson.title

                # find the matching journals by issn or by title
                matching_journals = Journal.find_by_issn(app_issns)
                if len(matching_journals) == 0:
                    # Have another go, find by title
                    matching_journals = Journal.find_by_title(app_title)

                # the journal is missing if nothing matched, or if none of the
                # matches is in the doaj; any() is False in both of those cases
                missing = not any(j.is_in_doaj() for j in matching_journals)

                # if the journal is missing, record it
                if missing:
                    created = ""
                    if latest_prov is not None:
                        created = latest_prov.created_date
                    out_missing.writerow([
                        application.id, application.last_manual_update,
                        created, " ".join(app_issns), app_title
                    ])

        print("processed %s suggestions" % (counter,))
Example #8
0
 def get_associated_journals(self):
     """
     Look up the journal records in the index that match this record's ISSNs
     :return: whatever Journal.find_by_issn returns for the full list of ISSNs
     """
     return Journal.find_by_issn(self.bibjson().issns())
Example #9
0
    # first, get each application and consider it
    counter = 0
    for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
        counter += 1
        application = Suggestion(**result)
        application.remove_related_journal()

        # find all the journals that this application could be associated with (which we need to do by issn)
        issns = application.bibjson().issns()

        # query by each issn individually, because we're looking for the widest possible map.  Querying by
        # both would require both issns match
        related_journals = []
        related_journal_ids = []
        for issn in issns:
            journals = Journal.find_by_issn(issn)
            for journal in journals:
                if journal.id not in related_journal_ids:
                    related_journal_ids.append(journal.id)
                    related_journals.append(journal)

        if len(related_journals) > 0:
            # sort the journals by their created date
            related_journals = sorted(related_journals,
                                      key=lambda j: j.created_timestamp)

            # we set an application as having a related journal in the following conditions:
            # 1. The application was created before the journal and last updated near or after the journal created date,
            #       and this journal is the nearest one in time
            # 2. The last_reapplication date is after the application created date, and is the nearest one
            app_created = application.created_timestamp
Example #10
0
    # first, get each application and consider it
    counter = 0
    for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
        counter += 1
        application = Suggestion(**result)
        application.remove_related_journal()

        # find all the journals that this application could be associated with (which we need to do by issn)
        issns = application.bibjson().issns()

        # query by each issn individually, because we're looking for the widest possible map.  Querying by
        # both would require both issns match
        related_journals = []
        related_journal_ids = []
        for issn in issns:
            journals = Journal.find_by_issn(issn)
            for journal in journals:
                if journal.id not in related_journal_ids:
                    related_journal_ids.append(journal.id)
                    related_journals.append(journal)

        if len(related_journals) > 0:
            # sort the journals by their created date
            related_journals = sorted(related_journals, key=lambda j: j.created_timestamp)

            # we set an application as having a related journal in the following conditions:
            # 1. The application was created before the journal and last updated near or after the journal created date,
            #       and this journal is the nearest one in time
            # 2. The last_reapplication date is after the application created date, and is the nearest one
            app_created = application.created_timestamp
            for journal in related_journals:
Example #11
0
File: article.py Project: DOAJ/doaj
 def get_associated_journals(self):
     """
     Find the journals associated with this record, by ISSN
     :return: the result of Journal.find_by_issn for all of this record's ISSNs at once
     """
     record_issns = self.bibjson().issns()
     matches = Journal.find_by_issn(record_issns)
     return matches