def get_owner(self):
    """
    Determine the single owner of this article from the journals sharing its ISSNs.

    The article's print and electronic ISSNs are each looked up with
    ``Journal.find_by_issn`` and the ``owner`` of every matching journal is collected.

    :return: the one owner common to all matching journals
    :raises NoValidOwnerException: if no journal matches any of the ISSNs, or if the
        matching journals belong to more than one distinct owner
    """
    b = self.bibjson()

    # concatenate into a new list so the lists handed back by get_identifiers are not mutated
    article_issns = b.get_identifiers(b.P_ISSN) + b.get_identifiers(b.E_ISSN)

    # a set deduplicates the owners as they are gathered
    owners = set()
    for issn in article_issns:
        journals = Journal.find_by_issn(issn)
        if not journals:
            # None or an empty result: this ISSN contributes no owner
            continue
        owners.update(j.owner for j in journals)

    # no owner means we can't confirm
    if len(owners) == 0:
        raise NoValidOwnerException

    # multiple owners means ownership of this article is confused; this must be raised
    # (not returned), otherwise the caller receives the exception class as the "owner"
    if len(owners) > 1:
        raise NoValidOwnerException

    return owners.pop()
def _get_journal_id_from_issn(issn): issn = _normalise_issn(issn) journals = Journal.find_by_issn(issn) if len(journals) > 1: print "WARN: issn", issn, "maps to multiple journals:", ", ".join([j.id for j in journals]) if len(journals) == 0: print "WARN: issn", issn, "does not map to any journals" if len(journals) > 0: return journals[0].id
def applications_inconsistencies(outfile_later, outfile_missing, conn): with codecs.open(outfile_later, "wb", "utf-8") as f, codecs.open(outfile_missing, "wb", "utf-8") as g: out_later = csv.writer(f) out_later.writerow(["Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference"]) out_missing = UnicodeWriter(g) out_missing.writerow(["Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title"]) counter = 0 for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"): counter += 1 application = Suggestion(**result) print counter, application.id # Part 1 - later provenance records exist latest_prov = Provenance.get_latest_by_resource_id(application.id) if latest_prov is not None: lustamp = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF) created = latest_prov.created_date pstamp = latest_prov.created_timestamp td = pstamp - lustamp diff = td.total_seconds() if diff > THRESHOLD: out_later.writerow([application.id, application.last_updated, created, diff]) # Part 2 - missing journals if application.application_status == constants.APPLICATION_STATUS_ACCEPTED: missing = False # find the matching journals by issn or by title matching_journals = Journal.find_by_issn(application.bibjson().issns()) if len(matching_journals) == 0: # Have another go, find by title matching_journals = Journal.find_by_title(application.bibjson().title) # if there are no matching journals, it is missing. if len(matching_journals) == 0: missing = True else: # if there are matching journals, find out if any of them are in the doaj. 
If none, then journal is still missing those_in_doaj = len([j for j in matching_journals if j.is_in_doaj()]) if those_in_doaj == 0: missing = True # if the journal is missing, record it if missing: created = "" if latest_prov is not None: created = latest_prov.created_date out_missing.writerow([application.id, application.last_manual_update, created, " ".join(application.bibjson().issns()), application.bibjson().title]) print "processed", counter, "suggestions"
def lookup_subject_categories(issns):
    """
    Build a column of subject classifications, one entry per ISSN cell.

    Each item of ``issns`` is a string of one or more comma-separated ISSNs.  The
    journals matching them are looked up (with ``in_doaj=False``) and the entry is:

    * ``'Error: not found'`` when nothing matches,
    * ``'Error: multiple records found for that ISSN'`` when more than one journal matches,
    * otherwise the journal's subjects rendered as ``scheme:code - term``, comma-separated.

    :param issns: iterable of ISSN cell strings
    :return: list of strings, in the same order as ``issns``
    """

    def describe(cell):
        # a single cell may carry several ISSNs separated by commas
        candidates = [part.strip() for part in cell.split(',')]
        matches = Journal.find_by_issn(candidates, in_doaj=False)

        if len(matches) == 0:
            return 'Error: not found'
        if len(matches) > 1:
            return 'Error: multiple records found for that ISSN'

        labels = []
        for subject in matches[0].bibjson().subjects():
            labels.append(f"{subject['scheme']}:{subject['code']} - {subject['term']}")
        return ', '.join(labels)

    return [describe(cell) for cell in issns]
def get_journal(self):
    """
    Get this article's associated journal

    :return: A Journal, or None if this is an orphan article
    """
    bj = self.bibjson()

    # gather the distinct print and electronic ISSNs carried by the record
    issns = set(bj.get_identifiers(bj.P_ISSN) + bj.get_identifiers(bj.E_ISSN))

    # the first ISSN that resolves to any journal decides the result
    for issn in issns:
        found = Journal.find_by_issn(issn)
        if len(found) > 0:
            # there should only ever be one, so take the first one
            return found[0]

    # no ISSN matched a journal
    return None
def applications_inconsistencies(outfile_later, outfile_missing, conn): with codecs.open(outfile_later, "wb", "utf-8") as f, codecs.open(outfile_missing, "wb", "utf-8") as g: out_later = csv.writer(f) out_later.writerow([ "Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference" ]) out_missing = UnicodeWriter(g) out_missing.writerow([ "Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title" ]) counter = 0 for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"): counter += 1 application = Suggestion(**result) print counter, application.id # Part 1 - later provenance records exist latest_prov = Provenance.get_latest_by_resource_id(application.id) if latest_prov is not None: lustamp = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF) created = latest_prov.created_date pstamp = latest_prov.created_timestamp td = pstamp - lustamp diff = td.total_seconds() if diff > THRESHOLD: out_later.writerow([ application.id, application.last_updated, created, diff ]) # Part 2 - missing journals if application.application_status == constants.APPLICATION_STATUS_ACCEPTED: missing = False # find the matching journals by issn or by title matching_journals = Journal.find_by_issn( application.bibjson().issns()) if len(matching_journals) == 0: # Have another go, find by title matching_journals = Journal.find_by_title( application.bibjson().title) # if there are no matching journals, it is missing. if len(matching_journals) == 0: missing = True else: # if there are matching journals, find out if any of them are in the doaj. 
If none, then journal is still missing those_in_doaj = len( [j for j in matching_journals if j.is_in_doaj()]) if those_in_doaj == 0: missing = True # if the journal is missing, record it if missing: created = "" if latest_prov is not None: created = latest_prov.created_date out_missing.writerow([ application.id, application.last_manual_update, created, " ".join(application.bibjson().issns()), application.bibjson().title ]) print "processed", counter, "suggestions"
def get_associated_journals(self):
    """
    Find every journal record in the index that matches this record's ISSNs.

    :return: whatever ``Journal.find_by_issn`` returns for the full list of ISSNs
    """
    # all of the record's ISSNs are sent in a single lookup
    return Journal.find_by_issn(self.bibjson().issns())
# first, get each application and consider it counter = 0 for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"): counter += 1 application = Suggestion(**result) application.remove_related_journal() # find all the journals that this application could be associated with (which we need to do by issn) issns = application.bibjson().issns() # query by each issn individually, because we're looking for the widest possible map. Querying by # both would require both issns match related_journals = [] related_journal_ids = [] for issn in issns: journals = Journal.find_by_issn(issn) for journal in journals: if journal.id not in related_journal_ids: related_journal_ids.append(journal.id) related_journals.append(journal) if len(related_journals) > 0: # sort the journals by their created date related_journals = sorted(related_journals, key=lambda j: j.created_timestamp) # we set an application as having a related journal in the following conditions: # 1. The application was created before the journal and last updated near or after the journal created date, # and this journal is the nearest one in time # 2. The last_reapplication date is after the application created date, and is the nearest one app_created = application.created_timestamp
# first, get each application and consider it counter = 0 for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"): counter += 1 application = Suggestion(**result) application.remove_related_journal() # find all the journals that this application could be associated with (which we need to do by issn) issns = application.bibjson().issns() # query by each issn individually, because we're looking for the widest possible map. Querying by # both would require both issns match related_journals = [] related_journal_ids = [] for issn in issns: journals = Journal.find_by_issn(issn) for journal in journals: if journal.id not in related_journal_ids: related_journal_ids.append(journal.id) related_journals.append(journal) if len(related_journals) > 0: # sort the journals by their created date related_journals = sorted(related_journals, key=lambda j: j.created_timestamp) # we set an application as having a related journal in the following conditions: # 1. The application was created before the journal and last updated near or after the journal created date, # and this journal is the nearest one in time # 2. The last_reapplication date is after the application created date, and is the nearest one app_created = application.created_timestamp for journal in related_journals: