def applications_inconsistencies(outfile_later, outfile_missing, conn):
    """
    Scroll over every "suggestion" record and write two CSV reports.

    Report 1 (``outfile_later``): applications whose latest provenance record
    was created more than THRESHOLD seconds after the application's
    (timezone-adjusted) last-updated timestamp.

    Report 2 (``outfile_missing``): accepted applications for which no journal
    found by ISSN (or, failing that, by title) reports itself as in DOAJ.

    :param outfile_later: path of the CSV file for report 1
    :param outfile_missing: path of the CSV file for report 2
    :param conn: connection object handed to esprit.tasks.scroll
    :return: None; progress is printed to stdout
    """
    later_header = ["Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference"]
    missing_header = ["Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title"]

    with codecs.open(outfile_later, "wb", "utf-8") as later_file, \
            codecs.open(outfile_missing, "wb", "utf-8") as missing_file:
        later_report = csv.writer(later_file)
        later_report.writerow(later_header)

        missing_report = UnicodeWriter(missing_file)
        missing_report.writerow(missing_header)

        processed = 0
        for record in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            processed += 1
            application = Suggestion(**record)
            # a single pre-formatted argument prints the same text as "print a, b"
            print("%s %s" % (processed, application.id))

            # Part 1 - is there a provenance record later than the last update?
            provenance = Provenance.get_latest_by_resource_id(application.id)
            has_provenance = provenance is not None
            if has_provenance:
                updated_at = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
                provenance_date = provenance.created_date
                lag = (provenance.created_timestamp - updated_at).total_seconds()
                if lag > THRESHOLD:
                    later_report.writerow([application.id, application.last_updated, provenance_date, lag])

            # Part 2 - only accepted applications are expected to have a journal
            if application.application_status != constants.APPLICATION_STATUS_ACCEPTED:
                continue

            bibjson = application.bibjson()

            # look the journal up by ISSN first, and fall back to the title
            candidates = Journal.find_by_issn(bibjson.issns())
            if len(candidates) == 0:
                candidates = Journal.find_by_title(bibjson.title)

            # the journal is present only if at least one candidate is in DOAJ
            if len(candidates) > 0:
                in_doaj = [j for j in candidates if j.is_in_doaj()]
                if len(in_doaj) > 0:
                    continue

            # nothing suitable found: record the application as missing its journal
            provenance_date = provenance.created_date if has_provenance else ""
            missing_report.writerow([
                application.id,
                application.last_manual_update,
                provenance_date,
                " ".join(bibjson.issns()),
                bibjson.title
            ])

        print("processed %s suggestions" % processed)
def make_application_spread(cls, desired_output, period):
    """
    Build a list of Suggestion objects matching a table of desired counts.

    ``desired_output`` is a table (list of lists).  The first row is a header
    whose first cell is ignored and whose remaining cells are period labels
    ("%Y-%m" when period is "month", "%Y" when period is "year").  Every other
    row is a country followed by one integer count per period label.  For each
    count, that many Suggestions are created with the row's country and a
    random created date between the start of the period and the start of the
    following one.

    The input table is deep-copied, so the caller's data is left untouched.

    :param cls: unused by this function body
    :param desired_output: table of desired counts, as described above
    :param period: "month" or "year"; any other value leaves the date range
        as (None, None)
    :return: list of Suggestion objects
    """
    table = deepcopy(desired_output)

    # detach the header row and throw away its leading (row label) cell
    header = table.pop(0)
    header.pop(0)

    # one (start, end) pair of formatted dates per period column
    windows = []
    for label in header:
        window_start = None
        window_end = None
        if period == "month":
            first_day = dates.parse(label, "%Y-%m")
            if first_day.month == 12:
                # December rolls over into January of the following year
                next_first_day = datetime(first_day.year + 1, 1, 1)
            else:
                next_first_day = datetime(first_day.year, first_day.month + 1, 1)
            window_start = dates.format(first_day)
            window_end = dates.format(next_first_day)
        elif period == "year":
            first_day = dates.parse(label, "%Y")
            next_first_day = datetime(first_day.year + 1, 1, 1)
            window_start = dates.format(first_day)
            window_end = dates.format(next_first_day)
        windows.append((window_start, window_end))

    applications = []
    for row in table:
        country = row.pop(0)
        for column, count in enumerate(row):
            window_start, window_end = windows[column]
            for _ in range(count):
                suggestion = Suggestion()
                suggestion.set_created(dates.random_date(window_start, window_end))
                suggestion.bibjson().country = country
                applications.append(suggestion)

    return applications
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    """
    Scroll over every "suggestion" record and write two CSV reports.

    Report 1 (``outfile_later``): applications whose latest provenance record
    was created more than THRESHOLD seconds after the application's
    (timezone-adjusted) last-updated timestamp.

    Report 2 (``outfile_missing``): accepted applications for which no journal
    found by ISSN (or, failing that, by title) reports itself as in DOAJ.

    :param outfile_later: path of the CSV file for report 1
    :param outfile_missing: path of the CSV file for report 2
    :param conn: connection object handed to esprit.tasks.scroll
    :return: None; progress is printed to stdout
    """
    later_header = [
        "Application ID", "Application Last Updated",
        "Latest Provenance Recorded", "Difference"
    ]
    missing_header = [
        "Application ID", "Application Last Manual Update",
        "Latest Provenance Record", "ISSNs", "Title"
    ]

    with codecs.open(outfile_later, "wb", "utf-8") as later_file, \
            codecs.open(outfile_missing, "wb", "utf-8") as missing_file:
        later_report = csv.writer(later_file)
        later_report.writerow(later_header)

        missing_report = UnicodeWriter(missing_file)
        missing_report.writerow(missing_header)

        processed = 0
        for record in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            processed += 1
            application = Suggestion(**record)
            # a single pre-formatted argument prints the same text as "print a, b"
            print("%s %s" % (processed, application.id))

            # Part 1 - is there a provenance record later than the last update?
            provenance = Provenance.get_latest_by_resource_id(application.id)
            has_provenance = provenance is not None
            if has_provenance:
                updated_at = adjust_timestamp(
                    application.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
                provenance_date = provenance.created_date
                lag = (provenance.created_timestamp - updated_at).total_seconds()
                if lag > THRESHOLD:
                    later_report.writerow([
                        application.id, application.last_updated,
                        provenance_date, lag
                    ])

            # Part 2 - only accepted applications are expected to have a journal
            if application.application_status != constants.APPLICATION_STATUS_ACCEPTED:
                continue

            bibjson = application.bibjson()

            # look the journal up by ISSN first, and fall back to the title
            candidates = Journal.find_by_issn(bibjson.issns())
            if len(candidates) == 0:
                candidates = Journal.find_by_title(bibjson.title)

            # the journal is present only if at least one candidate is in DOAJ
            if len(candidates) > 0:
                in_doaj = [j for j in candidates if j.is_in_doaj()]
                if len(in_doaj) > 0:
                    continue

            # nothing suitable found: record the application as missing its journal
            provenance_date = provenance.created_date if has_provenance else ""
            missing_report.writerow([
                application.id,
                application.last_manual_update,
                provenance_date,
                " ".join(bibjson.issns()),
                bibjson.title
            ])

        print("processed %s suggestions" % processed)
"application", "app_created", "app_last_update", "app_last_manual_update", "app_adjusted_lmu", "app_issns", "journal_matches", "journal", "journal_created", "journal_reapp", "journal_issns", "jc_ac_diff", "jc_lmua_diff", "mc1", "lra_ac_diff", "mc2", "is_match", "reason" ]) # first, get each application and consider it counter = 0 for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"): counter += 1 application = Suggestion(**result) application.remove_related_journal() # find all the journals that this application could be associated with (which we need to do by issn) issns = application.bibjson().issns() # query by each issn individually, because we're looking for the widest possible map. Querying by # both would require both issns match related_journals = [] related_journal_ids = [] for issn in issns: journals = Journal.find_by_issn(issn) for journal in journals: if journal.id not in related_journal_ids: related_journal_ids.append(journal.id) related_journals.append(journal) if len(related_journals) > 0: # sort the journals by their created date related_journals = sorted(related_journals,
writer.writerow([ "application", "app_created", "app_last_update", "app_last_manual_update", "app_adjusted_lmu", "app_issns", "journal_matches", "journal", "journal_created", "journal_reapp", "journal_issns", "jc_ac_diff", "jc_lmua_diff", "mc1", "lra_ac_diff", "mc2", "is_match", "reason" ]) # first, get each application and consider it counter = 0 for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"): counter += 1 application = Suggestion(**result) application.remove_related_journal() # find all the journals that this application could be associated with (which we need to do by issn) issns = application.bibjson().issns() # query by each issn individually, because we're looking for the widest possible map. Querying by # both would require both issns match related_journals = [] related_journal_ids = [] for issn in issns: journals = Journal.find_by_issn(issn) for journal in journals: if journal.id not in related_journal_ids: related_journal_ids.append(journal.id) related_journals.append(journal) if len(related_journals) > 0: # sort the journals by their created date related_journals = sorted(related_journals, key=lambda j: j.created_timestamp)
def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
    """
    Parameterised test of the application service's application_2_journal.

    :param name: label of the parameter set; not used in the test body
    :param application_type: "present" to build an application fixture,
        anything else passes None to the service
    :param manual_update_arg: "true" / "false" map to True / False; any other
        value passes None as the manual_update argument
    :param app_key_properties: "yes" to populate contact, editor group,
        editor, owner, seal and a note on the application; "no" to leave
        them removed
    :param current_journal: "present" to save a journal and link it as the
        application's current journal, "missing" to link a non-existent id,
        "none" for no link
    :param raises: key into EXCEPTIONS naming the expected exception, or
        None / "" when the call is expected to succeed
    """
    # set up for the test
    #########################################

    existing_journal = None
    has_seal = bool(randint(0, 1))

    application = None
    if application_type == "present":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())

        # strip everything the scenarios below may want to control themselves
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")
            application.set_seal(has_seal)
            application.add_note("Application Note")

        if current_journal == "present":
            journal = Journal(**JournalFixtureFactory.make_journal_source())
            journal.remove_contacts()
            journal.add_contact("Journal", "*****@*****.**")
            journal.set_editor_group("journaleditorgroup")
            journal.set_editor("journaleditor")
            journal.set_owner("journalowner")
            journal.remove_current_application()
            journal.remove_notes()
            journal.add_note("Journal Note")
            journal.save(blocking=True)
            application.set_current_journal(journal.id)
            existing_journal = journal
        elif current_journal == "missing":
            # an id which does not correspond to any saved journal
            application.set_current_journal("123456789987654321")

    # any argument other than "true" / "false" leaves the flag as None
    manual_update = {"true": True, "false": False}.get(manual_update_arg)

    # execute the test
    ########################################

    svc = DOAJ.applicationService()
    expect_failure = raises is not None and raises != ""
    if expect_failure:
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
        return

    journal = svc.application_2_journal(application, manual_update)

    # check the result
    ######################################

    assert journal is not None
    assert isinstance(journal, Journal)
    assert journal.is_in_doaj() is True

    # apart from the "active" key, the bibjson must equal the application's
    journal_bibjson = journal.bibjson().data
    del journal_bibjson["active"]
    assert journal_bibjson == application.bibjson().data

    expected_related = 3 if current_journal == "present" else 1
    assert len(journal.related_applications) == expected_related

    related = journal.related_application_record(application.id)
    assert related is not None

    if manual_update_arg == "true":
        assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

    if app_key_properties == "yes":
        # properties set on the application are expected on the journal
        contacts = journal.contacts()
        assert len(contacts) == 1
        assert contacts[0].get("name") == "Application"
        assert contacts[0].get("email") == "*****@*****.**"
        assert journal.editor_group == "appeditorgroup"
        assert journal.editor == "appeditor"
        assert journal.owner == "appowner"
        assert journal.has_seal() == has_seal

        expected_notes = 2 if current_journal == "present" else 1
        assert len(journal.notes) == expected_notes

    elif app_key_properties == "no":
        if current_journal == "present":
            # properties are expected to come from the saved journal
            contacts = journal.contacts()
            assert len(contacts) == 1
            assert contacts[0].get("name") == "Journal"
            assert contacts[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "journaleditorgroup"
            assert journal.editor == "journaleditor"
            assert journal.owner == "journalowner"
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 2

        elif current_journal in ("none", "missing"):
            # nothing to inherit from either side
            contacts = journal.contacts()
            assert len(contacts) == 0
            assert journal.editor_group is None
            assert journal.editor is None
            assert journal.owner is None
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 1

    if current_journal == "present":
        # the returned journal must be the saved one, not a new record
        assert existing_journal.id == journal.id
        assert existing_journal.created_date == journal.created_date
def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
    """
    Parameterised test of the application service's application_2_journal.

    :param name: label of the parameter set; not used in the test body
    :param application_type: "present" to build an application fixture,
        anything else passes None to the service
    :param manual_update_arg: "true" / "false" map to True / False; any other
        value passes None as the manual_update argument
    :param app_key_properties: "yes" to populate contact, editor group,
        editor, owner, seal and a note on the application; "no" to leave
        them removed
    :param current_journal: "present" to save a journal and link it as the
        application's current journal, "missing" to link a non-existent id,
        "none" for no link
    :param raises: key into EXCEPTIONS naming the expected exception, or
        None / "" when the call is expected to succeed
    """
    # set up for the test
    #########################################

    existing_journal = None
    has_seal = bool(randint(0, 1))

    application = None
    if application_type == "present":
        application = Suggestion(
            **ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())

        # strip everything the scenarios below may want to control themselves
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")
            application.set_seal(has_seal)
            application.add_note("Application Note")

        if current_journal == "present":
            journal = Journal(
                **JournalFixtureFactory.make_journal_source())
            journal.remove_contacts()
            journal.add_contact("Journal", "*****@*****.**")
            journal.set_editor_group("journaleditorgroup")
            journal.set_editor("journaleditor")
            journal.set_owner("journalowner")
            journal.remove_current_application()
            journal.remove_notes()
            journal.add_note("Journal Note")
            journal.save(blocking=True)
            application.set_current_journal(journal.id)
            existing_journal = journal
        elif current_journal == "missing":
            # an id which does not correspond to any saved journal
            application.set_current_journal("123456789987654321")

    # any argument other than "true" / "false" leaves the flag as None
    manual_update = {"true": True, "false": False}.get(manual_update_arg)

    # execute the test
    ########################################

    svc = DOAJ.applicationService()
    expect_failure = raises is not None and raises != ""
    if expect_failure:
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
        return

    journal = svc.application_2_journal(application, manual_update)

    # check the result
    ######################################

    assert journal is not None
    assert isinstance(journal, Journal)
    assert journal.is_in_doaj() is True

    # apart from the "active" key, the bibjson must equal the application's
    journal_bibjson = journal.bibjson().data
    del journal_bibjson["active"]
    assert journal_bibjson == application.bibjson().data

    expected_related = 3 if current_journal == "present" else 1
    assert len(journal.related_applications) == expected_related

    related = journal.related_application_record(application.id)
    assert related is not None

    if manual_update_arg == "true":
        assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

    if app_key_properties == "yes":
        # properties set on the application are expected on the journal
        contacts = journal.contacts()
        assert len(contacts) == 1
        assert contacts[0].get("name") == "Application"
        assert contacts[0].get("email") == "*****@*****.**"
        assert journal.editor_group == "appeditorgroup"
        assert journal.editor == "appeditor"
        assert journal.owner == "appowner"
        assert journal.has_seal() == has_seal

        expected_notes = 2 if current_journal == "present" else 1
        assert len(journal.notes) == expected_notes

    elif app_key_properties == "no":
        if current_journal == "present":
            # properties are expected to come from the saved journal
            contacts = journal.contacts()
            assert len(contacts) == 1
            assert contacts[0].get("name") == "Journal"
            assert contacts[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "journaleditorgroup"
            assert journal.editor == "journaleditor"
            assert journal.owner == "journalowner"
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 2

        elif current_journal in ("none", "missing"):
            # nothing to inherit from either side
            contacts = journal.contacts()
            assert len(contacts) == 0
            assert journal.editor_group is None
            assert journal.editor is None
            assert journal.owner is None
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 1

    if current_journal == "present":
        # the returned journal must be the saved one, not a new record
        assert existing_journal.id == journal.id
        assert existing_journal.created_date == journal.created_date