def _latest_application(applications):
    """Return the application with the greatest ``last_updated_timestamp``, or None if there are none."""
    latest = None
    for application in applications:
        # strict ">" means that on a tie the application seen first is kept
        if latest is None or application.last_updated_timestamp > latest.last_updated_timestamp:
            latest = application
    return latest


def _latest_provenance_date(resource_id, action):
    """Return ``last_updated`` of the first provenance hit for the resource and action, or "" if none."""
    query = deepcopy(PROV_QUERY)
    must = query["query"]["bool"]["must"]
    must[0]["term"]["resource_id.exact"] = resource_id
    must[1]["term"]["action.exact"] = action
    provs = Provenance.q2obj(q=query)
    # NOTE(review): taking provs[0] assumes PROV_QUERY sorts newest-first -- confirm against its definition
    return provs[0].last_updated if len(provs) > 0 else ""


def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    """
    Scroll over every journal and write three CSV reports about it.

    For each journal, the most recently updated application sharing one of its ISSNs is
    located and its timestamps are compared with the journal's reference date (the
    ``last_update_request`` if the journal has one, otherwise its created timestamp).

    :param outfile_applications: path of the CSV listing journals whose reference date is later than
        the application's last update by more than THRESHOLD seconds, along with the latest "edit"
        and "status:accepted" provenance dates for that application
    :param outfile_accounts: path of the CSV listing journals with no owner, or whose owner account
        cannot be pulled
    :param outfile_reapps: path of the CSV listing journals in DOAJ whose reference date is earlier
        than the application's last manual update by more than THRESHOLD seconds, where the
        application is not rejected
    :param conn: connection handed to ``esprit.tasks.scroll``
    :return: None; results are written to the three files and progress is printed to stdout
    """
    with codecs.open(outfile_applications, "wb", "utf-8") as f, \
            codecs.open(outfile_accounts, "wb", "utf-8") as g, \
            codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            # single pre-formatted argument: prints the same text under Python 2 and Python 3
            print("%s %s" % (counter, journal.id))

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            latest = _latest_application(Suggestion.find_by_issn(issns))
            if latest is None:
                # NOTE(review): this also skips the missing-account check below for journals
                # that have no application at all -- confirm that is intended
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print("%s %s %s" % (counter, journal.id, reapp))
            if reapp is not None:
                # a reapplication date, when present, replaces the created date as the reference point
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            diff = (jcreated - app_lustamp).total_seconds()
            mdiff = (jcreated - app_man_lustamp).total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = _latest_provenance_date(latest.id, "edit")
                last_accept = _latest_provenance_date(latest.id, "status:accepted")
                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold,
            # and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            missing_account = None
            if owner is None:
                missing_account = "NO OWNER"
            elif Account.pull(owner) is None:
                missing_account = owner
            if missing_account is not None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), missing_account])

        print("processed %s journals" % counter)
def _latest_application(applications):
    """Return the application with the greatest ``last_updated_timestamp``, or None if there are none."""
    latest = None
    for application in applications:
        # strict ">" means that on a tie the application seen first is kept
        if latest is None or application.last_updated_timestamp > latest.last_updated_timestamp:
            latest = application
    return latest


def _latest_provenance_date(resource_id, action):
    """Return ``last_updated`` of the first provenance hit for the resource and action, or "" if none."""
    query = deepcopy(PROV_QUERY)
    must = query["query"]["bool"]["must"]
    must[0]["term"]["resource_id.exact"] = resource_id
    must[1]["term"]["action.exact"] = action
    provs = Provenance.q2obj(q=query)
    # NOTE(review): taking provs[0] assumes PROV_QUERY sorts newest-first -- confirm against its definition
    return provs[0].last_updated if len(provs) > 0 else ""


def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    """
    Scroll over every journal and write three CSV reports about it.

    For each journal, the most recently updated application sharing one of its ISSNs is
    located and its timestamps are compared with the journal's reference date (the
    ``last_update_request`` if the journal has one, otherwise its created timestamp).

    :param outfile_applications: path of the CSV listing journals whose reference date is later than
        the application's last update by more than THRESHOLD seconds, along with the latest "edit"
        and "status:accepted" provenance dates for that application
    :param outfile_accounts: path of the CSV listing journals with no owner, or whose owner account
        cannot be pulled
    :param outfile_reapps: path of the CSV listing journals in DOAJ whose reference date is earlier
        than the application's last manual update by more than THRESHOLD seconds, where the
        application is not rejected
    :param conn: connection handed to ``esprit.tasks.scroll``
    :return: None; results are written to the three files and progress is printed to stdout
    """
    with codecs.open(outfile_applications, "wb", "utf-8") as f, \
            codecs.open(outfile_accounts, "wb", "utf-8") as g, \
            codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            # single pre-formatted argument: prints the same text under Python 2 and Python 3
            print("%s %s" % (counter, journal.id))

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            latest = _latest_application(Suggestion.find_by_issn(issns))
            if latest is None:
                # NOTE(review): this also skips the missing-account check below for journals
                # that have no application at all -- confirm that is intended
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print("%s %s %s" % (counter, journal.id, reapp))
            if reapp is not None:
                # a reapplication date, when present, replaces the created date as the reference point
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            diff = (jcreated - app_lustamp).total_seconds()
            mdiff = (jcreated - app_man_lustamp).total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = _latest_provenance_date(latest.id, "edit")
                last_accept = _latest_provenance_date(latest.id, "status:accepted")
                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold,
            # and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            missing_account = None
            if owner is None:
                missing_account = "NO OWNER"
            elif Account.pull(owner) is None:
                missing_account = owner
            if missing_account is not None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), missing_account])

        print("processed %s journals" % counter)
def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
    """
    Exercise ``application_2_journal`` on the application service for one combination of inputs.

    The arguments are string flags: ``application_type`` ("present" builds an application,
    anything else passes None), ``manual_update_arg`` ("true"/"false", anything else gives None),
    ``app_key_properties`` ("yes"/"no"), ``current_journal`` ("present"/"missing"/"none") and
    ``raises`` (a key into EXCEPTIONS, or empty/None when the call is expected to succeed).
    ``name`` is not used by the body.
    """
    # ---- build the fixtures -------------------------------------------
    existing_journal = None
    has_seal = bool(randint(0, 1))
    application = None

    if application_type == "present":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())

        # strip everything the conversion could carry over, so each case starts from a clean slate
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")

        # the seal and the note are set whatever app_key_properties is: the "no" expectations
        # further down still compare the seal and count the application's note
        application.set_seal(has_seal)
        application.add_note("Application Note")

        if current_journal == "present":
            source_journal = Journal(**JournalFixtureFactory.make_journal_source())
            source_journal.remove_contacts()
            source_journal.add_contact("Journal", "*****@*****.**")
            source_journal.set_editor_group("journaleditorgroup")
            source_journal.set_editor("journaleditor")
            source_journal.set_owner("journalowner")
            source_journal.remove_current_application()
            source_journal.remove_notes()
            source_journal.add_note("Journal Note")
            source_journal.save(blocking=True)
            application.set_current_journal(source_journal.id)
            existing_journal = source_journal
        elif current_journal == "missing":
            # an id that is not saved anywhere in this test
            application.set_current_journal("123456789987654321")

    # any value other than "true"/"false" leaves manual_update as None
    manual_update = {"true": True, "false": False}.get(manual_update_arg)

    # ---- run the conversion ---------------------------------------------
    svc = DOAJ.applicationService()

    if raises is not None and raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
        return

    journal = svc.application_2_journal(application, manual_update)

    # ---- verify the resulting journal -------------------------------------
    assert journal is not None
    assert isinstance(journal, Journal)
    assert journal.is_in_doaj() is True

    # bibjson must match the application's once the journal-only "active" key is removed
    journal_bibjson = journal.bibjson().data
    del journal_bibjson["active"]
    assert journal_bibjson == application.bibjson().data

    expected_related = 3 if current_journal == "present" else 1
    assert len(journal.related_applications) == expected_related
    related = journal.related_application_record(application.id)
    assert related is not None

    if manual_update_arg == "true":
        assert journal.last_manual_update is not None
        assert journal.last_manual_update != "1970-01-01T00:00:00Z"

    if app_key_properties == "yes":
        # key properties come from the application
        contacts = journal.contacts()
        assert len(contacts) == 1
        contact = contacts[0]
        assert contact.get("name") == "Application"
        assert contact.get("email") == "*****@*****.**"
        assert journal.editor_group == "appeditorgroup"
        assert journal.editor == "appeditor"
        assert journal.owner == "appowner"
        assert journal.has_seal() == has_seal

        expected_notes = 2 if current_journal == "present" else 1
        assert len(journal.notes) == expected_notes

    elif app_key_properties == "no":
        if current_journal == "present":
            # key properties fall back to those of the existing journal
            contacts = journal.contacts()
            assert len(contacts) == 1
            contact = contacts[0]
            assert contact.get("name") == "Journal"
            assert contact.get("email") == "*****@*****.**"
            assert journal.editor_group == "journaleditorgroup"
            assert journal.editor == "journaleditor"
            assert journal.owner == "journalowner"
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 2
        elif current_journal in ("none", "missing"):
            # nothing to inherit from either side
            contacts = journal.contacts()
            assert len(contacts) == 0
            assert journal.editor_group is None
            assert journal.editor is None
            assert journal.owner is None
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 1

    if current_journal == "present":
        # the existing journal must have been updated, not replaced
        assert existing_journal.id == journal.id
        assert existing_journal.created_date == journal.created_date
def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
    """
    Exercise ``application_2_journal`` on the application service for one combination of inputs.

    The arguments are string flags: ``application_type`` ("present" builds an application,
    anything else passes None), ``manual_update_arg`` ("true"/"false", anything else gives None),
    ``app_key_properties`` ("yes"/"no"), ``current_journal`` ("present"/"missing"/"none") and
    ``raises`` (a key into EXCEPTIONS, or empty/None when the call is expected to succeed).
    ``name`` is not used by the body.
    """
    # ---- build the fixtures -------------------------------------------
    existing_journal = None
    has_seal = bool(randint(0, 1))
    application = None

    if application_type == "present":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())

        # strip everything the conversion could carry over, so each case starts from a clean slate
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")

        # the seal and the note are set whatever app_key_properties is: the "no" expectations
        # further down still compare the seal and count the application's note
        application.set_seal(has_seal)
        application.add_note("Application Note")

        if current_journal == "present":
            source_journal = Journal(**JournalFixtureFactory.make_journal_source())
            source_journal.remove_contacts()
            source_journal.add_contact("Journal", "*****@*****.**")
            source_journal.set_editor_group("journaleditorgroup")
            source_journal.set_editor("journaleditor")
            source_journal.set_owner("journalowner")
            source_journal.remove_current_application()
            source_journal.remove_notes()
            source_journal.add_note("Journal Note")
            source_journal.save(blocking=True)
            application.set_current_journal(source_journal.id)
            existing_journal = source_journal
        elif current_journal == "missing":
            # an id that is not saved anywhere in this test
            application.set_current_journal("123456789987654321")

    # any value other than "true"/"false" leaves manual_update as None
    manual_update = {"true": True, "false": False}.get(manual_update_arg)

    # ---- run the conversion ---------------------------------------------
    svc = DOAJ.applicationService()

    if raises is not None and raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
        return

    journal = svc.application_2_journal(application, manual_update)

    # ---- verify the resulting journal -------------------------------------
    assert journal is not None
    assert isinstance(journal, Journal)
    assert journal.is_in_doaj() is True

    # bibjson must match the application's once the journal-only "active" key is removed
    journal_bibjson = journal.bibjson().data
    del journal_bibjson["active"]
    assert journal_bibjson == application.bibjson().data

    expected_related = 3 if current_journal == "present" else 1
    assert len(journal.related_applications) == expected_related
    related = journal.related_application_record(application.id)
    assert related is not None

    if manual_update_arg == "true":
        assert journal.last_manual_update is not None
        assert journal.last_manual_update != "1970-01-01T00:00:00Z"

    if app_key_properties == "yes":
        # key properties come from the application
        contacts = journal.contacts()
        assert len(contacts) == 1
        contact = contacts[0]
        assert contact.get("name") == "Application"
        assert contact.get("email") == "*****@*****.**"
        assert journal.editor_group == "appeditorgroup"
        assert journal.editor == "appeditor"
        assert journal.owner == "appowner"
        assert journal.has_seal() == has_seal

        expected_notes = 2 if current_journal == "present" else 1
        assert len(journal.notes) == expected_notes

    elif app_key_properties == "no":
        if current_journal == "present":
            # key properties fall back to those of the existing journal
            contacts = journal.contacts()
            assert len(contacts) == 1
            contact = contacts[0]
            assert contact.get("name") == "Journal"
            assert contact.get("email") == "*****@*****.**"
            assert journal.editor_group == "journaleditorgroup"
            assert journal.editor == "journaleditor"
            assert journal.owner == "journalowner"
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 2
        elif current_journal in ("none", "missing"):
            # nothing to inherit from either side
            contacts = journal.contacts()
            assert len(contacts) == 0
            assert journal.editor_group is None
            assert journal.editor is None
            assert journal.owner is None
            assert journal.has_seal() == has_seal
            assert len(journal.notes) == 1

    if current_journal == "present":
        # the existing journal must have been updated, not replaced
        assert existing_journal.id == journal.id
        assert existing_journal.created_date == journal.created_date