Exemple #1
0
    def get_journal(cls, specs):
        journals = []

        for spec in specs:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**source)
            bj = j.bibjson()
            bj.title = spec.get("title", "Journal Title")
            bj.remove_identifiers()
            if "pissn" in spec:
                bj.add_identifier(bj.P_ISSN, spec.get("pissn"))
            if "eissn" in spec:
                bj.add_identifier(bj.E_ISSN, spec.get("eissn"))
            spec["instance"] = j
            journals.append(spec)

        def mock(self):
            bibjson = self.bibjson()

            # first, get the ISSNs associated with the record
            pissns = bibjson.get_identifiers(bibjson.P_ISSN)
            eissns = bibjson.get_identifiers(bibjson.E_ISSN)

            for j in journals:
                if j["pissn"] in pissns and j["eissn"] in eissns:
                    return j["instance"]

        return mock
    def test_has_permissions(self):

        journal_source = JournalFixtureFactory.make_journal_source()
        journal1 = Journal(**journal_source)

        publisher_owner_src = AccountFixtureFactory.make_publisher_source()
        publisher_owner = Account(**publisher_owner_src)
        publisher_stranged_src = AccountFixtureFactory.make_publisher_source()
        publisher_stranged = Account(**publisher_stranged_src)
        admin_src = AccountFixtureFactory.make_managing_editor_source()
        admin = Account(**admin_src)

        journal1.set_owner(publisher_owner)
        journal1.save(blocking=True)

        eissn = journal1.bibjson().get_one_identifier("eissn")
        pissn = journal1.bibjson().get_one_identifier("pissn")

        art_source = ArticleFixtureFactory.make_article_source(eissn=eissn,
                                                               pissn=pissn)
        article = Article(**art_source)

        assert self.svc.has_permissions(publisher_stranged, article, False)
        assert self.svc.has_permissions(publisher_owner, article, True)
        assert self.svc.has_permissions(admin, article, True)
        failed_result = self.svc.has_permissions(publisher_stranged, article, True)
        assert failed_result == {'success': 0, 'fail': 1, 'update': 0, 'new': 0, 'shared': [],
                                 'unowned': [pissn, eissn],
                                 'unmatched': []}, "received: {}".format(failed_result)
Exemple #3
0
    def get_journal(cls, specs):
        journals = []

        for spec in specs:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**source)
            bj = j.bibjson()
            bj.title = spec.get("title", "Journal Title")
            bj.remove_identifiers()
            if "pissn" in spec:
                bj.add_identifier(bj.P_ISSN, spec.get("pissn"))
            if "eissn" in spec:
                bj.add_identifier(bj.E_ISSN, spec.get("eissn"))
            spec["instance"] = j
            journals.append(spec)

        def mock(self):
            bibjson = self.bibjson()

            # first, get the ISSNs associated with the record
            pissns = bibjson.get_identifiers(bibjson.P_ISSN)
            eissns = bibjson.get_identifiers(bibjson.E_ISSN)

            for j in journals:
                if j["pissn"] in pissns and j["eissn"] in eissns:
                    return j["instance"]

        return mock
Exemple #4
0
    def find_by_issn(cls, issns, owners):
        journals = []
        seen_issns = []

        for owner in owners:
            for eissn, pissn in issns:
                if eissn not in seen_issns and eissn is not None:
                    seen_issns.append(eissn)
                if pissn not in seen_issns and pissn is not None:
                    seen_issns.append(pissn)

                source = JournalFixtureFactory.make_journal_source(
                    in_doaj=True)
                journal = Journal(**source)
                journal.set_owner(owner)

                journal.bibjson().remove_identifiers("eissn")
                journal.bibjson().remove_identifiers("pissn")
                if eissn is not None:
                    journal.bibjson().add_identifier("eissn", eissn)
                if pissn is not None:
                    journal.bibjson().add_identifier("pissn", pissn)
                journals.append(journal)

        @classmethod
        def mock(cls, issns, in_doaj=None, max=10):
            if not isinstance(issns, list):
                issns = [issns]

            for issn in issns:
                if issn in seen_issns:
                    return journals

            return []

        return mock
def journals_applications_provenance(outfile_applications, outfile_accounts,
                                     outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(
            outfile_accounts, "wb",
            "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied",
            "Application ID", "Application Last Updated", "Application Status",
            "Published Diff", "Latest Edit Recorded",
            "Latest Accepted Recorded"
        ])

        out_accounts = csv.writer(g)
        out_accounts.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ",
            "Missing Account ID"
        ])

        out_reapps = csv.writer(h)
        out_reapps.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied",
            "Application ID", "Application Created",
            "Application Last Updated", "Application Last Manual Update",
            "Application Status", "Published Diff"
        ])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp,
                                           APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(
                latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"][
                    "resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"][
                    "action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"][
                    "resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"][
                    "action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request, latest.id,
                    latest.last_updated, latest.application_status, diff,
                    last_edit, last_accept
                ])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj(
            ):
                out_reapps.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request, latest.id,
                    latest.created_date, latest.last_updated,
                    latest.last_manual_update, latest.application_status, mdiff
                ])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request,
                    str(journal.is_in_doaj()), "NO OWNER"
                ])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([
                        journal.id, journal.created_date,
                        journal.last_update_request,
                        str(journal.is_in_doaj()), owner
                    ])

        print "processed", counter, "journals"
Exemple #6
0
    def add_journal_metadata(self, j=None, reg=None):
        """
        this function makes sure the article is populated
        with all the relevant info from its owning parent object
        :param j: Pass in a Journal to bypass the (slow) locating step. MAKE SURE IT'S THE RIGHT ONE!
        """

        # Record the data that is copied into the article into the "reg"ister, in case the
        # caller needs to know exactly and only which information was copied
        if reg is None:
            reg = Journal()
        rbj = reg.bibjson()

        if j is None:
            journal = self.get_journal()
        else:
            journal = j

        # we were unable to find a journal
        if journal is None:
            raise NoJournalException(
                "Unable to find a journal associated with this article")

        # if we get to here, we have a journal record we want to pull data from
        jbib = journal.bibjson()
        bibjson = self.bibjson()

        # tripwire to be tripped if the journal makes changes to the article
        trip = False

        if bibjson.subjects() != jbib.subjects():
            trip = True
            bibjson.set_subjects(jbib.subjects())
        rbj.set_subjects(jbib.subjects())

        if jbib.title is not None:
            if bibjson.journal_title != jbib.title:
                trip = True
                bibjson.journal_title = jbib.title
            rbj.title = jbib.title

        if jbib.get_license() is not None:
            lic = jbib.get_license()
            alic = bibjson.get_journal_license()

            if lic is not None and (
                    alic is None or
                (lic.get("title") != alic.get("title")
                 or lic.get("type") != alic.get("type")
                 or lic.get("url") != alic.get("url")
                 or lic.get("version") != alic.get("version")
                 or lic.get("open_access") != alic.get("open_access"))):

                bibjson.set_journal_license(lic.get("title"), lic.get("type"),
                                            lic.get("url"), lic.get("version"),
                                            lic.get("open_access"))
                trip = True

            rbj.set_license(lic.get("title"), lic.get("type"), lic.get("url"),
                            lic.get("version"), lic.get("open_access"))

        if len(jbib.language) > 0:
            jlang = jbib.language
            alang = bibjson.journal_language
            jlang.sort()
            alang.sort()
            if jlang != alang:
                bibjson.journal_language = jbib.language
                trip = True
            rbj.set_language(jbib.language)

        if jbib.country is not None:
            if jbib.country != bibjson.journal_country:
                bibjson.journal_country = jbib.country
                trip = True
            rbj.country = jbib.country

        if jbib.publisher:
            if jbib.publisher != bibjson.publisher:
                bibjson.publisher = jbib.publisher
                trip = True
            rbj.publisher = jbib.publisher

        # Copy the seal info, in_doaj status and the journal's ISSNs
        if journal.is_in_doaj() != self.is_in_doaj():
            self.set_in_doaj(journal.is_in_doaj())
            trip = True
        reg.set_in_doaj(journal.is_in_doaj())

        if journal.has_seal() != self.has_seal():
            self.set_seal(journal.has_seal())
            trip = True
        reg.set_seal(journal.has_seal())

        try:
            aissns = bibjson.journal_issns
            jissns = jbib.issns()
            aissns.sort()
            jissns.sort()
            if aissns != jissns:
                bibjson.journal_issns = jbib.issns()
                trip = True

            eissns = jbib.get_identifiers(jbib.E_ISSN)
            pissns = jbib.get_identifiers(jbib.P_ISSN)
            if eissns is not None and len(eissns) > 0:
                rbj.add_identifier(rbj.E_ISSN, eissns[0])
            if pissns is not None and len(pissns) > 0:
                rbj.add_identifier(rbj.P_ISSN, pissns[0])
        except KeyError:
            # No issns, don't worry about it for now
            pass

        return trip
Exemple #7
0
    def form2obj(form, existing_journal):
        journal = Journal()
        bibjson = journal.bibjson()

        # The if statements that wrap practically every field are there due to this
        # form being used to edit old journals which don't necessarily have most of
        # this info.
        # It also allows admins to delete the contents of any field if they wish,
        # by ticking the "Allow incomplete form" checkbox and deleting the contents
        # of that field. The if condition(s) will then *not* add the relevant field to the
        # new journal object being constructed.
        # add_url in the journal model has a safeguard against empty URL-s.

        if form.title.data:
            bibjson.title = form.title.data
        bibjson.add_url(form.url.data, urltype='homepage')
        if form.alternative_title.data:
            bibjson.alternative_title = form.alternative_title.data
        if form.pissn.data:
            bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
        if form.eissn.data:
            bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
        if form.publisher.data:
            bibjson.publisher = form.publisher.data
        if form.society_institution.data:
            bibjson.institution = form.society_institution.data
        if form.platform.data:
            bibjson.provider = form.platform.data
        if form.contact_name.data or form.contact_email.data:
            journal.add_contact(form.contact_name.data, form.contact_email.data)
        if form.country.data:
            bibjson.country = form.country.data

        if forms.interpret_special(form.processing_charges.data):
            bibjson.set_apc(form.processing_charges_currency.data, form.processing_charges_amount.data)

        if forms.interpret_special(form.submission_charges.data):
            bibjson.set_submission_charges(form.submission_charges_currency.data, form.submission_charges_amount.data)

        if forms.interpret_special(form.waiver_policy.data):
            bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

        # checkboxes
        if forms.interpret_special(form.digital_archiving_policy.data) or form.digital_archiving_policy_url.data:
            archiving_policies = forms.interpret_special(form.digital_archiving_policy.data)
            archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_other.data, store_other_label=True)
            archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_library.data, forms.digital_archiving_policy_specific_library_value, store_other_label=True)
            bibjson.set_archiving_policy(archiving_policies, form.digital_archiving_policy_url.data)

        if form.crawl_permission.data and form.crawl_permission.data != 'None':
            bibjson.allows_fulltext_indexing = forms.interpret_special(form.crawl_permission.data)  # just binary

        # checkboxes
        article_ids = forms.interpret_special(form.article_identifiers.data)
        article_ids = forms.interpret_other(article_ids, form.article_identifiers_other.data)
        if article_ids:
            bibjson.persistent_identifier_scheme = article_ids

        if (form.download_statistics.data and form.download_statistics.data != 'None') or form.download_statistics_url.data:
            bibjson.set_article_statistics(form.download_statistics_url.data, forms.interpret_special(form.download_statistics.data))

        if form.first_fulltext_oa_year.data:
            bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

        # checkboxes
        fulltext_format = forms.interpret_other(form.fulltext_format.data, form.fulltext_format_other.data)
        if fulltext_format:
            bibjson.format = fulltext_format

        if form.keywords.data:
            bibjson.set_keywords(form.keywords.data)  # tag list field

        if form.languages.data:
            bibjson.set_language(form.languages.data)  # select multiple field - gives a list back

        bibjson.add_url(form.editorial_board_url.data, urltype='editorial_board')

        if form.review_process.data or form.review_process_url.data:
            bibjson.set_editorial_review(form.review_process.data, form.review_process_url.data)

        bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
        bibjson.add_url(form.instructions_authors_url.data, urltype='author_instructions')

        if (form.plagiarism_screening.data and form.plagiarism_screening.data != 'None') or form.plagiarism_screening_url.data:
            bibjson.set_plagiarism_detection(
                form.plagiarism_screening_url.data,
                has_detection=forms.interpret_special(form.plagiarism_screening.data)
            )

        if form.publication_time.data:
            bibjson.publication_time = form.publication_time.data

        bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

        license_type = forms.interpret_other(form.license.data, form.license_other.data)
        if forms.interpret_special(license_type):
        # "None" and "False" as strings like they come out of the WTForms processing)
        # would get interpreted correctly by this check, so "None" licenses should not appear
            if license_type in licenses:
                by = licenses[license_type]['BY']
                nc = licenses[license_type]['NC']
                nd = licenses[license_type]['ND']
                sa = licenses[license_type]['SA']
                license_title = licenses[license_type]['title']
            elif form.license_checkbox.data:
                by = True if 'BY' in form.license_checkbox.data else False
                nc = True if 'NC' in form.license_checkbox.data else False
                nd = True if 'ND' in form.license_checkbox.data else False
                sa = True if 'SA' in form.license_checkbox.data else False
                license_title = license_type
            else:
                by = None; nc = None; nd = None; sa = None;
                license_title = license_type

            bibjson.set_license(
                license_title,
                license_type,
                url=form.license_url.data,
                open_access=forms.interpret_special(form.open_access.data),
                by=by, nc=nc, nd=nd, sa=sa,
                embedded=forms.interpret_special(form.license_embedded.data),
                embedded_example_url=form.license_embedded_url.data
            )

        # checkboxes
        deposit_policies = forms.interpret_special(form.deposit_policy.data)  # need empty list if it's just "None"
        deposit_policies = forms.interpret_other(deposit_policies, form.deposit_policy_other.data)
        if deposit_policies:
            bibjson.deposit_policy = deposit_policies

        if form.copyright.data and form.copyright.data != 'None':
            holds_copyright = forms.interpret_other(
                forms.interpret_special(form.copyright.data),
                form.copyright_other.data
            )
            bibjson.set_author_copyright(form.copyright_url.data, holds_copyright=holds_copyright)

        if form.publishing_rights.data and form.publishing_rights.data != 'None':
            publishing_rights = forms.interpret_other(
                forms.interpret_special(form.publishing_rights.data),
                form.publishing_rights_other.data
            )
            bibjson.set_author_publishing_rights(form.publishing_rights_url.data, holds_rights=publishing_rights)

        # need to copy over the notes from the existing journal object, if any, otherwise
        # the dates on all the notes will get reset to right now (i.e. last_updated)
        # since the journal object we're creating in this xwalk is a new, empty one
        journal.set_notes(existing_journal.notes())

        # generate index of notes, just the text
        curnotes = []
        for curnote in journal.notes():
            curnotes.append(curnote['note'])

        # add any new notes
        formnotes = []
        for formnote in form.notes.data:
            if formnote['note']:
                if formnote['note'] not in curnotes and formnote["note"] != "":
                    journal.add_note(formnote['note'])
                # also generate another text index of notes, this time an index of the form notes
                formnotes.append(formnote['note'])

        if current_user.has_role("delete_note"):
            # delete all notes not coming back from the form, means they've been deleted
            # also if one of the saved notes is completely blank, delete it
            for curnote in journal.notes()[:]:
                if not curnote['note'] or curnote['note'] not in formnotes:
                    journal.remove_note(curnote)

        new_subjects = []
        for code in form.subject.data:
            sobj = {"scheme": 'LCC', "term": lcc.lookup_code(code), "code": code}
            new_subjects.append(sobj)
        bibjson.set_subjects(new_subjects)

        owner = form.owner.data.strip()
        if owner:
            journal.set_owner(owner)

        editor_group = form.editor_group.data.strip()
        if editor_group:
            journal.set_editor_group(editor_group)

        editor = form.editor.data.strip()
        if editor:
            journal.set_editor(editor)

        # old fields - only create them in the journal record if the values actually exist
        # need to use interpret_special in the test condition in case 'None' comes back from the form
        if getattr(form, 'author_pays', None):
            if forms.interpret_special(form.author_pays.data):
                bibjson.author_pays = form.author_pays.data
        if getattr(form, 'author_pays_url', None):
            if forms.interpret_special(form.author_pays_url.data):
                bibjson.author_pays_url = form.author_pays_url.data
        if getattr(form, 'oa_end_year', None):
            if forms.interpret_special(form.oa_end_year.data):
                bibjson.set_oa_end(form.oa_end_year.data)

        return journal
Exemple #8
0
    def test_01_discover_duplicates(self, name, kwargs):

        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save()

        # determine what we need to load into the index
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(
                    eissn="1234-5678",
                    pissn="9876-5432",
                    doi=the_doi,
                    fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save(blocking=True)
                article_ids.append(article.id)
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            eissn = "1234=5678"  # one matching
            pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article)
        else:
            possible_articles = svc.discover_duplicates(article)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[
                        -1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[
                        0]
            else:
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
Exemple #9
0
    def form2obj(form, existing_journal):
        journal = Journal()
        bibjson = journal.bibjson()

        # The if statements that wrap practically every field are there due to this
        # form being used to edit old journals which don't necessarily have most of
        # this info.
        # It also allows admins to delete the contents of any field if they wish,
        # by ticking the "Allow incomplete form" checkbox and deleting the contents
        # of that field. The if condition(s) will then *not* add the relevant field to the
        # new journal object being constructed.
        # add_url in the journal model has a safeguard against empty URL-s.

        if form.title.data:
            bibjson.title = form.title.data
        bibjson.add_url(form.url.data, urltype='homepage')
        if form.alternative_title.data:
            bibjson.alternative_title = form.alternative_title.data
        if form.pissn.data:
            bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
        if form.eissn.data:
            bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
        if form.publisher.data:
            bibjson.publisher = form.publisher.data
        if form.society_institution.data:
            bibjson.institution = form.society_institution.data
        if form.platform.data:
            bibjson.provider = form.platform.data
        if form.contact_name.data or form.contact_email.data:
            journal.add_contact(form.contact_name.data,
                                form.contact_email.data)
        if form.country.data:
            bibjson.country = form.country.data

        if forms.interpret_special(form.processing_charges.data):
            bibjson.set_apc(form.processing_charges_currency.data,
                            form.processing_charges_amount.data)

        if forms.interpret_special(form.submission_charges.data):
            bibjson.set_submission_charges(
                form.submission_charges_currency.data,
                form.submission_charges_amount.data)

        if forms.interpret_special(form.waiver_policy.data):
            bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

        # checkboxes
        if forms.interpret_special(form.digital_archiving_policy.data
                                   ) or form.digital_archiving_policy_url.data:
            archiving_policies = forms.interpret_special(
                form.digital_archiving_policy.data)
            archiving_policies = forms.interpret_other(
                archiving_policies,
                form.digital_archiving_policy_other.data,
                store_other_label=True)
            archiving_policies = forms.interpret_other(
                archiving_policies,
                form.digital_archiving_policy_library.data,
                forms.digital_archiving_policy_specific_library_value,
                store_other_label=True)
            bibjson.set_archiving_policy(
                archiving_policies, form.digital_archiving_policy_url.data)

        if form.crawl_permission.data and form.crawl_permission.data != 'None':
            bibjson.allows_fulltext_indexing = forms.interpret_special(
                form.crawl_permission.data)  # just binary

        # checkboxes
        article_ids = forms.interpret_special(form.article_identifiers.data)
        article_ids = forms.interpret_other(
            article_ids, form.article_identifiers_other.data)
        if article_ids:
            bibjson.persistent_identifier_scheme = article_ids

        if (form.download_statistics.data and form.download_statistics.data !=
                'None') or form.download_statistics_url.data:
            bibjson.set_article_statistics(
                form.download_statistics_url.data,
                forms.interpret_special(form.download_statistics.data))

        if form.first_fulltext_oa_year.data:
            bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

        # checkboxes
        fulltext_format = forms.interpret_other(
            form.fulltext_format.data, form.fulltext_format_other.data)
        if fulltext_format:
            bibjson.format = fulltext_format

        if form.keywords.data:
            bibjson.set_keywords(form.keywords.data)  # tag list field

        if form.languages.data:
            bibjson.set_language(form.languages.data
                                 )  # select multiple field - gives a list back

        bibjson.add_url(form.editorial_board_url.data,
                        urltype='editorial_board')

        if form.review_process.data or form.review_process_url.data:
            bibjson.set_editorial_review(form.review_process.data,
                                         form.review_process_url.data)

        bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
        bibjson.add_url(form.instructions_authors_url.data,
                        urltype='author_instructions')

        if (form.plagiarism_screening.data and form.plagiarism_screening.data
                != 'None') or form.plagiarism_screening_url.data:
            bibjson.set_plagiarism_detection(
                form.plagiarism_screening_url.data,
                has_detection=forms.interpret_special(
                    form.plagiarism_screening.data))

        if form.publication_time.data:
            bibjson.publication_time = form.publication_time.data

        bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

        license_type = forms.interpret_other(form.license.data,
                                             form.license_other.data)
        if forms.interpret_special(license_type):
            # "None" and "False" as strings like they come out of the WTForms processing)
            # would get interpreted correctly by this check, so "None" licenses should not appear
            if license_type in licenses:
                by = licenses[license_type]['BY']
                nc = licenses[license_type]['NC']
                nd = licenses[license_type]['ND']
                sa = licenses[license_type]['SA']
                license_title = licenses[license_type]['title']
            elif form.license_checkbox.data:
                by = True if 'BY' in form.license_checkbox.data else False
                nc = True if 'NC' in form.license_checkbox.data else False
                nd = True if 'ND' in form.license_checkbox.data else False
                sa = True if 'SA' in form.license_checkbox.data else False
                license_title = license_type
            else:
                by = None
                nc = None
                nd = None
                sa = None
                license_title = license_type

            bibjson.set_license(
                license_title,
                license_type,
                url=form.license_url.data,
                open_access=forms.interpret_special(form.open_access.data),
                by=by,
                nc=nc,
                nd=nd,
                sa=sa,
                embedded=forms.interpret_special(form.license_embedded.data),
                embedded_example_url=form.license_embedded_url.data)

        # checkboxes
        deposit_policies = forms.interpret_special(
            form.deposit_policy.data)  # need empty list if it's just "None"
        deposit_policies = forms.interpret_other(
            deposit_policies, form.deposit_policy_other.data)
        if deposit_policies:
            bibjson.deposit_policy = deposit_policies

        if form.copyright.data and form.copyright.data != 'None':
            holds_copyright = forms.interpret_other(
                forms.interpret_special(form.copyright.data),
                form.copyright_other.data)
            bibjson.set_author_copyright(form.copyright_url.data,
                                         holds_copyright=holds_copyright)

        if form.publishing_rights.data and form.publishing_rights.data != 'None':
            publishing_rights = forms.interpret_other(
                forms.interpret_special(form.publishing_rights.data),
                form.publishing_rights_other.data)
            bibjson.set_author_publishing_rights(
                form.publishing_rights_url.data,
                holds_rights=publishing_rights)

        # need to copy over the notes from the existing journal object, if any, otherwise
        # the dates on all the notes will get reset to right now (i.e. last_updated)
        # since the journal object we're creating in this xwalk is a new, empty one
        journal.set_notes(existing_journal.notes())

        # generate index of notes, just the text
        curnotes = []
        for curnote in journal.notes():
            curnotes.append(curnote['note'])

        # add any new notes
        formnotes = []
        for formnote in form.notes.data:
            if formnote['note']:
                if formnote['note'] not in curnotes and formnote["note"] != "":
                    journal.add_note(formnote['note'])
                # also generate another text index of notes, this time an index of the form notes
                formnotes.append(formnote['note'])

        if current_user.has_role("delete_note"):
            # delete all notes not coming back from the form, means they've been deleted
            # also if one of the saved notes is completely blank, delete it
            for curnote in journal.notes()[:]:
                if not curnote['note'] or curnote['note'] not in formnotes:
                    journal.remove_note(curnote)

        new_subjects = []
        for code in form.subject.data:
            sobj = {
                "scheme": 'LCC',
                "term": lcc.lookup_code(code),
                "code": code
            }
            new_subjects.append(sobj)
        bibjson.set_subjects(new_subjects)

        owner = form.owner.data.strip()
        if owner:
            journal.set_owner(owner)

        editor_group = form.editor_group.data.strip()
        if editor_group:
            journal.set_editor_group(editor_group)

        editor = form.editor.data.strip()
        if editor:
            journal.set_editor(editor)

        # old fields - only create them in the journal record if the values actually exist
        # need to use interpret_special in the test condition in case 'None' comes back from the form
        if getattr(form, 'author_pays', None):
            if forms.interpret_special(form.author_pays.data):
                bibjson.author_pays = form.author_pays.data
        if getattr(form, 'author_pays_url', None):
            if forms.interpret_special(form.author_pays_url.data):
                bibjson.author_pays_url = form.author_pays_url.data
        if getattr(form, 'oa_end_year', None):
            if forms.interpret_special(form.oa_end_year.data):
                bibjson.set_oa_end(form.oa_end_year.data)

        return journal
    def test_01_create_article(self, name, kwargs):

        article_arg = kwargs.get("article")
        article_duplicate_arg = kwargs.get("article_duplicate")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        dry_run = None
        if dry_run_arg != "none":
            dry_run = True if dry_run_arg == "true" else False

        raises = EXCEPTIONS.get(raises_arg)

        eissn = "1234-5678"
        pissn = "9876-5432"

        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        article = None
        original_id = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn,
                pissn=pissn,
                doi="10.123/abc/1",
                fulltext="http://example.com/1")
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            original_id = article.id

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(
            owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        gd_mock = None
        if article_duplicate_arg == "yes":
            gd_mock = BLLArticleMockFactory.get_duplicate(
                eissn=eissn,
                pissn=pissn,
                doi="10.123/abc/1",
                fulltext="http://example.com/1")
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        mock_article = self.svc.get_duplicate(article)

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article, account, duplicate_check,
                                        merge_duplicate, limit_to_account,
                                        add_journal_info, dry_run)
        else:
            report = self.svc.create_article(article, account, duplicate_check,
                                             merge_duplicate, limit_to_account,
                                             add_journal_info, dry_run)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes":
                original = Article.pull(original_id)
                assert original is not None
                assert report["update"] == 0
            elif article is not None:
                original = Article.pull(original_id)
                assert original is None

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None:
                merged = Article.pull(mock_article.id)
                assert merged is None

            if add_journal_info:
                assert article.bibjson(
                ).journal_title == "Add Journal Info Title"
Exemple #11
0
class TestCreateOrUpdateArticle(DoajTestCase):
    def setUp(self):
        super(TestCreateOrUpdateArticle, self).setUp()

        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        sources = JournalFixtureFactory.make_many_journal_sources(2, True)
        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        jbib1 = self.journal1.bibjson()
        jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
        jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        self.journal2 = Journal(**sources[1])
        jbib2 = self.journal2.bibjson()
        jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
        jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        self.article10 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111",
            eissn="2222-2222",
            doi="10.0000/article-10",
            fulltext="https://www.article10.com"))
        self.article10.set_id("articleid10")
        self.article10.save(blocking=True)

        self.article11 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111",
            eissn="2222-2222",
            doi="10.0000/article-11",
            fulltext="https://www.article11.com"))
        self.article11.set_id("articleid11")
        self.article11.save(blocking=True)

        self.article2 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1234-5678",
            eissn="9876-5432",
            doi="10.0000/article-2",
            fulltext="https://www.article2.com"))
        self.article2.set_id("articleid2")
        self.article2.save(blocking=True)

    def tearDown(self):
        super(TestCreateOrUpdateArticle, self).tearDown()

    def test_00_no_doi_and_url_changed(self):
        ba = self.article10.bibjson()
        ba.title = "Updated Article"

        # try for admin

        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["new"] == 0, "expected 1 updated, received: {}".format(
            resp)
        assert self.article10.bibjson().title == "Updated Article", "Expected `Updated Article`, received: {}" \
            .format(self.article10.bibjson().title)

        ba.title = "Updated 2nd time"

        # try for publisher

        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.publisher,
                                             article=self.article10)

        assert resp["success"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["new"] == 0, "expected 1 updated, received: {}".format(
            resp)
        assert self.article10.bibjson().title == "Updated 2nd time", "Expected `Updated 2nd time`, received: {}" \
            .format(self.article10.bibjson().title)

    def test_01_new_doi_new_url(self):
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.remove_urls(ba.FULLTEXT)
        ba.add_identifier(ba.DOI, "10.0000/NEW")
        ba.add_url(ba.FULLTEXT, "https://www.UPDATED.com")

        #for publisher
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.publisher,
                                             article=self.article10)
        assert resp["success"] == 1, "expected 1 new, received: {}".format(
            resp)
        assert resp["update"] == 0, "expected 1 new, received: {}".format(resp)
        assert resp["new"] == 1, "expected 1 new, received: {}".format(resp)

        #for admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 new, received: {}".format(
            resp)
        assert resp["update"] == 1, "expected 1 new, received: {}".format(resp)
        assert resp["new"] == 0, "expected 1 new, received: {}".format(resp)

    def test_02_old_doi_existing_url_admin(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        # check for url from other article owned by the same publisher
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT),
                   ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

        # check for url from other article owned by someone else
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article2.bibjson().get_single_url(ba.FULLTEXT),
                   ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_03_existing_doi_old_url_admin(self):
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by the same publisher
        ba.add_identifier(ba.DOI, "10.0000/article-11")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by someone else
        ba.add_identifier(ba.DOI, "10.0000/article-2")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_04_old_doi_new_url(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["new"] == 0, "expected 1 new, received: {}".format(resp)
        assert self.article10.get_normalised_fulltext(
        ) == "//updated.com", "expected //updated.com, received: {}".format(
            self.article10.get_normalised_fulltext())

    def test_05_new_doi_old_url(self):
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(
            resp)
        assert resp["new"] == 0, "expected 1 updated, received: {}".format(
            resp)
        assert self.article10.get_normalised_doi() == "10.0000/article-UPDATED", \
            "expected 10.0000/article-UPDATED, received: {}".format(
                self.article10.get_normalised_fulltext())

    def test_06_existing_doi_new_url(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI,
                          self.article11.bibjson().get_one_identifier(ba.DOI))

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_07_new_doi_existing_url(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT),
                   ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)
Exemple #12
0
    def add_journal_metadata(self, j=None, reg=None):
        """
        this function makes sure the article is populated
        with all the relevant info from its owning parent object
        :param j: Pass in a Journal to bypass the (slow) locating step. MAKE SURE IT'S THE RIGHT ONE!
        """

        # Record the data that is copied into the article into the "reg"ister, in case the
        # caller needs to know exactly and only which information was copied
        if reg is None:
            reg = Journal()
        rbj = reg.bibjson()

        if j is None:
            journal = self.get_journal()
        else:
            journal = j

        # we were unable to find a journal
        if journal is None:
            raise NoJournalException("Unable to find a journal associated with this article")

        # if we get to here, we have a journal record we want to pull data from
        jbib = journal.bibjson()
        bibjson = self.bibjson()

        # tripwire to be tripped if the journal makes changes to the article
        trip = False

        if bibjson.subjects() != jbib.subjects():
            trip = True
            bibjson.set_subjects(jbib.subjects())
        rbj.set_subjects(jbib.subjects())

        if jbib.title is not None:
            if bibjson.journal_title != jbib.title:
                trip = True
                bibjson.journal_title = jbib.title
            rbj.title = jbib.title

        if jbib.get_license() is not None:
            lic = jbib.get_license()
            alic = bibjson.get_journal_license()

            if lic is not None and (alic is None or (lic.get("title") != alic.get("title") or
                    lic.get("type") != alic.get("type") or
                    lic.get("url") != alic.get("url") or
                    lic.get("version") != alic.get("version") or
                    lic.get("open_access") != alic.get("open_access"))):

                bibjson.set_journal_license(lic.get("title"),
                                            lic.get("type"),
                                            lic.get("url"),
                                            lic.get("version"),
                                            lic.get("open_access"))
                trip = True

            rbj.set_license(lic.get("title"),
                            lic.get("type"),
                            lic.get("url"),
                            lic.get("version"),
                            lic.get("open_access"))

        if len(jbib.language) > 0:
            jlang = jbib.language
            alang = bibjson.journal_language
            jlang.sort()
            alang.sort()
            if jlang != alang:
                bibjson.journal_language = jbib.language
                trip = True
            rbj.set_language(jbib.language)

        if jbib.country is not None:
            if jbib.country != bibjson.journal_country:
                bibjson.journal_country = jbib.country
                trip = True
            rbj.country = jbib.country

        if jbib.publisher:
            if jbib.publisher != bibjson.publisher:
                bibjson.publisher = jbib.publisher
                trip = True
            rbj.publisher = jbib.publisher

        # Copy the seal info, in_doaj status and the journal's ISSNs
        if journal.is_in_doaj() != self.is_in_doaj():
            self.set_in_doaj(journal.is_in_doaj())
            trip = True
        reg.set_in_doaj(journal.is_in_doaj())

        if journal.has_seal() != self.has_seal():
            self.set_seal(journal.has_seal())
            trip = True
        reg.set_seal(journal.has_seal())

        try:
            aissns = bibjson.journal_issns
            jissns = jbib.issns()
            aissns.sort()
            jissns.sort()
            if aissns != jissns:
                bibjson.journal_issns = jbib.issns()
                trip = True

            eissns = jbib.get_identifiers(jbib.E_ISSN)
            pissns = jbib.get_identifiers(jbib.P_ISSN)
            if eissns is not None and len(eissns) > 0:
                rbj.add_identifier(rbj.E_ISSN, eissns[0])
            if pissns is not None and len(pissns) > 0:
                rbj.add_identifier(rbj.P_ISSN, pissns[0])
        except KeyError:
            # No issns, don't worry about it for now
            pass

        return trip
Exemple #13
0
    def test_01_create_article(self, value, kwargs):

        article_arg = kwargs.get("article")
        account_arg = kwargs.get("account")
        get_duplicate_result_arg = kwargs.get("get_duplicate_result")
        role_arg = kwargs.get("role")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")
        update_article_id_arg = kwargs.get("update_article_id")
        has_ft_doi_changed_arg = kwargs.get("has_ft_doi_changed_arg")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        has_ft_doi_changed = True if has_ft_doi_changed_arg == "yes" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        dry_run = None
        if dry_run_arg != "none":
            dry_run = True if dry_run_arg == "true" else False

        raises = EXCEPTIONS.get(raises_arg)

        eissn = "1234-5678"
        pissn = "9876-5432"
        doi = "10.123/abc/1"
        fulltext = "http://example.com/1"

        another_doi = "10.123/duplicate-1"

        another_eissn = "1111-1111"
        another_pissn = "2222-2222"

        duplicate_id = None
        original_id = None
        update_article_id = None

        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        if get_duplicate_result_arg == 'different':
            source = ArticleFixtureFactory.make_article_source(
                eissn=another_eissn,
                pissn=another_pissn,
                doi=doi,
                fulltext=fulltext)
            del source["bibjson"]["journal"]
            duplicate = Article(**source)
            duplicate.save()
            duplicate_id = duplicate.id

        article_id_to_upload = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            article_id_to_upload = article.id

        if get_duplicate_result_arg == "itself":
            source = ArticleFixtureFactory.make_article_source(
                eissn=another_eissn,
                pissn=another_pissn,
                doi=doi,
                fulltext=fulltext)
            del source["bibjson"]["journal"]
            duplicate = Article(**source)
            duplicate.set_id(article_id_to_upload)
            duplicate.save()
            duplicate_id = duplicate.id

        if update_article_id_arg != "none":

            another_source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            original = Article(**another_source)
            original.save(blocking=True)
            original_id = original.id

            if update_article_id_arg == "doi_ft_not_changed":
                article.bibjson().title = "This needs to be updated"

            elif update_article_id_arg == "doi_ft_changed_duplicate":

                article.bibjson().remove_identifiers("doi")
                article.bibjson().add_identifier("doi", another_doi)

            elif update_article_id_arg == "doi_ft_changed_ok":

                article.bibjson().remove_identifiers("doi")
                article.bibjson().add_identifier("doi", "10.1234/updated")

        else:
            update_article_id = None

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(
            owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        if role_arg == "admin":
            account.set_role("admin")

        account.save()

        if get_duplicate_result_arg == "none":
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)

        elif get_duplicate_result_arg == "itself":
            gd_mock = BLLArticleMockFactory.get_duplicate(
                eissn=eissn,
                pissn=pissn,
                doi=doi,
                fulltext=fulltext,
                given_article_id=original_id)
        elif get_duplicate_result_arg == "different":
            gd_mock = BLLArticleMockFactory.get_duplicate(
                eissn=another_eissn,
                pissn=another_pissn,
                doi=doi,
                fulltext=fulltext,
                given_article_id=duplicate_id)
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(
                given_article_id="exception")
        self.svc.get_duplicate = gd_mock
        mock_article = self.svc.get_duplicate(article)

        if role_arg == "admin" or (role_arg == "publisher"
                                   and account_arg == "owner"):
            has_permissions_mock = BLLArticleMockFactory.has_permissions(True)
        else:
            has_permissions_mock = BLLArticleMockFactory.has_permissions(False)
        self.svc.has_permissions = has_permissions_mock

        prepare_update_admin_mock = BLLArticleMockFactory._prepare_update_admin(
            get_duplicate_result_arg, update_article_id_arg)
        self.svc._prepare_update_admin = prepare_update_admin_mock

        prepare_update_publisher_mock = BLLArticleMockFactory._prepare_update_publisher(
            get_duplicate_result_arg, has_ft_doi_changed)
        self.svc._prepare_update_publisher = prepare_update_publisher_mock

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article,
                                        account,
                                        merge_duplicate=merge_duplicate,
                                        add_journal_info=add_journal_info,
                                        dry_run=dry_run,
                                        update_article_id=original_id)
        else:
            report = self.svc.create_article(article,
                                             account,
                                             merge_duplicate=merge_duplicate,
                                             add_journal_info=add_journal_info,
                                             dry_run=dry_run,
                                             update_article_id=original_id)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes" and update_article_id is not None:
                if get_duplicate_result_arg == "itself":
                    original = Article.pull(update_article_id)
                    assert original is not None
                    assert report["update"] == 1, "update: {}".format(
                        report["update"])
                    assert report["new"] == 0, "update: {}".format(
                        report["new"])
            elif original_saved_arg == "yes":
                if get_duplicate_result_arg == "itself":
                    new = Article.pull(article_id_to_upload)
                    assert new is not None
                    assert report["update"] == 1, "update: {}".format(
                        report["update"])
                    assert report["new"] == 0, "update: {}".format(
                        report["new"])
                elif get_duplicate_result_arg == "none":
                    new = Article.pull(article_id_to_upload)
                    assert new is not None
                    assert report["update"] == 0, "update: {}".format(
                        report["update"])
                    assert report["new"] == 1, "update: {}".format(
                        report["new"])

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None and mock_article.id != original_id:
                merged = Article.pull(mock_article.id)
                assert merged is None, "merged: {}".format(merged)

            if add_journal_info:
                assert article.bibjson(
                ).journal_title == "Add Journal Info Title"

            if update_article_id_arg == "doi_ft_changed_ok":
                original = Article.pull(original_id)
                assert original is not None
            elif update_article_id_arg == "doi_ft_not_changed":
                original = Article.pull(original_id)
                assert original is not None
    def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
        # set up for the test
        #########################################

        cj = None
        has_seal = bool(randint(0, 1))
        application = None
        if application_type == "present":
            application = Suggestion(**ApplicationFixtureFactory.make_application_source())
            application.set_id(application.makeid())
            application.remove_contacts()
            application.remove_editor_group()
            application.remove_editor()
            application.remove_owner()
            application.remove_current_journal()
            application.remove_notes()

            if app_key_properties == "yes":
                application.add_contact("Application", "*****@*****.**")
                application.set_editor_group("appeditorgroup")
                application.set_editor("appeditor")
                application.set_owner("appowner")

            application.set_seal(has_seal)
            application.add_note("Application Note")

            if current_journal == "present":
                journal = Journal(**JournalFixtureFactory.make_journal_source())
                journal.remove_contacts()
                journal.add_contact("Journal", "*****@*****.**")
                journal.set_editor_group("journaleditorgroup")
                journal.set_editor("journaleditor")
                journal.set_owner("journalowner")
                journal.remove_current_application()
                journal.remove_notes()
                journal.add_note("Journal Note")
                journal.save(blocking=True)
                application.set_current_journal(journal.id)
                cj = journal
            elif current_journal == "missing":
                application.set_current_journal("123456789987654321")

        manual_update = None
        if manual_update_arg == "true":
            manual_update = True
        elif manual_update_arg == "false":
            manual_update = False

        # execute the test
        ########################################

        svc = DOAJ.applicationService()
        if raises is not None and raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.application_2_journal(application, manual_update)
        else:
            journal = svc.application_2_journal(application, manual_update)

            # check the result
            ######################################

            assert journal is not None
            assert isinstance(journal, Journal)
            assert journal.is_in_doaj() is True

            jbj = journal.bibjson().data
            del jbj["active"]
            assert jbj == application.bibjson().data

            if current_journal == "present":
                assert len(journal.related_applications) == 3
            else:
                assert len(journal.related_applications) == 1
            related = journal.related_application_record(application.id)
            assert related is not None

            if manual_update_arg == "true":
                assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

            if app_key_properties == "yes":
                contacts = journal.contacts()
                assert len(contacts) == 1
                assert contacts[0].get("name") == "Application"
                assert contacts[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "appeditorgroup"
                assert journal.editor == "appeditor"
                assert journal.owner == "appowner"
                assert journal.has_seal() == has_seal

                if current_journal == "present":
                    assert len(journal.notes) == 2
                else:
                    assert len(journal.notes) == 1

            elif app_key_properties == "no":
                if current_journal == "present":
                    contacts = journal.contacts()
                    assert len(contacts) == 1
                    assert contacts[0].get("name") == "Journal"
                    assert contacts[0].get("email") == "*****@*****.**"
                    assert journal.editor_group == "journaleditorgroup"
                    assert journal.editor == "journaleditor"
                    assert journal.owner == "journalowner"
                    assert journal.has_seal() == has_seal
                    assert len(journal.notes) == 2

                elif current_journal == "none" or current_journal == "missing":
                    contacts = journal.contacts()
                    assert len(contacts) == 0
                    assert journal.editor_group is None
                    assert journal.editor is None
                    assert journal.owner is None
                    assert journal.has_seal() == has_seal
                    assert len(journal.notes) == 1

            if current_journal == "present":
                assert cj.id == journal.id
                assert cj.created_date == journal.created_date
    def test_01_discover_duplicates(self, name, kwargs):

        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save(blocking=True)

        # determine what we need to load into the index
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save()
                article_ids.append(article.id)
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            eissn = "1234=5678" # one matching
            pissn = "6789-1234" # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article, owner_id)
        else:
            possible_articles = svc.discover_duplicates(article, owner_id)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[-1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
    def test_01_create_article(self, name, kwargs):

        article_arg = kwargs.get("article")
        article_duplicate_arg = kwargs.get("article_duplicate")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        dry_run = None
        if dry_run_arg != "none":
            dry_run = True if dry_run_arg == "true" else False

        raises = EXCEPTIONS.get(raises_arg)

        eissn = "1234-5678"
        pissn = "9876-5432"

        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        article = None
        original_id = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            original_id = article.id

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        gd_mock = None
        if article_duplicate_arg == "yes":
            gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        mock_article = self.svc.get_duplicate(article)

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                        limit_to_account, add_journal_info, dry_run)
        else:
            report = self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                             limit_to_account, add_journal_info, dry_run)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes":
                original = Article.pull(original_id)
                assert original is not None
                assert report["update"] == 0
            elif article is not None:
                original = Article.pull(original_id)
                assert original is None

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None:
                merged = Article.pull(mock_article.id)
                assert merged is None

            if add_journal_info:
                assert article.bibjson().journal_title == "Add Journal Info Title"
def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(outfile_accounts, "wb", "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), "NO OWNER"])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), owner])

        print "processed", counter, "journals"
    def test_02_application_2_journal(self, name, application_type,
                                      manual_update_arg, app_key_properties,
                                      current_journal, raises):
        # set up for the test
        #########################################

        cj = None
        has_seal = bool(randint(0, 1))
        application = None
        if application_type == "present":
            application = Suggestion(
                **ApplicationFixtureFactory.make_application_source())
            application.set_id(application.makeid())
            application.remove_contacts()
            application.remove_editor_group()
            application.remove_editor()
            application.remove_owner()
            application.remove_current_journal()
            application.remove_notes()

            if app_key_properties == "yes":
                application.add_contact("Application",
                                        "*****@*****.**")
                application.set_editor_group("appeditorgroup")
                application.set_editor("appeditor")
                application.set_owner("appowner")

            application.set_seal(has_seal)
            application.add_note("Application Note")

            if current_journal == "present":
                journal = Journal(
                    **JournalFixtureFactory.make_journal_source())
                journal.remove_contacts()
                journal.add_contact("Journal", "*****@*****.**")
                journal.set_editor_group("journaleditorgroup")
                journal.set_editor("journaleditor")
                journal.set_owner("journalowner")
                journal.remove_current_application()
                journal.remove_notes()
                journal.add_note("Journal Note")
                journal.save(blocking=True)
                application.set_current_journal(journal.id)
                cj = journal
            elif current_journal == "missing":
                application.set_current_journal("123456789987654321")

        manual_update = None
        if manual_update_arg == "true":
            manual_update = True
        elif manual_update_arg == "false":
            manual_update = False

        # execute the test
        ########################################

        svc = DOAJ.applicationService()
        if raises is not None and raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.application_2_journal(application, manual_update)
        else:
            journal = svc.application_2_journal(application, manual_update)

            # check the result
            ######################################

            assert journal is not None
            assert isinstance(journal, Journal)
            assert journal.is_in_doaj() is True

            jbj = journal.bibjson().data
            del jbj["active"]
            assert jbj == application.bibjson().data

            if current_journal == "present":
                assert len(journal.related_applications) == 3
            else:
                assert len(journal.related_applications) == 1
            related = journal.related_application_record(application.id)
            assert related is not None

            if manual_update_arg == "true":
                assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

            if app_key_properties == "yes":
                contacts = journal.contacts()
                assert len(contacts) == 1
                assert contacts[0].get("name") == "Application"
                assert contacts[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "appeditorgroup"
                assert journal.editor == "appeditor"
                assert journal.owner == "appowner"
                assert journal.has_seal() == has_seal

                if current_journal == "present":
                    assert len(journal.notes) == 2
                else:
                    assert len(journal.notes) == 1

            elif app_key_properties == "no":
                if current_journal == "present":
                    contacts = journal.contacts()
                    assert len(contacts) == 1
                    assert contacts[0].get("name") == "Journal"
                    assert contacts[0].get("email") == "*****@*****.**"
                    assert journal.editor_group == "journaleditorgroup"
                    assert journal.editor == "journaleditor"
                    assert journal.owner == "journalowner"
                    assert journal.has_seal() == has_seal
                    assert len(journal.notes) == 2

                elif current_journal == "none" or current_journal == "missing":
                    contacts = journal.contacts()
                    assert len(contacts) == 0
                    assert journal.editor_group is None
                    assert journal.editor is None
                    assert journal.owner is None
                    assert journal.has_seal() == has_seal
                    assert len(journal.notes) == 1

            if current_journal == "present":
                assert cj.id == journal.id
                assert cj.created_date == journal.created_date