Beispiel #1
0
    def get_journal(cls, specs):
        journals = []

        for spec in specs:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**source)
            bj = j.bibjson()
            bj.title = spec.get("title", "Journal Title")
            bj.remove_identifiers()
            if "pissn" in spec:
                bj.add_identifier(bj.P_ISSN, spec.get("pissn"))
            if "eissn" in spec:
                bj.add_identifier(bj.E_ISSN, spec.get("eissn"))
            spec["instance"] = j
            journals.append(spec)

        def mock(self):
            bibjson = self.bibjson()

            # first, get the ISSNs associated with the record
            pissns = bibjson.get_identifiers(bibjson.P_ISSN)
            eissns = bibjson.get_identifiers(bibjson.E_ISSN)

            for j in journals:
                if j["pissn"] in pissns and j["eissn"] in eissns:
                    return j["instance"]

        return mock
Beispiel #2
0
    def get_journal(cls, specs):
        journals = []

        for spec in specs:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**source)
            bj = j.bibjson()
            bj.title = spec.get("title", "Journal Title")
            bj.remove_identifiers()
            if "pissn" in spec:
                bj.add_identifier(bj.P_ISSN, spec.get("pissn"))
            if "eissn" in spec:
                bj.add_identifier(bj.E_ISSN, spec.get("eissn"))
            spec["instance"] = j
            journals.append(spec)

        def mock(self):
            bibjson = self.bibjson()

            # first, get the ISSNs associated with the record
            pissns = bibjson.get_identifiers(bibjson.P_ISSN)
            eissns = bibjson.get_identifiers(bibjson.E_ISSN)

            for j in journals:
                if j["pissn"] in pissns and j["eissn"] in eissns:
                    return j["instance"]

        return mock
def migrate_journal(data):
    if "bibjson" not in data:
        return Journal(**data)
    ap = data.get("bibjson").get("archiving_policy")
    if ap is None:
        return Journal(**data)

    data["bibjson"]["archiving_policy"] = _reformat_data(ap)
    return Journal(**data)
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    with codecs.open(outfile_later, "wb", "utf-8") as f, codecs.open(outfile_missing, "wb", "utf-8") as g:

        out_later = csv.writer(f)
        out_later.writerow(["Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference"])

        out_missing = UnicodeWriter(g)
        out_missing.writerow(["Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            print counter, application.id

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
                created = latest_prov.created_date
                pstamp = latest_prov.created_timestamp
                td = pstamp - lustamp
                diff = td.total_seconds()

                if diff > THRESHOLD:
                    out_later.writerow([application.id, application.last_updated, created, diff])

            # Part 2 - missing journals
            if application.application_status == constants.APPLICATION_STATUS_ACCEPTED:
                missing = False

                # find the matching journals by issn or by title
                matching_journals = Journal.find_by_issn(application.bibjson().issns())
                if len(matching_journals) == 0:
                    # Have another go, find by title
                    matching_journals = Journal.find_by_title(application.bibjson().title)

                # if there are no matching journals, it is missing.
                if len(matching_journals) == 0:
                    missing = True
                else:
                    # if there are matching journals, find out if any of them are in the doaj.  If none, then journal is still missing
                    those_in_doaj = len([j for j in matching_journals if j.is_in_doaj()])
                    if those_in_doaj == 0:
                        missing = True

                # if the journal is missing, record it
                if missing:
                    created = ""
                    if latest_prov is not None:
                        created = latest_prov.created_date
                    out_missing.writerow([application.id, application.last_manual_update, created, " ".join(application.bibjson().issns()), application.bibjson().title])

        print "processed", counter, "suggestions"
Beispiel #5
0
    def setUp(self):
        super(TestCreateOrUpdateArticle, self).setUp()

        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        sources = JournalFixtureFactory.make_many_journal_sources(2, True)
        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        jbib1 = self.journal1.bibjson()
        jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
        jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        self.journal2 = Journal(**sources[1])
        jbib2 = self.journal2.bibjson()
        jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
        jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        self.article10 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111",
            eissn="2222-2222",
            doi="10.0000/article-10",
            fulltext="https://www.article10.com"))
        self.article10.set_id("articleid10")
        self.article10.save(blocking=True)

        self.article11 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111",
            eissn="2222-2222",
            doi="10.0000/article-11",
            fulltext="https://www.article11.com"))
        self.article11.set_id("articleid11")
        self.article11.save(blocking=True)

        self.article2 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1234-5678",
            eissn="9876-5432",
            doi="10.0000/article-2",
            fulltext="https://www.article2.com"))
        self.article2.set_id("articleid2")
        self.article2.save(blocking=True)
Beispiel #6
0
    def get_owner(self):
        b = self.bibjson()
        article_issns = b.get_identifiers(b.P_ISSN)
        article_issns += b.get_identifiers(b.E_ISSN)
        owners = []

        seen_journal_issns = {}
        for issn in article_issns:
            journals = Journal.find_by_issn(issn)
            if journals is not None and len(journals) > 0:
                for j in journals:
                    owners.append(j.owner)
                    if j.owner not in seen_journal_issns:
                        seen_journal_issns[j.owner] = []
                    seen_journal_issns[j.owner] += j.bibjson().issns()

        # deduplicate the list of owners
        owners = list(set(owners))

        # no owner means we can't confirm
        if len(owners) == 0:
            raise NoValidOwnerException

        # multiple owners means ownership of this article is confused
        if len(owners) > 1:
            return NoValidOwnerException

        return owners[0]
Beispiel #7
0
def get_publisher(acc):
    q = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "admin.owner.exact": acc.id
                    }
                }, {
                    "term": {
                        "admin.in_doaj": True
                    }
                }]
            }
        },
        "size": 0,
        "facets": {
            "publishers": {
                "terms": {
                    "field": "bibjson.publisher.exact",
                    "size": 1000
                }
            }
        }
    }
    es = Journal.query(q=q)
    pubs = [
        term.get("term") for term in es.get("facets", {}).get(
            "publishers", {}).get("terms", [])
    ]
    if len(pubs) == 0:
        return None
    return ", ".join(pubs)
Beispiel #8
0
def get_publisher(acc):
    q = {
        "query" : {
            "bool" : {
                "must" : [
                    {"term" : {"admin.owner.exact" : acc.id}},
                    {"term" : {"admin.in_doaj" : True}}
                ]
            }

        },
        "size" : 0,
        "facets" : {
            "publishers" : {
                "terms" : {
                    "field" : "bibjson.publisher.exact",
                    "size" : 1000
                }
            }
        }
    }
    es = Journal.query(q=q)
    pubs = [term.get("term") for term in es.get("facets", {}).get("publishers", {}).get("terms", [])]
    if len(pubs) == 0:
        return None
    return ", ".join(pubs)
Beispiel #9
0
 def _sync_owner_to_journal(self):
     if self.current_journal is None:
         return
     from portality.models import Journal
     cj = Journal.pull(self.current_journal)
     if cj is not None and cj.owner != self.owner:
         cj.set_owner(self.owner)
         cj.save(sync_owner=False)
Beispiel #10
0
def traverse_journals():
    """ Check dataset lookups in all journals in DOAJ """
    j_report = {'country': [], 'currency': [], 'language': []}

    for j in Journal.all_in_doaj():
        check_invalid_datasets(j, j_report)

    return j_report
    def test_has_permissions(self):

        journal_source = JournalFixtureFactory.make_journal_source()
        journal1 = Journal(**journal_source)

        publisher_owner_src = AccountFixtureFactory.make_publisher_source()
        publisher_owner = Account(**publisher_owner_src)
        publisher_stranged_src = AccountFixtureFactory.make_publisher_source()
        publisher_stranged = Account(**publisher_stranged_src)
        admin_src = AccountFixtureFactory.make_managing_editor_source()
        admin = Account(**admin_src)

        journal1.set_owner(publisher_owner)
        journal1.save(blocking=True)

        eissn = journal1.bibjson().get_one_identifier("eissn")
        pissn = journal1.bibjson().get_one_identifier("pissn")

        art_source = ArticleFixtureFactory.make_article_source(eissn=eissn,
                                                               pissn=pissn)
        article = Article(**art_source)

        assert self.svc.has_permissions(publisher_stranged, article, False)
        assert self.svc.has_permissions(publisher_owner, article, True)
        assert self.svc.has_permissions(admin, article, True)
        failed_result = self.svc.has_permissions(publisher_stranged, article, True)
        assert failed_result == {'success': 0, 'fail': 1, 'update': 0, 'new': 0, 'shared': [],
                                 'unowned': [pissn, eissn],
                                 'unmatched': []}, "received: {}".format(failed_result)
Beispiel #12
0
def load_journal_cases():
    account = Account(**AccountFixtureFactory.make_publisher_source())
    account.set_id(account.makeid())

    journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_id(journal.makeid())

    wrong_id = uuid.uuid4()

    return [
        param("j_id_acc_lock", journal, journal.id, account, True, raises=lock.Locked),
        param("j_id_acc_nolock", journal, journal.id, account, False),
        param("j_id_noacc_nolock", journal, journal.id, None, False),
        param("j_noid_noacc_nolock", journal, None, None, False, raises=exceptions.ArgumentException),
        param("j_wid_noacc_nolock", journal, wrong_id, None, False),
        param("noj_id_noacc_nolock", None, journal.id, None, False),
        param("noj_noid_noacc_nolock", None, None, None, False, raises=exceptions.ArgumentException)
    ]
Beispiel #13
0
def _get_journal_id_from_issn(issn):
    issn = _normalise_issn(issn)
    journals = Journal.find_by_issn(issn)
    if len(journals) > 1:
        print "WARN: issn", issn, "maps to multiple journals:", ", ".join([j.id for j in journals])
    if len(journals) == 0:
        print "WARN: issn", issn, "does not map to any journals"
    if len(journals) > 0:
        return journals[0].id
Beispiel #14
0
    def get_result_url(self):
        """
        Get the URL for this OpenURLRequest's referent.
        :return: The url as a string, or None if not found.
        """
        try:
            results = self.query_es()
        except ValueError:
            return None

        if results is None:
            return None

        if results.get('hits', {}).get('total', 0) == 0:
            # No results found for query, retry
            results = self.fallthrough_retry()
            if results is None or results.get('hits', {}).get('total', 0) == 0:
                # This time we've definitely failed
                return None

        if results.get('hits', {}).get('hits',
                                       [{}])[0].get('_type') == 'journal':

            # construct a journal object around the result
            journal = Journal(**results['hits']['hits'][0])

            # the continuation is a first-class journal object, so if we have a journal we have the right continuation
            # (assuming that the user gave us specific enough information
            ident = journal.id

            # If there request has a volume parameter, query for presence of an article with that volume
            if self.volume:
                vol_iss_results = self.query_for_vol(journal)

                if vol_iss_results == None:
                    # we were asked for a vol/issue, but weren't given the correct information to get it.
                    return None
                elif vol_iss_results['hits']['total'] > 0:
                    # construct the toc url using the ident, plus volume and issue
                    jtoc_url = url_for("doaj.toc",
                                       identifier=ident,
                                       volume=self.volume,
                                       issue=self.issue)
                else:
                    # If no results, the DOAJ does not contain the vol/issue being searched. (Show openurl 404)
                    jtoc_url = None
            else:
                # if no volume parameter, construct the toc url using the ident only
                jtoc_url = url_for("doaj.toc", identifier=ident)
            return jtoc_url

        elif results.get('hits', {}).get('hits',
                                         [{}])[0].get('_type') == 'article':
            return url_for("doaj.article_page",
                           identifier=results['hits']['hits'][0]['_id'])
def publishers_with_journals():
    """ Get accounts for all publishers with journals in the DOAJ """
    for acc in esprit.tasks.scroll(conn, 'account', q=publisher_query):
        account = Account(**acc)
        journal_ids = account.journal
        if journal_ids is not None:
            for j in journal_ids:
                journal = Journal.pull(j)
                if journal is not None and journal.is_in_doaj():
                    yield account
                    break
Beispiel #16
0
def publishers_with_journals():
    """ Get accounts for all publishers with journals in the DOAJ """
    for acc in esprit.tasks.scroll(conn, 'account', q=publisher_query):
        account = Account(**acc)
        journal_ids = account.journal
        if journal_ids is not None:
            for j in journal_ids:
                journal = Journal.pull(j)
                if journal is not None and journal.is_in_doaj():
                    yield account
                    break
Beispiel #17
0
def suggestion2journal(suggestion):
    journal_data = deepcopy(suggestion.data)
    del journal_data['suggestion']
    del journal_data['index']
    del journal_data['admin']['application_status']
    del journal_data['id']
    del journal_data['created_date']
    del journal_data['last_updated']
    journal_data['bibjson']['active'] = True
    new_j = Journal(**journal_data)
    return new_j
Beispiel #18
0
    def make_journal(self):
        # first make a raw copy of the content into a journal
        journal_data = deepcopy(self.data)
        if "suggestion" in journal_data:
            del journal_data['suggestion']
        if "index" in journal_data:
            del journal_data['index']
        if "admin" in journal_data and "application_status" in journal_data["admin"]:
            del journal_data['admin']['application_status']
        if "id" in journal_data:
            del journal_data['id']
        if "created_date" in journal_data:
            del journal_data['created_date']
        if "last_updated" in journal_data:
            del journal_data['last_updated']
        if "bibjson" not in journal_data:
            journal_data["bibjson"] = {}
        journal_data['bibjson']['active'] = True

        new_j = Journal(**journal_data)

        # now deal with the fact that this could be a replacement of an existing journal
        if self.current_journal is not None:
            cj = Journal.pull(self.current_journal)

            # carry the id and the created date
            new_j.set_id(self.current_journal)
            new_j.set_created(cj.created_date)

            # set a reapplication date
            new_j.set_last_reapplication()

            # carry any continuations
            hist = cj.get_history_raw()
            if hist is not None and len(hist) > 0:
                new_j.set_history(cj.get_history_raw())

            # remove the reference to the current_journal
            del new_j.data["admin"]["current_journal"]

        return new_j
Beispiel #19
0
    def delete_selected(cls, query=None, owner=None, snapshot=True):
        if owner is not None:
            from portality.models import Journal
            issns = Journal.issns_by_owner(owner)
            q = ArticleQuery(issns=issns)
            query = q.query()

        if snapshot:
            articles = cls.iterate(query, page_size=1000)
            for article in articles:
                article.snapshot()
        return cls.delete_by_query(query)
Beispiel #20
0
    def delete_selected(cls, query=None, owner=None, snapshot=True):
        if owner is not None:
            from portality.models import Journal
            issns = Journal.issns_by_owner(owner)
            q = ArticleQuery(issns=issns)
            query = q.query()

        if snapshot:
            articles = cls.iterate(query, page_size=1000)
            for article in articles:
                article.snapshot()
        return cls.delete_by_query(query)
Beispiel #21
0
 def list_records(self,
                  from_date=None,
                  until_date=None,
                  oai_set=None,
                  list_size=None,
                  start_after=None):
     total, results = super(OAIPMHJournal,
                            self).list_records(from_date=from_date,
                                               until_date=until_date,
                                               oai_set=oai_set,
                                               list_size=list_size,
                                               start_after=start_after)
     return total, [Journal(**r) for r in results]
Beispiel #22
0
    def setUp(self):
        super(TestAdminEditMetadata, self).setUp()
        admin_account = Account.make_account(username="******",
                                             name="Admin",
                                             email="*****@*****.**",
                                             roles=["admin"])
        admin_account.set_password('password123')
        admin_account.save()

        publisher_account = Account.make_account(username="******",
                                                 name="Publisher",
                                                 email="*****@*****.**",
                                                 roles=["publisher"])
        publisher_account.set_password('password456')
        publisher_account.save(blocking=True)

        self.j = Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        self.j.save(blocking=True)
        self.a = Article(**ArticleFixtureFactory.make_article_source(
            in_doaj=True))
        self.a.save(blocking=True)
Beispiel #23
0
    def find_by_issn(cls, issns, owners):
        journals = []
        seen_issns = []

        for owner in owners:
            for eissn, pissn in issns:
                if eissn not in seen_issns and eissn is not None:
                    seen_issns.append(eissn)
                if pissn not in seen_issns and pissn is not None:
                    seen_issns.append(pissn)

                source = JournalFixtureFactory.make_journal_source(
                    in_doaj=True)
                journal = Journal(**source)
                journal.set_owner(owner)

                journal.bibjson().remove_identifiers("eissn")
                journal.bibjson().remove_identifiers("pissn")
                if eissn is not None:
                    journal.bibjson().add_identifier("eissn", eissn)
                if pissn is not None:
                    journal.bibjson().add_identifier("pissn", pissn)
                journals.append(journal)

        @classmethod
        def mock(cls, issns, in_doaj=None, max=10):
            if not isinstance(issns, list):
                issns = [issns]

            for issn in issns:
                if issn in seen_issns:
                    return journals

            return []

        return mock
def load_j2a_cases():
    journal = Journal(**JournalFixtureFactory.make_journal_source(
        in_doaj=True))
    account_source = AccountFixtureFactory.make_publisher_source()

    owner_account = Account(**deepcopy(account_source))
    owner_account.set_id(journal.owner)

    non_owner_publisher = Account(**deepcopy(account_source))

    non_publisher = Account(**deepcopy(account_source))
    non_publisher.remove_role("publisher")

    admin = Account(**deepcopy(account_source))
    admin.add_role("admin")

    return [
        param("no_journal_no_account",
              None,
              None,
              raises=exceptions.ArgumentException),
        param("no_journal_with_account",
              None,
              owner_account,
              raises=exceptions.ArgumentException),
        param("journal_no_account",
              journal,
              None,
              comparator=application_matches),
        param("journal_matching_account",
              journal,
              owner_account,
              comparator=application_matches),
        param("journal_unmatched_account",
              journal,
              non_owner_publisher,
              raises=exceptions.AuthoriseException),
        param("journal_non_publisher_account",
              journal,
              non_publisher,
              raises=exceptions.AuthoriseException),
        param("journal_admin_account",
              journal,
              admin,
              comparator=application_matches)
    ]
Beispiel #25
0
    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.
        :return: The results of a query through the dao, a JSON object.
        """
        # Copy to the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)

        # Get all of the attributes with values set.
        set_attributes = [(x, getattr(self, x))
                          for x in JOURNAL_SCHEMA_KEYS[:-1]
                          if getattr(self, x)]

        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[
                0]  # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles
        i = SUPPORTED_GENRES.index(getattr(self, 'genre').lower())

        # Add the attributes to the query
        for (k, v) in set_attributes:
            es_term = OPENURL_TO_ES[k][i]
            if es_term is None:
                continue
            else:
                term = {"term": {es_term: v}}
            populated_query["query"]["bool"]["must"].append(term)

        # avoid doing an empty query
        if len(populated_query["query"]["bool"]["must"]) == 0:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " +
                             json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " +
                             json.dumps(populated_query))
            return Article.query(q=populated_query)
Beispiel #26
0
def lookup_subject_categories(issns):
    """ By ISSN, get the subject classification of a journal """

    subjects_column = []

    for i in issns:
        il = [s.strip() for s in i.split(',')]
        j = Journal.find_by_issn(il, in_doaj=False)
        if len(j) == 0:
            subjects_column.append('Error: not found')
        elif len(j) == 1:
            subj = j[0].bibjson().subjects()
            subjects_column.append(', '.join(
                [f"{s['scheme']}:{s['code']} - {s['term']}" for s in subj]))
        else:
            subjects_column.append(
                'Error: multiple records found for that ISSN')

    return subjects_column
Beispiel #27
0
def load_issn_journal_map():
    # we need to go over every journal in the index
    for j in Journal.iterall():
        obj = {}
        jbib = j.bibjson()
        
        # store only the data we want for the article migration
        obj["subjects"] = jbib.subjects()
        obj["title"] = jbib.title
        obj["license"] = jbib.get_license()
        obj["language"] = jbib.language
        obj["country"] = jbib.country
        obj["in_doaj"] = j.is_in_doaj()
        
        # get the issns that map to this journal (or any previous version of it)
        issns = j.data.get("index", {}).get("issn", [])
        
        # register pointers to the object for each issn
        for issn in issns:
            issnmap[issn] = obj
Beispiel #28
0
    def get_journal(self):
        """
        Get this article's associated journal
        :return: A Journal, or None if this is an orphan article
        """
        bibjson = self.bibjson()

        # first, get the ISSNs associated with the record
        pissns = bibjson.get_identifiers(bibjson.P_ISSN)
        eissns = bibjson.get_identifiers(bibjson.E_ISSN)
        allissns = list(set(pissns + eissns))

        # find a matching journal record from the index
        journal = None
        for issn in allissns:
            journals = Journal.find_by_issn(issn)
            if len(journals) > 0:
                # there should only ever be one, so take the first one
                journal = journals[0]
                break

        return journal
Beispiel #29
0
    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.
        :return: The results of a query through the dao, a JSON object.
        """
        # Copy to the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)

        # Get all of the attributes with values set.
        set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS[:-1] if getattr(self, x)]

        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]    # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles
        i = SUPPORTED_GENRES.index(getattr(self, 'genre').lower())

        # Add the attributes to the query
        for (k, v) in set_attributes:
            es_term = OPENURL_TO_ES[k][i]
            if es_term is None:
                continue
            else:
                term = {"term": {es_term: v}}
            populated_query["query"]["bool"]["must"].append(term)

        # avoid doing an empty query
        if len(populated_query["query"]["bool"]["must"]) == 0:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
            return Article.query(q=populated_query)
Beispiel #30
0
    def get_journal(self):
        """
        Get this article's associated journal
        :return: A Journal, or None if this is an orphan article
        """
        bibjson = self.bibjson()

        # first, get the ISSNs associated with the record
        pissns = bibjson.get_identifiers(bibjson.P_ISSN)
        eissns = bibjson.get_identifiers(bibjson.E_ISSN)
        allissns = list(set(pissns + eissns))

        # find a matching journal record from the index
        journal = None
        for issn in allissns:
            journals = Journal.find_by_issn(issn)
            if len(journals) > 0:
                # there should only ever be one, so take the first one
                journal = journals[0]
                break

        return journal
Beispiel #31
0
 def get_associated_journals(self):
     # find all matching journal record from the index
     allissns = self.bibjson().issns()
     return Journal.find_by_issn(allissns)
Beispiel #32
0
    def form2obj(form, existing_journal):
        journal = Journal()
        bibjson = journal.bibjson()

        # The if statements that wrap practically every field are there due to this
        # form being used to edit old journals which don't necessarily have most of
        # this info.
        # It also allows admins to delete the contents of any field if they wish,
        # by ticking the "Allow incomplete form" checkbox and deleting the contents
        # of that field. The if condition(s) will then *not* add the relevant field to the
        # new journal object being constructed.
        # add_url in the journal model has a safeguard against empty URL-s.

        if form.title.data:
            bibjson.title = form.title.data
        bibjson.add_url(form.url.data, urltype='homepage')
        if form.alternative_title.data:
            bibjson.alternative_title = form.alternative_title.data
        if form.pissn.data:
            bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
        if form.eissn.data:
            bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
        if form.publisher.data:
            bibjson.publisher = form.publisher.data
        if form.society_institution.data:
            bibjson.institution = form.society_institution.data
        if form.platform.data:
            bibjson.provider = form.platform.data
        if form.contact_name.data or form.contact_email.data:
            journal.add_contact(form.contact_name.data, form.contact_email.data)
        if form.country.data:
            bibjson.country = form.country.data

        if forms.interpret_special(form.processing_charges.data):
            bibjson.set_apc(form.processing_charges_currency.data, form.processing_charges_amount.data)

        if forms.interpret_special(form.submission_charges.data):
            bibjson.set_submission_charges(form.submission_charges_currency.data, form.submission_charges_amount.data)

        if forms.interpret_special(form.waiver_policy.data):
            bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

        # checkboxes
        if forms.interpret_special(form.digital_archiving_policy.data) or form.digital_archiving_policy_url.data:
            archiving_policies = forms.interpret_special(form.digital_archiving_policy.data)
            archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_other.data, store_other_label=True)
            archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_library.data, forms.digital_archiving_policy_specific_library_value, store_other_label=True)
            bibjson.set_archiving_policy(archiving_policies, form.digital_archiving_policy_url.data)

        if form.crawl_permission.data and form.crawl_permission.data != 'None':
            bibjson.allows_fulltext_indexing = forms.interpret_special(form.crawl_permission.data)  # just binary

        # checkboxes
        article_ids = forms.interpret_special(form.article_identifiers.data)
        article_ids = forms.interpret_other(article_ids, form.article_identifiers_other.data)
        if article_ids:
            bibjson.persistent_identifier_scheme = article_ids

        if (form.download_statistics.data and form.download_statistics.data != 'None') or form.download_statistics_url.data:
            bibjson.set_article_statistics(form.download_statistics_url.data, forms.interpret_special(form.download_statistics.data))

        if form.first_fulltext_oa_year.data:
            bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

        # checkboxes
        fulltext_format = forms.interpret_other(form.fulltext_format.data, form.fulltext_format_other.data)
        if fulltext_format:
            bibjson.format = fulltext_format

        if form.keywords.data:
            bibjson.set_keywords(form.keywords.data)  # tag list field

        if form.languages.data:
            bibjson.set_language(form.languages.data)  # select multiple field - gives a list back

        bibjson.add_url(form.editorial_board_url.data, urltype='editorial_board')

        if form.review_process.data or form.review_process_url.data:
            bibjson.set_editorial_review(form.review_process.data, form.review_process_url.data)

        bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
        bibjson.add_url(form.instructions_authors_url.data, urltype='author_instructions')

        if (form.plagiarism_screening.data and form.plagiarism_screening.data != 'None') or form.plagiarism_screening_url.data:
            bibjson.set_plagiarism_detection(
                form.plagiarism_screening_url.data,
                has_detection=forms.interpret_special(form.plagiarism_screening.data)
            )

        if form.publication_time.data:
            bibjson.publication_time = form.publication_time.data

        bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

        license_type = forms.interpret_other(form.license.data, form.license_other.data)
        if forms.interpret_special(license_type):
        # "None" and "False" as strings like they come out of the WTForms processing)
        # would get interpreted correctly by this check, so "None" licenses should not appear
            if license_type in licenses:
                by = licenses[license_type]['BY']
                nc = licenses[license_type]['NC']
                nd = licenses[license_type]['ND']
                sa = licenses[license_type]['SA']
                license_title = licenses[license_type]['title']
            elif form.license_checkbox.data:
                by = True if 'BY' in form.license_checkbox.data else False
                nc = True if 'NC' in form.license_checkbox.data else False
                nd = True if 'ND' in form.license_checkbox.data else False
                sa = True if 'SA' in form.license_checkbox.data else False
                license_title = license_type
            else:
                by = None; nc = None; nd = None; sa = None;
                license_title = license_type

            bibjson.set_license(
                license_title,
                license_type,
                url=form.license_url.data,
                open_access=forms.interpret_special(form.open_access.data),
                by=by, nc=nc, nd=nd, sa=sa,
                embedded=forms.interpret_special(form.license_embedded.data),
                embedded_example_url=form.license_embedded_url.data
            )

        # checkboxes
        deposit_policies = forms.interpret_special(form.deposit_policy.data)  # need empty list if it's just "None"
        deposit_policies = forms.interpret_other(deposit_policies, form.deposit_policy_other.data)
        if deposit_policies:
            bibjson.deposit_policy = deposit_policies

        if form.copyright.data and form.copyright.data != 'None':
            holds_copyright = forms.interpret_other(
                forms.interpret_special(form.copyright.data),
                form.copyright_other.data
            )
            bibjson.set_author_copyright(form.copyright_url.data, holds_copyright=holds_copyright)

        if form.publishing_rights.data and form.publishing_rights.data != 'None':
            publishing_rights = forms.interpret_other(
                forms.interpret_special(form.publishing_rights.data),
                form.publishing_rights_other.data
            )
            bibjson.set_author_publishing_rights(form.publishing_rights_url.data, holds_rights=publishing_rights)

        # need to copy over the notes from the existing journal object, if any, otherwise
        # the dates on all the notes will get reset to right now (i.e. last_updated)
        # since the journal object we're creating in this xwalk is a new, empty one
        journal.set_notes(existing_journal.notes())

        # generate index of notes, just the text
        curnotes = []
        for curnote in journal.notes():
            curnotes.append(curnote['note'])

        # add any new notes
        formnotes = []
        for formnote in form.notes.data:
            if formnote['note']:
                if formnote['note'] not in curnotes and formnote["note"] != "":
                    journal.add_note(formnote['note'])
                # also generate another text index of notes, this time an index of the form notes
                formnotes.append(formnote['note'])

        if current_user.has_role("delete_note"):
            # delete all notes not coming back from the form, means they've been deleted
            # also if one of the saved notes is completely blank, delete it
            for curnote in journal.notes()[:]:
                if not curnote['note'] or curnote['note'] not in formnotes:
                    journal.remove_note(curnote)

        new_subjects = []
        for code in form.subject.data:
            sobj = {"scheme": 'LCC', "term": lcc.lookup_code(code), "code": code}
            new_subjects.append(sobj)
        bibjson.set_subjects(new_subjects)

        owner = form.owner.data.strip()
        if owner:
            journal.set_owner(owner)

        editor_group = form.editor_group.data.strip()
        if editor_group:
            journal.set_editor_group(editor_group)

        editor = form.editor.data.strip()
        if editor:
            journal.set_editor(editor)

        # old fields - only create them in the journal record if the values actually exist
        # need to use interpret_special in the test condition in case 'None' comes back from the form
        if getattr(form, 'author_pays', None):
            if forms.interpret_special(form.author_pays.data):
                bibjson.author_pays = form.author_pays.data
        if getattr(form, 'author_pays_url', None):
            if forms.interpret_special(form.author_pays_url.data):
                bibjson.author_pays_url = form.author_pays_url.data
        if getattr(form, 'oa_end_year', None):
            if forms.interpret_special(form.oa_end_year.data):
                bibjson.set_oa_end(form.oa_end_year.data)

        return journal
Beispiel #33
0
def migrate_journals(source):
    # read in the content
    f = open(source)
    xml = etree.parse(f)
    f.close()
    journals = xml.getroot()
    print "migrating", str(len(journals)), "journal records"
    
    clusters = _get_journal_clusters(journals)
    
    # make a journal object, and map the main and historic records to it
    for canon, rest in clusters:
        j = Journal()
        
        cb = _to_journal_bibjson(canon)
        j.set_bibjson(cb)
        
        j.set_in_doaj(_is_in_doaj(canon))
        j.set_created(_created_date(canon))
        
        for p in rest:
            replaces = _get_replaces(p)
            isreplacedby = _get_isreplacedby(p)
            j.add_history(_to_journal_bibjson(p), replaces=replaces, isreplacedby=isreplacedby)
            
        j.save()
Beispiel #34
0
    def test_01_discover_duplicates(self, name, kwargs):

        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save()

        # determine what we need to load into the index
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(
                    eissn="1234-5678",
                    pissn="9876-5432",
                    doi=the_doi,
                    fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save(blocking=True)
                article_ids.append(article.id)
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            eissn = "1234=5678"  # one matching
            pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article)
        else:
            possible_articles = svc.discover_duplicates(article)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[
                        -1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[
                        0]
            else:
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
Beispiel #35
0
    def add_journal_metadata(self, j=None, reg=None):
        """
        this function makes sure the article is populated
        with all the relevant info from its owning parent object
        :param j: Pass in a Journal to bypass the (slow) locating step. MAKE SURE IT'S THE RIGHT ONE!
        """

        # Record the data that is copied into the article into the "reg"ister, in case the
        # caller needs to know exactly and only which information was copied
        if reg is None:
            reg = Journal()
        rbj = reg.bibjson()

        if j is None:
            journal = self.get_journal()
        else:
            journal = j

        # we were unable to find a journal
        if journal is None:
            raise NoJournalException("Unable to find a journal associated with this article")

        # if we get to here, we have a journal record we want to pull data from
        jbib = journal.bibjson()
        bibjson = self.bibjson()

        # tripwire to be tripped if the journal makes changes to the article
        trip = False

        if bibjson.subjects() != jbib.subjects():
            trip = True
            bibjson.set_subjects(jbib.subjects())
        rbj.set_subjects(jbib.subjects())

        if jbib.title is not None:
            if bibjson.journal_title != jbib.title:
                trip = True
                bibjson.journal_title = jbib.title
            rbj.title = jbib.title

        if jbib.get_license() is not None:
            lic = jbib.get_license()
            alic = bibjson.get_journal_license()

            if lic is not None and (alic is None or (lic.get("title") != alic.get("title") or
                    lic.get("type") != alic.get("type") or
                    lic.get("url") != alic.get("url") or
                    lic.get("version") != alic.get("version") or
                    lic.get("open_access") != alic.get("open_access"))):

                bibjson.set_journal_license(lic.get("title"),
                                            lic.get("type"),
                                            lic.get("url"),
                                            lic.get("version"),
                                            lic.get("open_access"))
                trip = True

            rbj.set_license(lic.get("title"),
                            lic.get("type"),
                            lic.get("url"),
                            lic.get("version"),
                            lic.get("open_access"))

        if len(jbib.language) > 0:
            jlang = jbib.language
            alang = bibjson.journal_language
            jlang.sort()
            alang.sort()
            if jlang != alang:
                bibjson.journal_language = jbib.language
                trip = True
            rbj.set_language(jbib.language)

        if jbib.country is not None:
            if jbib.country != bibjson.journal_country:
                bibjson.journal_country = jbib.country
                trip = True
            rbj.country = jbib.country

        if jbib.publisher:
            if jbib.publisher != bibjson.publisher:
                bibjson.publisher = jbib.publisher
                trip = True
            rbj.publisher = jbib.publisher

        # Copy the seal info, in_doaj status and the journal's ISSNs
        if journal.is_in_doaj() != self.is_in_doaj():
            self.set_in_doaj(journal.is_in_doaj())
            trip = True
        reg.set_in_doaj(journal.is_in_doaj())

        if journal.has_seal() != self.has_seal():
            self.set_seal(journal.has_seal())
            trip = True
        reg.set_seal(journal.has_seal())

        try:
            aissns = bibjson.journal_issns
            jissns = jbib.issns()
            aissns.sort()
            jissns.sort()
            if aissns != jissns:
                bibjson.journal_issns = jbib.issns()
                trip = True

            eissns = jbib.get_identifiers(jbib.E_ISSN)
            pissns = jbib.get_identifiers(jbib.P_ISSN)
            if eissns is not None and len(eissns) > 0:
                rbj.add_identifier(rbj.E_ISSN, eissns[0])
            if pissns is not None and len(pissns) > 0:
                rbj.add_identifier(rbj.P_ISSN, pissns[0])
        except KeyError:
            # No issns, don't worry about it for now
            pass

        return trip
Beispiel #36
0
    def form2obj(form, existing_journal):
        journal = Journal()
        bibjson = journal.bibjson()

        # The if statements that wrap practically every field are there due to this
        # form being used to edit old journals which don't necessarily have most of
        # this info.
        # It also allows admins to delete the contents of any field if they wish,
        # by ticking the "Allow incomplete form" checkbox and deleting the contents
        # of that field. The if condition(s) will then *not* add the relevant field to the
        # new journal object being constructed.
        # add_url in the journal model has a safeguard against empty URL-s.

        if form.title.data:
            bibjson.title = form.title.data
        bibjson.add_url(form.url.data, urltype='homepage')
        if form.alternative_title.data:
            bibjson.alternative_title = form.alternative_title.data
        if form.pissn.data:
            bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
        if form.eissn.data:
            bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
        if form.publisher.data:
            bibjson.publisher = form.publisher.data
        if form.society_institution.data:
            bibjson.institution = form.society_institution.data
        if form.platform.data:
            bibjson.provider = form.platform.data
        if form.contact_name.data or form.contact_email.data:
            journal.add_contact(form.contact_name.data,
                                form.contact_email.data)
        if form.country.data:
            bibjson.country = form.country.data

        if forms.interpret_special(form.processing_charges.data):
            bibjson.set_apc(form.processing_charges_currency.data,
                            form.processing_charges_amount.data)

        if forms.interpret_special(form.submission_charges.data):
            bibjson.set_submission_charges(
                form.submission_charges_currency.data,
                form.submission_charges_amount.data)

        if forms.interpret_special(form.waiver_policy.data):
            bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

        # checkboxes
        if forms.interpret_special(form.digital_archiving_policy.data
                                   ) or form.digital_archiving_policy_url.data:
            archiving_policies = forms.interpret_special(
                form.digital_archiving_policy.data)
            archiving_policies = forms.interpret_other(
                archiving_policies,
                form.digital_archiving_policy_other.data,
                store_other_label=True)
            archiving_policies = forms.interpret_other(
                archiving_policies,
                form.digital_archiving_policy_library.data,
                forms.digital_archiving_policy_specific_library_value,
                store_other_label=True)
            bibjson.set_archiving_policy(
                archiving_policies, form.digital_archiving_policy_url.data)

        if form.crawl_permission.data and form.crawl_permission.data != 'None':
            bibjson.allows_fulltext_indexing = forms.interpret_special(
                form.crawl_permission.data)  # just binary

        # checkboxes
        article_ids = forms.interpret_special(form.article_identifiers.data)
        article_ids = forms.interpret_other(
            article_ids, form.article_identifiers_other.data)
        if article_ids:
            bibjson.persistent_identifier_scheme = article_ids

        if (form.download_statistics.data and form.download_statistics.data !=
                'None') or form.download_statistics_url.data:
            bibjson.set_article_statistics(
                form.download_statistics_url.data,
                forms.interpret_special(form.download_statistics.data))

        if form.first_fulltext_oa_year.data:
            bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

        # checkboxes
        fulltext_format = forms.interpret_other(
            form.fulltext_format.data, form.fulltext_format_other.data)
        if fulltext_format:
            bibjson.format = fulltext_format

        if form.keywords.data:
            bibjson.set_keywords(form.keywords.data)  # tag list field

        if form.languages.data:
            bibjson.set_language(form.languages.data
                                 )  # select multiple field - gives a list back

        bibjson.add_url(form.editorial_board_url.data,
                        urltype='editorial_board')

        if form.review_process.data or form.review_process_url.data:
            bibjson.set_editorial_review(form.review_process.data,
                                         form.review_process_url.data)

        bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
        bibjson.add_url(form.instructions_authors_url.data,
                        urltype='author_instructions')

        if (form.plagiarism_screening.data and form.plagiarism_screening.data
                != 'None') or form.plagiarism_screening_url.data:
            bibjson.set_plagiarism_detection(
                form.plagiarism_screening_url.data,
                has_detection=forms.interpret_special(
                    form.plagiarism_screening.data))

        if form.publication_time.data:
            bibjson.publication_time = form.publication_time.data

        bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

        license_type = forms.interpret_other(form.license.data,
                                             form.license_other.data)
        if forms.interpret_special(license_type):
            # "None" and "False" as strings like they come out of the WTForms processing)
            # would get interpreted correctly by this check, so "None" licenses should not appear
            if license_type in licenses:
                by = licenses[license_type]['BY']
                nc = licenses[license_type]['NC']
                nd = licenses[license_type]['ND']
                sa = licenses[license_type]['SA']
                license_title = licenses[license_type]['title']
            elif form.license_checkbox.data:
                by = True if 'BY' in form.license_checkbox.data else False
                nc = True if 'NC' in form.license_checkbox.data else False
                nd = True if 'ND' in form.license_checkbox.data else False
                sa = True if 'SA' in form.license_checkbox.data else False
                license_title = license_type
            else:
                by = None
                nc = None
                nd = None
                sa = None
                license_title = license_type

            bibjson.set_license(
                license_title,
                license_type,
                url=form.license_url.data,
                open_access=forms.interpret_special(form.open_access.data),
                by=by,
                nc=nc,
                nd=nd,
                sa=sa,
                embedded=forms.interpret_special(form.license_embedded.data),
                embedded_example_url=form.license_embedded_url.data)

        # checkboxes
        deposit_policies = forms.interpret_special(
            form.deposit_policy.data)  # need empty list if it's just "None"
        deposit_policies = forms.interpret_other(
            deposit_policies, form.deposit_policy_other.data)
        if deposit_policies:
            bibjson.deposit_policy = deposit_policies

        if form.copyright.data and form.copyright.data != 'None':
            holds_copyright = forms.interpret_other(
                forms.interpret_special(form.copyright.data),
                form.copyright_other.data)
            bibjson.set_author_copyright(form.copyright_url.data,
                                         holds_copyright=holds_copyright)

        if form.publishing_rights.data and form.publishing_rights.data != 'None':
            publishing_rights = forms.interpret_other(
                forms.interpret_special(form.publishing_rights.data),
                form.publishing_rights_other.data)
            bibjson.set_author_publishing_rights(
                form.publishing_rights_url.data,
                holds_rights=publishing_rights)

        # need to copy over the notes from the existing journal object, if any, otherwise
        # the dates on all the notes will get reset to right now (i.e. last_updated)
        # since the journal object we're creating in this xwalk is a new, empty one
        journal.set_notes(existing_journal.notes())

        # generate index of notes, just the text
        curnotes = []
        for curnote in journal.notes():
            curnotes.append(curnote['note'])

        # add any new notes
        formnotes = []
        for formnote in form.notes.data:
            if formnote['note']:
                if formnote['note'] not in curnotes and formnote["note"] != "":
                    journal.add_note(formnote['note'])
                # also generate another text index of notes, this time an index of the form notes
                formnotes.append(formnote['note'])

        if current_user.has_role("delete_note"):
            # delete all notes not coming back from the form, means they've been deleted
            # also if one of the saved notes is completely blank, delete it
            for curnote in journal.notes()[:]:
                if not curnote['note'] or curnote['note'] not in formnotes:
                    journal.remove_note(curnote)

        new_subjects = []
        for code in form.subject.data:
            sobj = {
                "scheme": 'LCC',
                "term": lcc.lookup_code(code),
                "code": code
            }
            new_subjects.append(sobj)
        bibjson.set_subjects(new_subjects)

        owner = form.owner.data.strip()
        if owner:
            journal.set_owner(owner)

        editor_group = form.editor_group.data.strip()
        if editor_group:
            journal.set_editor_group(editor_group)

        editor = form.editor.data.strip()
        if editor:
            journal.set_editor(editor)

        # old fields - only create them in the journal record if the values actually exist
        # need to use interpret_special in the test condition in case 'None' comes back from the form
        if getattr(form, 'author_pays', None):
            if forms.interpret_special(form.author_pays.data):
                bibjson.author_pays = form.author_pays.data
        if getattr(form, 'author_pays_url', None):
            if forms.interpret_special(form.author_pays_url.data):
                bibjson.author_pays_url = form.author_pays_url.data
        if getattr(form, 'oa_end_year', None):
            if forms.interpret_special(form.oa_end_year.data):
                bibjson.set_oa_end(form.oa_end_year.data)

        return journal
    def test_01_accept_application(self, name, application_type, account_type, manual_update, provenance, raises, result_provenance, result_manual_update):

        ###############################################
        ## set up

        # create the application
        application = None
        if application_type == "save_fail":
            application = Suggestion(**ApplicationFixtureFactory.make_application_source())
            application.save = mock_save
            Journal.save = mock_save
        elif application_type == "with_current_journal":
            application = Suggestion(**ApplicationFixtureFactory.make_application_source())
            application.remove_notes()
            application.add_note("unique 1", "2002-01-01T00:00:00Z")
            application.add_note("duplicate", "2001-01-01T00:00:00Z")
            cj = application.current_journal
            journal = Journal(**JournalFixtureFactory.make_journal_source())
            journal.set_id(cj)
            journal.remove_notes()
            journal.add_note("unique 2", "2003-01-01T00:00:00Z")
            journal.add_note("duplicate", "2001-01-01T00:00:00Z")
            journal.save(blocking=True)
        elif application_type == "no_current_journal":
            application = Suggestion(**ApplicationFixtureFactory.make_application_source())
            application.remove_current_journal()


        acc = None
        if account_type == "not_allowed":
            acc = Account(**AccountFixtureFactory.make_publisher_source())
        elif account_type == "allowed":
            acc = Account(**AccountFixtureFactory.make_managing_editor_source())

        mu = None
        if manual_update in ["true", "false"]:
            mu = manual_update == "true"

        prov = None
        if provenance in ["true", "false"]:
            prov = provenance == "true"

        save = bool(randint(0,1))

        ###########################################################
        # Execution

        svc = DOAJ.applicationService()
        if raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.accept_application(application, acc, mu, prov)
        else:
            journal = svc.accept_application(application, acc, mu, prov, save_journal=save, save_application=save)

            # we need to sleep, so the index catches up
            time.sleep(1)

            # check a few common things
            assert application.application_status == constants.APPLICATION_STATUS_ACCEPTED
            assert application.current_journal is None
            assert journal.current_application is None
            assert application.related_journal == journal.id

            related = journal.related_applications
            if application_type == "with_current_journal":
                assert len(related) == 3
            elif application_type == "no_current_journal":
                assert len(related) == 1
            assert related[0].get("application_id") == application.id
            assert related[0].get("date_accepted") is not None

            if result_manual_update == "yes":
                assert journal.last_manual_update is not None
                assert journal.last_manual_update != "1970-01-01T00:00:00Z"
                assert application.last_manual_update is not None
                assert application.last_manual_update != "1970-01-01T00:00:00Z"
            elif result_manual_update == "no":
                assert journal.last_manual_update is None
                assert application.last_manual_update is None

            if application_type == "with_current_journal":
                assert len(journal.notes) == 3
                notevals = [note.get("note") for note in journal.notes]
                assert "duplicate" in notevals
                assert "unique 1" in notevals
                assert "unique 2" in notevals

            app_prov = Provenance.get_latest_by_resource_id(application.id)
            if result_provenance == "yes":
                assert app_prov is not None
            elif result_provenance == "no":
                assert app_prov is None

            if save:
                pass
Beispiel #38
0
    # first, get each application and consider it
    counter = 0
    for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
        counter += 1
        application = Suggestion(**result)
        application.remove_related_journal()

        # find all the journals that this application could be associated with (which we need to do by issn)
        issns = application.bibjson().issns()

        # query by each issn individually, because we're looking for the widest possible map.  Querying by
        # both would require both issns match
        related_journals = []
        related_journal_ids = []
        for issn in issns:
            journals = Journal.find_by_issn(issn)
            for journal in journals:
                if journal.id not in related_journal_ids:
                    related_journal_ids.append(journal.id)
                    related_journals.append(journal)

        if len(related_journals) > 0:
            # sort the journals by their created date
            related_journals = sorted(related_journals,
                                      key=lambda j: j.created_timestamp)

            # we set an application as having a related journal in the following conditions:
            # 1. The application was created before the journal and last updated near or after the journal created date,
            #       and this journal is the nearest one in time
            # 2. The last_reapplication date is after the application created date, and is the nearest one
            app_created = application.created_timestamp
    def test_01_delete_application(self, name, application_type, account_type, current_journal, related_journal, raises):

        ###############################################
        ## set up

        # create the test application (if needed), and the associated current_journal and related_journal in suitable states
        application = None
        cj = None
        rj = None
        if application_type == "found" or application_type == "locked":
            application = Suggestion(**ApplicationFixtureFactory.make_application_source())

            if current_journal == "none":
                application.remove_current_journal()
            elif current_journal == "not_found":
                application.set_current_journal("123456789987654321")
            elif current_journal == "found":
                cj = Journal(**JournalFixtureFactory.make_journal_source())
                cj.set_id(cj.makeid())
                cj.save(blocking=True)
                application.set_current_journal(cj.id)
            elif current_journal == "locked":
                cj = Journal(**JournalFixtureFactory.make_journal_source())
                cj.set_id(cj.makeid())
                cj.save(blocking=True)
                application.set_current_journal(cj.id)
                lock.lock(constants.LOCK_JOURNAL, cj.id, "otheruser")

            if related_journal == "none":
                application.remove_related_journal()
            elif related_journal == "not_found":
                application.set_related_journal("123456789987654321")
            elif related_journal == "found":
                rj = Journal(**JournalFixtureFactory.make_journal_source())
                rj.set_id(rj.makeid())
                rj.save(blocking=True)
                application.set_related_journal(rj.id)
            elif related_journal == "locked":
                rj = Journal(**JournalFixtureFactory.make_journal_source())
                rj.set_id(rj.makeid())
                rj.save(blocking=True)
                application.set_related_journal(rj.id)
                lock.lock(constants.LOCK_JOURNAL, rj.id, "otheruser")


        acc = None
        if account_type != "none":
            acc = Account(**AccountFixtureFactory.make_publisher_source())
            if account_type == "not_permitted":
                acc.remove_role("publisher")
            if application_type == "locked":
                thelock = lock.lock(constants.LOCK_APPLICATION, application.id, "otheruser")
                # we can't explicitly block on the lock, but we can halt until we confirm it is saved
                thelock.blockall([(thelock.id, thelock.last_updated)])

        application_id = None
        if application is not None:
            if acc is not None:
                application.set_owner(acc.id)
            application.save(blocking=True)
            application_id = application.id
        elif application_type == "not_found":
            application_id = u"sdjfasofwefkwflkajdfasjd"

        ###########################################################
        # Execution

        svc = DOAJ.applicationService()
        if raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.delete_application(application_id, acc)
            time.sleep(1)
            check_locks(application, cj, rj, acc)
        else:
            svc.delete_application(application_id, acc)

            # we need to sleep, so the index catches up
            time.sleep(1)

            # check that no locks remain set for this user
            check_locks(application, cj, rj, acc)

            # check that the application actually is gone
            if application is not None:
                assert Suggestion.pull(application.id) is None

            # check that the current journal no longer has a reference to the application
            if cj is not None:
                cj = Journal.pull(cj.id)
                assert cj.current_application is None

            # check that the related journal has a record that the application was deleted
            if rj is not None:
                rj = Journal.pull(rj.id)
                record = rj.related_application_record(application.id)
                assert "status" in record
                assert record["status"] == "deleted"
    def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
        # set up for the test
        #########################################

        cj = None
        has_seal = bool(randint(0, 1))
        application = None
        if application_type == "present":
            application = Suggestion(**ApplicationFixtureFactory.make_application_source())
            application.set_id(application.makeid())
            application.remove_contacts()
            application.remove_editor_group()
            application.remove_editor()
            application.remove_owner()
            application.remove_current_journal()
            application.remove_notes()

            if app_key_properties == "yes":
                application.add_contact("Application", "*****@*****.**")
                application.set_editor_group("appeditorgroup")
                application.set_editor("appeditor")
                application.set_owner("appowner")

            application.set_seal(has_seal)
            application.add_note("Application Note")

            if current_journal == "present":
                journal = Journal(**JournalFixtureFactory.make_journal_source())
                journal.remove_contacts()
                journal.add_contact("Journal", "*****@*****.**")
                journal.set_editor_group("journaleditorgroup")
                journal.set_editor("journaleditor")
                journal.set_owner("journalowner")
                journal.remove_current_application()
                journal.remove_notes()
                journal.add_note("Journal Note")
                journal.save(blocking=True)
                application.set_current_journal(journal.id)
                cj = journal
            elif current_journal == "missing":
                application.set_current_journal("123456789987654321")

        manual_update = None
        if manual_update_arg == "true":
            manual_update = True
        elif manual_update_arg == "false":
            manual_update = False

        # execute the test
        ########################################

        svc = DOAJ.applicationService()
        if raises is not None and raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.application_2_journal(application, manual_update)
        else:
            journal = svc.application_2_journal(application, manual_update)

            # check the result
            ######################################

            assert journal is not None
            assert isinstance(journal, Journal)
            assert journal.is_in_doaj() is True

            jbj = journal.bibjson().data
            del jbj["active"]
            assert jbj == application.bibjson().data

            if current_journal == "present":
                assert len(journal.related_applications) == 3
            else:
                assert len(journal.related_applications) == 1
            related = journal.related_application_record(application.id)
            assert related is not None

            if manual_update_arg == "true":
                assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

            if app_key_properties == "yes":
                contacts = journal.contacts()
                assert len(contacts) == 1
                assert contacts[0].get("name") == "Application"
                assert contacts[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "appeditorgroup"
                assert journal.editor == "appeditor"
                assert journal.owner == "appowner"
                assert journal.has_seal() == has_seal

                if current_journal == "present":
                    assert len(journal.notes) == 2
                else:
                    assert len(journal.notes) == 1

            elif app_key_properties == "no":
                if current_journal == "present":
                    contacts = journal.contacts()
                    assert len(contacts) == 1
                    assert contacts[0].get("name") == "Journal"
                    assert contacts[0].get("email") == "*****@*****.**"
                    assert journal.editor_group == "journaleditorgroup"
                    assert journal.editor == "journaleditor"
                    assert journal.owner == "journalowner"
                    assert journal.has_seal() == has_seal
                    assert len(journal.notes) == 2

                elif current_journal == "none" or current_journal == "missing":
                    contacts = journal.contacts()
                    assert len(contacts) == 0
                    assert journal.editor_group is None
                    assert journal.editor is None
                    assert journal.owner is None
                    assert journal.has_seal() == has_seal
                    assert len(journal.notes) == 1

            if current_journal == "present":
                assert cj.id == journal.id
                assert cj.created_date == journal.created_date
Beispiel #41
0
 def get_associated_journals(self):
     # find all matching journal record from the index
     allissns = self.bibjson().issns()
     return Journal.find_by_issn(allissns)
Beispiel #42
0
from portality.models import Journal
import sys

# FIXME: in an ideal world, the functional tests would also be wrapped by doaj.helpers.DoajTestCase
from doajtest.bootstrap import prepare_for_test
prepare_for_test()

journals = [
    {'title':'Title 1', 'publisher': 'Publisher 1', 'identifier':[{'type':'pissn', 'id': '1234-5678'}], 'active':True},
    {'title':'Title 2', 'publisher': 'Publisher 2', 'identifier':[{'type':'pissn', 'id': '0123-4567'}, {'type':'eissn', 'id': '7654-3210'}], 'license': [{'type': 'cc-by','open_access': True, 'url': 'http://opendefinition.org/licenses/cc-by/'}], 'active': True},
    {'title':'Title 3', 'publisher': 'Publisher 3', 'author_pays': False, 'active': False},
    {'title':'Title 4', 'publisher': 'Publisher 3', 'language': 'en', 'active': True},
    {'title':'The "Quoted" Journal of Testing CSV Quotes', 'publisher': 'Publisher 2', 'identifier':[{'type':'pissn', 'id': '2345-6789'}], 'active': False, 'language': 'bg', 'author_pays':True, 'active':True, },
]

for j in journals:
    jm = Journal(**{'bibjson':j})

    jm.save()

print 'Sent {0} Journal documents to the index'.format(len(journals))
print 'Refreshing the index'
Journal.refresh()
Beispiel #43
0
    # first, get each application and consider it
    counter = 0
    for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
        counter += 1
        application = Suggestion(**result)
        application.remove_related_journal()

        # find all the journals that this application could be associated with (which we need to do by issn)
        issns = application.bibjson().issns()

        # query by each issn individually, because we're looking for the widest possible map.  Querying by
        # both would require both issns match
        related_journals = []
        related_journal_ids = []
        for issn in issns:
            journals = Journal.find_by_issn(issn)
            for journal in journals:
                if journal.id not in related_journal_ids:
                    related_journal_ids.append(journal.id)
                    related_journals.append(journal)

        if len(related_journals) > 0:
            # sort the journals by their created date
            related_journals = sorted(related_journals, key=lambda j: j.created_timestamp)

            # we set an application as having a related journal in the following conditions:
            # 1. The application was created before the journal and last updated near or after the journal created date,
            #       and this journal is the nearest one in time
            # 2. The last_reapplication date is after the application created date, and is the nearest one
            app_created = application.created_timestamp
            for journal in related_journals:
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    with codecs.open(outfile_later, "wb",
                     "utf-8") as f, codecs.open(outfile_missing, "wb",
                                                "utf-8") as g:

        out_later = csv.writer(f)
        out_later.writerow([
            "Application ID", "Application Last Updated",
            "Latest Provenance Recorded", "Difference"
        ])

        out_missing = UnicodeWriter(g)
        out_missing.writerow([
            "Application ID", "Application Last Manual Update",
            "Latest Provenance Record", "ISSNs", "Title"
        ])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            print counter, application.id

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp,
                                           APP_TIMEZONE_CUTOFF)
                created = latest_prov.created_date
                pstamp = latest_prov.created_timestamp
                td = pstamp - lustamp
                diff = td.total_seconds()

                if diff > THRESHOLD:
                    out_later.writerow([
                        application.id, application.last_updated, created, diff
                    ])

            # Part 2 - missing journals
            if application.application_status == constants.APPLICATION_STATUS_ACCEPTED:
                missing = False

                # find the matching journals by issn or by title
                matching_journals = Journal.find_by_issn(
                    application.bibjson().issns())
                if len(matching_journals) == 0:
                    # Have another go, find by title
                    matching_journals = Journal.find_by_title(
                        application.bibjson().title)

                # if there are no matching journals, it is missing.
                if len(matching_journals) == 0:
                    missing = True
                else:
                    # if there are matching journals, find out if any of them are in the doaj.  If none, then journal is still missing
                    those_in_doaj = len(
                        [j for j in matching_journals if j.is_in_doaj()])
                    if those_in_doaj == 0:
                        missing = True

                # if the journal is missing, record it
                if missing:
                    created = ""
                    if latest_prov is not None:
                        created = latest_prov.created_date
                    out_missing.writerow([
                        application.id, application.last_manual_update,
                        created, " ".join(application.bibjson().issns()),
                        application.bibjson().title
                    ])

        print "processed", counter, "suggestions"
    def test_01_create_article(self, name, kwargs):

        article_arg = kwargs.get("article")
        article_duplicate_arg = kwargs.get("article_duplicate")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        dry_run = None
        if dry_run_arg != "none":
            dry_run = True if dry_run_arg == "true" else False

        raises = EXCEPTIONS.get(raises_arg)

        eissn = "1234-5678"
        pissn = "9876-5432"

        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        article = None
        original_id = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            original_id = article.id

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        gd_mock = None
        if article_duplicate_arg == "yes":
            gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        mock_article = self.svc.get_duplicate(article)

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                        limit_to_account, add_journal_info, dry_run)
        else:
            report = self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                             limit_to_account, add_journal_info, dry_run)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes":
                original = Article.pull(original_id)
                assert original is not None
                assert report["update"] == 0
            elif article is not None:
                original = Article.pull(original_id)
                assert original is None

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None:
                merged = Article.pull(mock_article.id)
                assert merged is None

            if add_journal_info:
                assert article.bibjson().journal_title == "Add Journal Info Title"
Beispiel #46
0
    def add_journal_metadata(self, j=None, reg=None):
        """
        this function makes sure the article is populated
        with all the relevant info from its owning parent object
        :param j: Pass in a Journal to bypass the (slow) locating step. MAKE SURE IT'S THE RIGHT ONE!
        """

        # Record the data that is copied into the article into the "reg"ister, in case the
        # caller needs to know exactly and only which information was copied
        if reg is None:
            reg = Journal()
        rbj = reg.bibjson()

        if j is None:
            journal = self.get_journal()
        else:
            journal = j

        # we were unable to find a journal
        if journal is None:
            raise NoJournalException(
                "Unable to find a journal associated with this article")

        # if we get to here, we have a journal record we want to pull data from
        jbib = journal.bibjson()
        bibjson = self.bibjson()

        # tripwire to be tripped if the journal makes changes to the article
        trip = False

        if bibjson.subjects() != jbib.subjects():
            trip = True
            bibjson.set_subjects(jbib.subjects())
        rbj.set_subjects(jbib.subjects())

        if jbib.title is not None:
            if bibjson.journal_title != jbib.title:
                trip = True
                bibjson.journal_title = jbib.title
            rbj.title = jbib.title

        if jbib.get_license() is not None:
            lic = jbib.get_license()
            alic = bibjson.get_journal_license()

            if lic is not None and (
                    alic is None or
                (lic.get("title") != alic.get("title")
                 or lic.get("type") != alic.get("type")
                 or lic.get("url") != alic.get("url")
                 or lic.get("version") != alic.get("version")
                 or lic.get("open_access") != alic.get("open_access"))):

                bibjson.set_journal_license(lic.get("title"), lic.get("type"),
                                            lic.get("url"), lic.get("version"),
                                            lic.get("open_access"))
                trip = True

            rbj.set_license(lic.get("title"), lic.get("type"), lic.get("url"),
                            lic.get("version"), lic.get("open_access"))

        if len(jbib.language) > 0:
            jlang = jbib.language
            alang = bibjson.journal_language
            jlang.sort()
            alang.sort()
            if jlang != alang:
                bibjson.journal_language = jbib.language
                trip = True
            rbj.set_language(jbib.language)

        if jbib.country is not None:
            if jbib.country != bibjson.journal_country:
                bibjson.journal_country = jbib.country
                trip = True
            rbj.country = jbib.country

        if jbib.publisher:
            if jbib.publisher != bibjson.publisher:
                bibjson.publisher = jbib.publisher
                trip = True
            rbj.publisher = jbib.publisher

        # Copy the seal info, in_doaj status and the journal's ISSNs
        if journal.is_in_doaj() != self.is_in_doaj():
            self.set_in_doaj(journal.is_in_doaj())
            trip = True
        reg.set_in_doaj(journal.is_in_doaj())

        if journal.has_seal() != self.has_seal():
            self.set_seal(journal.has_seal())
            trip = True
        reg.set_seal(journal.has_seal())

        try:
            aissns = bibjson.journal_issns
            jissns = jbib.issns()
            aissns.sort()
            jissns.sort()
            if aissns != jissns:
                bibjson.journal_issns = jbib.issns()
                trip = True

            eissns = jbib.get_identifiers(jbib.E_ISSN)
            pissns = jbib.get_identifiers(jbib.P_ISSN)
            if eissns is not None and len(eissns) > 0:
                rbj.add_identifier(rbj.E_ISSN, eissns[0])
            if pissns is not None and len(pissns) > 0:
                rbj.add_identifier(rbj.P_ISSN, pissns[0])
        except KeyError:
            # No issns, don't worry about it for now
            pass

        return trip
    def test_01_reject_application(self, name, application, application_status, account, prov, current_journal, note, save, raises=None):

        #######################################
        ## set up

        if save == "fail":
            Suggestion.save = mock_save_fail

        ap = None
        journal = None
        if application == "exists":
            ap = Suggestion(**ApplicationFixtureFactory.make_application_source())
            ap.set_application_status(application_status)
            ap.set_id(ap.makeid())
            ap.remove_notes()
            if current_journal == "yes":
                journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
                journal.set_id(journal.makeid())
                journal.set_current_application(ap.id)
                journal.save(blocking=True)
                ap.set_current_journal(journal.id)
            else:
                ap.remove_current_journal()

        acc = None
        if account == "publisher":
            acc = Account(**AccountFixtureFactory.make_publisher_source())
        elif account == "admin":
            acc = Account(**AccountFixtureFactory.make_managing_editor_source())

        provenance = None
        if prov != "none":
            provenance = prov == "true"

        thenote = None
        if note == "yes":
            thenote = "abcdefg"

        ########################################
        ## execute

        svc = DOAJ.applicationService()
        if raises is not None and raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.reject_application(ap, acc, provenance, note=thenote)
        else:
            svc.reject_application(ap, acc, provenance, note=thenote)
            time.sleep(1)

            #######################################
            ## Check

            ap2 = Suggestion.pull(ap.id)
            assert ap2 is not None
            assert ap2.application_status == constants.APPLICATION_STATUS_REJECTED
            assert ap2.current_journal is None

            # check the updated and manually updated date are essentially the same (they can theoretically differ
            # by a small amount just based on when they are set)
            updated_spread = abs((ap2.last_updated_timestamp - ap2.last_manual_update_timestamp).total_seconds())
            assert updated_spread <= 1.0

            if current_journal == "yes" and journal is not None:
                j2 = Journal.pull(journal.id)
                assert j2 is not None
                assert j2.current_application is None
                assert ap2.related_journal == j2.id

            if prov == "true":
                pr = Provenance.get_latest_by_resource_id(ap.id)
                assert pr is not None

            if note == "yes":
                assert len(ap2.notes) == 1
                assert ap2.notes[0].get("note") == "abcdefg"
            elif note == "no":
                assert len(ap2.notes) == 0
def journals_applications_provenance(outfile_applications, outfile_accounts,
                                     outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(
            outfile_accounts, "wb",
            "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied",
            "Application ID", "Application Last Updated", "Application Status",
            "Published Diff", "Latest Edit Recorded",
            "Latest Accepted Recorded"
        ])

        out_accounts = csv.writer(g)
        out_accounts.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ",
            "Missing Account ID"
        ])

        out_reapps = csv.writer(h)
        out_reapps.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied",
            "Application ID", "Application Created",
            "Application Last Updated", "Application Last Manual Update",
            "Application Status", "Published Diff"
        ])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp,
                                           APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(
                latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"][
                    "resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"][
                    "action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"][
                    "resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"][
                    "action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request, latest.id,
                    latest.last_updated, latest.application_status, diff,
                    last_edit, last_accept
                ])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj(
            ):
                out_reapps.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request, latest.id,
                    latest.created_date, latest.last_updated,
                    latest.last_manual_update, latest.application_status, mdiff
                ])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request,
                    str(journal.is_in_doaj()), "NO OWNER"
                ])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([
                        journal.id, journal.created_date,
                        journal.last_update_request,
                        str(journal.is_in_doaj()), owner
                    ])

        print "processed", counter, "journals"
    def test_01_discover_duplicates(self, name, kwargs):

        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save(blocking=True)

        # determine what we need to load into the index
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save()
                article_ids.append(article.id)
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            eissn = "1234=5678" # one matching
            pissn = "6789-1234" # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article, owner_id)
        else:
            possible_articles = svc.discover_duplicates(article, owner_id)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[-1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(outfile_accounts, "wb", "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), "NO OWNER"])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), owner])

        print "processed", counter, "journals"
Beispiel #51
0
    def test_01_update_request(self, name, journal_id, journal_lock,
                               account, account_role, account_is_owner,
                               current_applications, application_lock, application_status,
                               completed_applications, raises,
                               return_app, return_jlock, return_alock,
                               db_jlock, db_alock, db_app):

        ###############################################
        ## set up

        # create the journal
        journal = None
        jid = None
        if journal_id == "valid":
            journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
            journal.remove_related_applications()
            journal.remove_current_application()
            jid = journal.id
        elif journal_id == "not_in_doaj":
            journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=False))
            journal.remove_related_applications()
            journal.remove_current_application()
            jid = journal.id
        elif journal_id == "missing":
            jid = uuid.uuid4().hex

        acc = None
        if account == "yes":
            acc = Account(**AccountFixtureFactory.make_publisher_source())
            if account_role == "none":
                acc.remove_role("publisher")
            elif account_role == "admin":
                acc.remove_role("publisher")
                acc.add_role("admin")
            acc.set_id(acc.makeid())
            if account_is_owner == "yes":
                acc.set_id(journal.owner)

        if journal_lock == "yes":
            lock.lock("journal", jid, "someoneelse", blocking=True)

        latest_app = None
        current_app_count = int(current_applications)
        for i in range(current_app_count):
            app = Suggestion(**ApplicationFixtureFactory.make_application_source())
            app.set_id(app.makeid())
            app.set_created("198" + str(i) + "-01-01T00:00:00Z")
            app.set_current_journal(jid)
            app.save()
            latest_app = app
            if journal is not None:
                journal.set_current_application(app.id)

        comp_app_count = int(completed_applications)
        for i in range(comp_app_count):
            app = Suggestion(**ApplicationFixtureFactory.make_application_source())
            app.set_id(app.makeid())
            app.set_created("197" + str(i) + "-01-01T00:00:00Z")
            app.set_related_journal(jid)
            app.save()
            if journal is not None:
                journal.add_related_application(app.id, date_accepted=app.created_date)

        if current_app_count == 0 and comp_app_count == 0:
            # save at least one record to initialise the index mapping, otherwise tests fail
            app = Suggestion(**ApplicationFixtureFactory.make_application_source())
            app.set_id(app.makeid())
            app.save()

        if application_lock == "yes":
            lock.lock("suggestion", latest_app.id, "someoneelse", blocking=True)

        if application_status != "n/a":
            latest_app.set_application_status(application_status)
            latest_app.save(blocking=True)

        # finally save the journal record, ensuring we get a blocking save, so everything
        # above here should be synchronised with the repo
        if journal is not None:
            journal.save(blocking=True)

        ###########################################################
        # Execution

        svc = DOAJ.applicationService()
        if raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.update_request_for_journal(jid, acc)
        else:
            application, jlock, alock = svc.update_request_for_journal(jid, acc)

            # we need to sleep, so the index catches up
            time.sleep(1)

            if return_app == "none":
                assert application is None
            elif return_app == "yes":
                assert application is not None

            if return_jlock == "none":
                assert jlock is None
            elif return_jlock == "yes":
                assert jlock is not None

            if return_alock == "none":
                assert alock is None
            elif return_alock == "yes":
                assert alock is not None

            if db_jlock == "no" and acc is not None:
                assert not lock.has_lock("journal", jid, acc.id)
            elif db_jlock == "yes" and acc is not None:
                assert lock.has_lock("journal", jid, acc.id)

            if db_alock == "no" and application.id is not None and acc is not None:
                assert not lock.has_lock("suggestion", application.id, acc.id)
            elif db_alock == "yes" and application.id is not None and acc is not None:
                assert lock.has_lock("suggestion", application.id, acc.id)

            if db_app == "no" and application.id is not None:
                indb = Suggestion.q2obj(q="id.exact:" + application.id)
                assert indb is None
            elif db_app == "yes" and application.id is not None:
                indb = Suggestion.q2obj(q="id.exact:" + application.id)
                assert indb is not None

            if current_app_count == 0 and comp_app_count == 0 and application is not None:
                assert application.article_metadata is None
                assert application.articles_last_year is None
            elif application is not None:
                assert application.article_metadata is not None
                assert application.articles_last_year is not None