Example #1
    def setUp(self):
        self.init_index()

        app.config["STORE_IMPL"] = "portality.store.StoreLocal"
        app.config["STORE_LOCAL_DIR"] = paths.rel2abs(__file__, "..", "tmp",
                                                      "store", "main")
        app.config["STORE_TMP_DIR"] = paths.rel2abs(__file__, "..", "tmp",
                                                    "store", "tmp")
Example #2
    def test_01_duplicates_report(self):
        """Check duplication reporting across all articles in the index"""

        # Create 2 identical articles, a duplicate pair
        article1 = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True))
        a1_doi = article1.bibjson().get_identifiers('doi')
        assert a1_doi is not None
        article1.save(blocking=True)

        time.sleep(1)

        article2 = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True))
        a2_doi = article2.bibjson().get_identifiers('doi')
        assert a2_doi == a1_doi
        article2.save(blocking=True)

        # Run the reporting task
        user = app.config.get("SYSTEM_USERNAME")
        job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(
            user, outdir=TMP_DIR)
        task = article_duplicate_report.ArticleDuplicateReportBackgroundTask(job)
        task.run()

        # The audit log should show we saved the reports to the TMP_DIR defined above
        audit_1 = job.audit.pop(0)
        assert audit_1.get('message', '').endswith(TMP_DIR)
        assert os.path.exists(TMP_DIR + '/duplicate_articles_global_' +
                              dates.today() + '.csv')

        # It should also clean up its interim article csv
        assert not os.path.exists(
            paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

        # The duplicates should be detected and appear in the report and audit summary count
        with open(TMP_DIR + '/duplicate_articles_global_' + dates.today() +
                  '.csv') as f:
            csvlines = f.readlines()
            # We expect one result line + headings: our newest article has 1 duplicate
            res = csvlines.pop()
            assert res.startswith(article2.id)  # The newest comes first, so article1 is article2's duplicate.
            assert article1.id in res
            assert 'doi+fulltext' in res

        audit_2 = job.audit.pop(0)
        assert audit_2.get('message', '') == '2 articles processed for duplicates. 1 global duplicate sets found.'
Example #3
    def setUp(self):
        super(TestPublicDataDumpTask, self).setUp()

        self.store_tmp_imp = app.config.get("STORE_TMP_IMPL")
        self.store_imp = app.config.get("STORE_IMPL")
        self.discovery_records_per_file = app.config.get("DISCOVERY_RECORDS_PER_FILE")
        self.store_local_dir = app.config["STORE_LOCAL_DIR"]
        self.store_tmp_dir = app.config["STORE_TMP_DIR"]
        self.cache = models.Cache

        app.config["STORE_IMPL"] = "portality.store.StoreLocal"
        app.config["STORE_LOCAL_DIR"] = rel2abs(__file__, "..", "tmp", "store", "main")
        app.config["STORE_TMP_DIR"] = rel2abs(__file__, "..", "tmp", "store", "tmp")
        os.makedirs(app.config["STORE_LOCAL_DIR"])
        os.makedirs(app.config["STORE_TMP_DIR"])

        models.cache.Cache = ModelCacheMockFactory.in_memory()
Example #4
    def setUp(self):
        super(TestPublicDataDumpTask, self).setUp()

        self.store_tmp_imp = app.config.get("STORE_TMP_IMPL")
        self.store_imp = app.config.get("STORE_IMPL")
        self.discovery_records_per_file = app.config.get(
            "DISCOVERY_RECORDS_PER_FILE")
        self.store_local_dir = app.config["STORE_LOCAL_DIR"]
        self.store_tmp_dir = app.config["STORE_TMP_DIR"]
        self.cache = models.Cache

        app.config["STORE_IMPL"] = "portality.store.StoreLocal"
        app.config["STORE_LOCAL_DIR"] = rel2abs(__file__, "..", "tmp", "store",
                                                "main")
        app.config["STORE_TMP_DIR"] = rel2abs(__file__, "..", "tmp", "store",
                                              "tmp")
        os.makedirs(app.config["STORE_LOCAL_DIR"])
        os.makedirs(app.config["STORE_TMP_DIR"])

        models.cache.Cache = ModelCacheMockFactory.in_memory()
Example #5
    def tearDown(self):
        app.config["STORE_TMP_IMPL"] = self.store_tmp_imp
        app.config["STORE_IMPL"] = self.store_imp
        app.config["DISCOVERY_RECORDS_PER_FILE"] = self.discovery_records_per_file

        shutil.rmtree(rel2abs(__file__, "..", "tmp"))
        app.config["STORE_LOCAL_DIR"] = self.store_local_dir
        app.config["STORE_TMP_DIR"] = self.store_tmp_dir

        models.cache.Cache = self.cache

        super(TestPublicDataDumpTask, self).tearDown()
Example #6
def load_from_matrix(filename, test_ids):
    if test_ids is None:
        test_ids = []
    with open(paths.rel2abs(__file__, "matrices", filename)) as f:
        reader = csv.reader(f)
        next(reader)   # skip the header row
        cases = []
        for row in reader:
            if row[0] in test_ids or len(test_ids) == 0:
                row[0] = "row_id_" + row[0]
                cases.append(tuple(row))
        return cases
Example #7
    def test_01_duplicates_report(self):
        """Check duplication reporting across all articles in the index"""

        # Create 2 identical articles, a duplicate pair
        article1 = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True
        ))
        a1_doi = article1.bibjson().get_identifiers('doi')
        assert a1_doi is not None
        article1.save(blocking=True)

        time.sleep(1)

        article2 = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True
        ))
        a2_doi = article2.bibjson().get_identifiers('doi')
        assert a2_doi == a1_doi
        article2.save(blocking=True)

        # Run the reporting task
        user = app.config.get("SYSTEM_USERNAME")
        job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(user, outdir=TMP_DIR)
        task = article_duplicate_report.ArticleDuplicateReportBackgroundTask(job)
        task.run()

        # The audit log should show we saved the reports to the TMP_DIR defined above
        audit_1 = job.audit.pop(0)
        assert audit_1.get('message', '').endswith(TMP_DIR)
        assert os.path.exists(TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv')

        # It should also clean up its interim article csv
        assert not os.path.exists(paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

        # The duplicates should be detected and appear in the report and audit summary count
        with open(TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv') as f:
            csvlines = f.readlines()
            # We expect one result line + headings: our newest article has 1 duplicate
            res = csvlines.pop()
            assert res.startswith(article2.id)            # The newest comes first, so article1 is article2's duplicate.
            assert article1.id in res
            assert 'doi+fulltext' in res

        audit_2 = job.audit.pop(0)
        assert audit_2.get('message', '') == '2 articles processed for duplicates. 1 global duplicate sets found.'
Example #8
def load_from_matrix(filename, test_ids):
    if test_ids is None:
        test_ids = []
    with open(paths.rel2abs(__file__, "matrices", filename)) as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        cases = []
        for row in reader:
            if row[0] in test_ids or len(test_ids) == 0:
                row[0] = "row_id_" + row[0]
                cases.append(tuple(row))
        return cases
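For orientation, load_from_matrix above takes the bare CSV filename under the matrices directory plus an optional list of row ids to restrict the run. A hypothetical invocation follows; the filename is illustrative, not one of the real matrix files:

# Load every case from a matrix CSV (hypothetical filename):
all_cases = load_from_matrix("my_feature.csv", test_ids=None)

# Restrict the run to specific rows; each returned tuple begins with a
# "row_id_<id>" label, handy for naming parameterised test cases:
some_cases = load_from_matrix("my_feature.csv", test_ids=["1", "7"])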
Example #9
    def tearDown(self):
        app.config["STORE_TMP_IMPL"] = self.store_tmp_imp
        app.config["STORE_IMPL"] = self.store_imp
        app.config["DISCOVERY_RECORDS_PER_FILE"] = self.discovery_records_per_file

        shutil.rmtree(rel2abs(__file__, "..", "tmp"))
        app.config["STORE_LOCAL_DIR"] = self.store_local_dir
        app.config["STORE_TMP_DIR"] = self.store_tmp_dir

        models.cache.Cache = self.cache

        super(TestPublicDataDumpTask, self).tearDown()
Example #10
def prepare_update_publisher_load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "article_create_article"), "prepare_update_publisher",
                               "test_id",
                               {"test_id": []})
Example #11
def load_cases():
    return load_parameter_sets(
        rel2abs(__file__, "..", "matrices", "tasks.public_data_dump"),
        "data_dump", "test_id", {"test_id": []})
def load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "article_batch_create_article"), "batch_create_article", "test_id",
                               {"test_id" : []})
Example #13
#MAIL_PASSWORD              # default None
#MAIL_DEFAULT_SENDER        # default None
#MAIL_MAX_EMAILS            # default None
#MAIL_SUPPRESS_SEND         # default app.testing

# ================================
# File store

# put this in your production.cfg, to store on S3:
# STORE_IMPL = "portality.store.StoreS3"

STORE_IMPL = "portality.store.StoreLocal"
STORE_TMP_IMPL = "portality.store.TempStore"

from portality.lib import paths
STORE_LOCAL_DIR = paths.rel2abs(__file__, "..", "local_store", "main")
STORE_TMP_DIR = paths.rel2abs(__file__, "..", "local_store", "tmp")
STORE_LOCAL_EXPOSE = False  # if you want to allow files in the local store to be exposed under /store/<path> urls.  For dev only.

# containers (buckets in AWS) where various content will be stored
# These values are placeholders, and must be overridden in live deployment
# this prevents test environments from accidentally writing to the production buckets
STORE_ANON_DATA_CONTAINER = "doaj-anon-data-placeholder"
STORE_CACHE_CONTAINER = "doaj-data-cache-placeholder"
STORE_PUBLIC_DATA_DUMP_CONTAINER = "doaj-data-dump-placeholder"


# S3 credentials for relevant scopes
STORE_S3_SCOPES = {
    "anon_data" : {
        "aws_access_key_id" : "put this in your dev/test/production.cfg",
YEAR_COMPLETE_OUTPUT = [
    ["User", "2015"],
    ["u1", 15],
    ["u2", 40],
    ["u3", 81]
]

APPLICATION_YEAR_OUTPUT = [
    ["Country", "2010", "2011", "2012", "2013", "2014", "2015"],
    ["Angola", 0, 1, 2, 3, 4, 5],
    ["Belarus", 6, 7, 8 , 9, 10, 0],
    [u"Cambôdia", 11, 12, 13, 14, 15, 16]
]

TMP_DIR = paths.rel2abs(__file__, "resources/reports")


class TestReporting(DoajTestCase):
    def setUp(self):
        super(TestReporting, self).setUp()
        if os.path.exists(TMP_DIR):
            shutil.rmtree(TMP_DIR)
        os.mkdir(TMP_DIR)

    def tearDown(self):
        super(TestReporting, self).tearDown()
        shutil.rmtree(TMP_DIR)

    def _as_output(self, table):
        table = deepcopy(table)
Example #15
def load_cases():
    return load_parameter_sets(
        rel2abs(__file__, "..", "matrices", "article_is_legitimate_owner"),
        "is_legitimate_owner", "test_id", {"test_id": []})
Example #16
    def setUp(self):
        super(TestSitemap, self).setUp()
        self.cache_dir = app.config.get("CACHE_DIR")
        self.tmp_dir = paths.rel2abs(__file__, "tmp-cache")
        app.config["CACHE_DIR"] = self.tmp_dir
def load_cases():
    return load_parameter_sets(
        rel2abs(__file__, "..", "matrices", "article_issn_ownership_status"),
        "issn_ownership_status", "test_id", {"test_id": []})
Example #18
def load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "bll_journal_csv"), "journal_csv", "test_id",
                               {"test_id" : []})
def load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "article_discover_duplicates"), "discover_duplicates", "test_id",
                               {"test_id" : []})
Example #20
    def setUp(self):
        self.init_index()

        app.config["STORE_IMPL"] = "portality.store.StoreLocal"
        app.config["STORE_LOCAL_DIR"] = paths.rel2abs(__file__, "..", "tmp", "store", "main")
        app.config["STORE_TMP_DIR"] = paths.rel2abs(__file__, "..", "tmp", "store", "tmp")
Example #21
                        help='Schema version for the target XSD, e.g. 2.1',
                        required=True)
    parser.add_argument('-f',
                        '--filename',
                        help='filename for schema, including extension',
                        default='iso_639-2b.xsd')
    parser.add_argument(
        '-c',
        '--compare',
        help='Write a comparison of new and old schemas (optional: filename)',
        nargs='?',
        const='isolang_diff.html',
        default=None)

    args = parser.parse_args()
    dest_path = paths.rel2abs(__file__, '..', 'static', 'doaj', args.filename)

    # Retain our current languages file if we are overwriting it
    if os.path.exists(dest_path):
        print('Schema already exists with name {n} - replace? [y/N]'.format(
            n=args.filename))
        today = datetime.utcnow().strftime("%Y-%m-%d")
        resp = input(
            'Your existing file will be retained as {fn}.old.{td} : '.format(
                fn=args.filename, td=today))
        if resp.lower() == 'y':
            os.rename(dest_path, dest_path + '.old.' + today)

    # Write the new schema file
    with open(dest_path, 'wb') as f:
        write_lang_schema(f, args.version)
Example #22
def load_cases():
    return load_parameter_sets(
        rel2abs(__file__, "..", "matrices", "bll_journal_csv"), "journal_csv",
        "test_id", {"test_id": []})
def load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "tasks.public_data_dump"), "data_dump", "test_id",
                               {"test_id" : []})
Example #24
def load_cases():
    return load_parameter_sets(
        rel2abs(__file__, "..", "matrices", "article_discover_duplicates"),
        "discover_duplicates", "test_id", {"test_id": []})
    def setUp(self):
        super(TestScriptsAccountsWithMarketingConsent, self).setUp()
        self.tmp_dir = paths.rel2abs(__file__, "tmp_data")
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.mkdir(self.tmp_dir)
Example #26
    def tearDown(self):
        self.destroy_index()
        for f in self.list_today_article_history_files() + self.list_today_journal_history_files():
            os.remove(f)
        shutil.rmtree(paths.rel2abs(__file__, "..", "tmp"), ignore_errors=True)
Example #27
#MAIL_PASSWORD              # default None
#MAIL_DEFAULT_SENDER        # default None
#MAIL_MAX_EMAILS            # default None
#MAIL_SUPPRESS_SEND         # default app.testing

# ================================
# File store

# put this in your production.cfg, to store on S3:
# STORE_IMPL = "portality.store.StoreS3"

STORE_IMPL = "portality.store.StoreLocal"
STORE_TMP_IMPL = "portality.store.TempStore"

from portality.lib import paths
STORE_LOCAL_DIR = paths.rel2abs(__file__, "..", "local_store", "main")
STORE_TMP_DIR = paths.rel2abs(__file__, "..", "local_store", "tmp")
STORE_LOCAL_EXPOSE = False  # if you want to allow files in the local store to be exposed under /store/<path> urls.  For dev only.

# containers (buckets in AWS) where various content will be stored
# These values are placeholders, and must be overridden in live deployment
# this prevents test environments from accidentally writing to the production buckets
STORE_ANON_DATA_CONTAINER = "doaj-anon-data-placeholder"
STORE_CACHE_CONTAINER = "doaj-data-cache-placeholder"
STORE_PUBLIC_DATA_DUMP_CONTAINER = "doaj-data-dump-placeholder"


# S3 credentials for relevant scopes
STORE_S3_SCOPES = {
    "anon_data" : {
        "aws_access_key_id" : "put this in your dev/test/production.cfg",
from portality.core import app
from doajtest.helpers import DoajTestCase
from doajtest.fixtures import ArticleFixtureFactory
from portality.tasks import article_duplicate_report
from portality.lib import paths
from portality import models
from portality.lib import dates
from portality.clcsv import UnicodeReader

from collections import OrderedDict

import time
import os
import shutil

TMP_DIR = paths.rel2abs(__file__, "resources/article_duplicates_report")


class TestArticleMatch(DoajTestCase):

    def setUp(self):
        super(TestArticleMatch, self).setUp()
        if os.path.exists(TMP_DIR):
            shutil.rmtree(TMP_DIR)
        os.mkdir(TMP_DIR)

    def tearDown(self):
        super(TestArticleMatch, self).tearDown()
        shutil.rmtree(TMP_DIR)

    def test_01_duplicates_report(self):
Example #29
    def setUp(self):
        super(TestSitemap, self).setUp()
        self.cache_dir = app.config.get("CACHE_DIR")
        self.tmp_dir = paths.rel2abs(__file__, "tmp-cache")
        app.config["CACHE_DIR"] = self.tmp_dir
def load_cases():
    return load_parameter_sets(
        rel2abs(__file__, "..", "matrices", "article_batch_create_article"),
        "batch_create_article", "test_id", {"test_id": []})
Example #31
from portality.core import app
from doajtest.helpers import DoajTestCase
from doajtest.fixtures import ArticleFixtureFactory
from portality.tasks import article_duplicate_report
from portality.lib import paths
from portality import models
from portality.lib import dates
from portality.clcsv import UnicodeReader

from collections import OrderedDict

import time
import os
import shutil

TMP_DIR = paths.rel2abs(__file__, "resources/article_duplicates_report")


class TestArticleMatch(DoajTestCase):
    def setUp(self):
        super(TestArticleMatch, self).setUp()
        if os.path.exists(TMP_DIR):
            shutil.rmtree(TMP_DIR)
        os.mkdir(TMP_DIR)

    def tearDown(self):
        super(TestArticleMatch, self).tearDown()
        shutil.rmtree(TMP_DIR)

    def test_01_duplicates_report(self):
        """Check duplication reporting across all articles in the index"""
def load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "lib_normalise_url"), "normalise_url", "test_id",
                               {"test_id" : []})
def is_acceptable_load_cases():
    return load_parameter_sets(rel2abs(__file__, "..", "matrices", "article_create_article"), "is_acceptable",
                               "test_id",
                               {"test_id": []})