Ejemplo n.º 1
0
    def test_01_full_synchronous(self):
        """Push the test submission through the workflow and export the result.

        Uploads the contents of TEST_SUBMISSION as a CSV job, runs the job
        processor once, then writes the job's CSV output to "output.csv"
        inside UPLOAD_DIR.

        This is a smoke test: it makes no assertions and passes as long as
        no step raises.
        """
        # first pretend to do the file upload, using the test submission;
        # the context manager closes the source file as soon as it is read
        with open(TEST_SUBMISSION, "r") as source:
            fh = FileHandle(source.read())
        job = workflow.csv_upload(fh, "test_submission.csv", "*****@*****.**")

        # NOTE(review): presumably gives the upload time to become visible to
        # the job processor - confirm why the pause is needed
        time.sleep(2)

        # now call the overall job processor
        workflow.process_jobs()

        # once the job processor is finished, we can export the csv for the job we ran
        csvcontent = workflow.output_csv(job)

        # NOTE(review): no encoding is passed to codecs.open here, so the
        # content is written as-is - confirm that matches what output_csv returns
        with codecs.open(os.path.join(UPLOAD_DIR, "output.csv"), "wb") as f:
            f.write(csvcontent)
Ejemplo n.º 2
0
    def test_02_full_asynchronous(self):
        """Push the full submission through the workflow, wait, then export.

        Uploads the contents of FULL_SUBMISSION as a CSV job, runs the job
        processor, then polls the job's ``pc_complete`` value every two
        seconds (at most 100 checks) until it reads 100. The job's CSV output
        is finally written, UTF-8 encoded, to "output.csv" inside UPLOAD_DIR.

        This is a smoke test: it makes no assertions. If the job never
        reaches 100 within the polling budget, the export is still attempted.
        """
        # first pretend to do the file upload, using the test submission;
        # the context manager closes the source file as soon as it is read
        with open(FULL_SUBMISSION, "r") as source:
            fh = FileHandle(source.read())
        job = workflow.csv_upload(fh, "full_submission.csv", "*****@*****.**")

        # NOTE(review): presumably gives the upload time to become visible to
        # the job processor - confirm why the pause is needed
        time.sleep(2)

        # now call the overall job processor
        workflow.process_jobs()

        # once the job processor returns, we must monitor the job itself for completeness
        for i in range(100):
            pc = job.pc_complete
            # single pre-formatted string: same output as ``print i, pc`` but
            # valid with or without the print statement
            print("%s %s" % (i, pc))
            if int(pc) == 100:
                break
            time.sleep(2)

        # once the job processor is finished, we can export the csv for the job we ran
        csvcontent = workflow.output_csv(job)
        with codecs.open(os.path.join(UPLOAD_DIR, "output.csv"), "wb", "utf8") as f:
            f.write(csvcontent)
Ejemplo n.º 3
0
    def test_01_export(self):
        """Check the CSV produced by workflow.output_csv for a three-record job.

        Saves one job and three records attached to it (one with every field
        populated, one with only a PMCID, one with a PMID plus two provenance
        entries), refreshes both indices, then parses the exported CSV and
        compares the header and each data row against the expected values.
        """
        # the job only acts as the owner of the records; its content is irrelevant
        job = models.SpreadsheetJob()
        job.save()

        # one timestamp shared by every provenance entry so the output is predictable
        now = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")

        # record 1: every field filled in correctly
        complete = models.Record()
        complete.pmcid = "PMC1234"
        complete.pmid = "1234"
        complete.doi = "10.1234"
        complete.title = "The Title"
        complete.has_ft_xml = True
        complete.in_epmc = True
        complete.aam = True
        complete.is_oa = True
        complete.licence_type = "CC0"
        complete.licence_source = "publisher"
        complete.journal_type = "hybrid"
        complete.confidence = 0.9
        complete.add_provenance("test", "provenance", now)
        complete.upload_id = job.id
        complete.upload_pos = 1
        complete.journal = "Journal of Science"
        complete.issn = ["1234-5678", "9876-5432"]
        complete.save()

        # record 2: nothing but a PMCID
        pmcid_only = models.Record()
        pmcid_only.pmcid = "PMC9876"
        pmcid_only.upload_id = job.id
        pmcid_only.upload_pos = 2
        pmcid_only.save()

        # record 3: a PMID, explicitly empty title/licence, two provenance entries
        sparse = models.Record()
        sparse.pmid = "9876"
        sparse.upload_id = job.id
        sparse.upload_pos = 3
        sparse.title = None
        sparse.licence_type = ""
        sparse.add_provenance("test", "provenance", now)
        sparse.add_provenance("test", "more", now)
        sparse.save()

        # refresh the index ready for querying
        models.SpreadsheetJob.refresh()
        models.Record.refresh()

        # export, then parse the CSV text back into rows
        exported = workflow.output_csv(job)
        rows = list(csv.reader(StringIO(exported)))

        # what the export is expected to contain, row by row
        header = [
            'PMCID', 'PMID', 'DOI', "Journal title", "ISSN", 'Article title',
            "Fulltext in EPMC?", 'XML Fulltext?', 'AAM?', 'Open Access?',
            'Licence', 'Licence Source', 'Journal Type',
            'Correct Article Confidence', 'Standard Compliance?',
            'Deluxe Compliance?', 'Compliance Processing Ouptut',
        ]
        complete_row = [
            'PMC1234', '1234', '10.1234', "Journal of Science",
            "1234-5678, 9876-5432", 'The Title', "True", 'True', 'True',
            'True', 'CC0', 'publisher', 'hybrid', '0.9', "True", "True",
            '[' + now + ' test] provenance',
        ]
        pmcid_only_row = [
            "PMC9876", "", "", "", "", "", "", "", "unknown", "", "unknown",
            "", "", "", "False", "False", "",
        ]
        sparse_row = [
            "", "9876", "", "", "", "", "", "", "unknown", "", "unknown", "",
            "", "", "False", "False",
            '[' + now + ' test] provenance\n\n[' + now + ' test] more',
        ]

        # header plus one line per record
        assert len(rows) == 4
        assert rows[0] == header
        assert rows[1] == complete_row
        assert rows[2] == pmcid_only_row
        assert rows[3] == sparse_row
Ejemplo n.º 4
0
    # Store the supplied identifier on the record field selected by args.type
    # (compared case-insensitively). An unrecognised type sets no identifier,
    # but the record is saved regardless.
    if args.type.lower() == "pmcid":
        record.pmcid = args.identifier
    elif args.type.lower() == "pmid":
        record.pmid = args.identifier
    elif args.type.lower() == "doi":
        record.doi = args.identifier
    record.save()

    # NOTE(review): presumably waits for the saved record to become visible
    # to later lookups - confirm
    time.sleep(2)

    # The same list object is handed to the message and then to process_oag.
    # NOTE(review): presumably process_record fills it via the message - confirm
    oag_register = []
    msg = workflow.WorkflowMessage(job, record, oag_register)
    workflow.process_record(msg)
    workflow.process_oag(oag_register, job)

    time.sleep(2)

    # Poll the job every two seconds until it reports 100% complete.
    # There is no iteration limit: if the job never reaches 100 this loops forever.
    i = 0
    while True:
        i += 1
        # pc_complete is read twice per pass: once for the exit check below
        # and once more for the progress display
        pcc = job.pc_complete
        # trailing comma: Python 2 print without a newline, so progress stays
        # on one line; the explicit flush makes it appear immediately
        print i, job.pc_complete, "%",
        sys.stdout.flush()
        if int(pcc) == 100:
            break
        time.sleep(2)

    # Dump the finished job's CSV export to stdout
    out = workflow.output_csv(job)
    print out
Ejemplo n.º 5
0
    def test_01_export(self):
        """Verify workflow.output_csv against a job holding three known records.

        One record has every field set, one has only a PMCID and one has a
        PMID with two provenance entries. After saving them and refreshing
        both indices, the exported CSV is parsed and its header and three
        data rows are compared with the expected values.
        """
        # make a job - only needed so the records have something to belong to
        job = models.SpreadsheetJob()
        job.save()

        # single timestamp reused for every provenance entry
        now = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")

        # fully populated record
        full_rec = models.Record()
        full_rec.pmcid = "PMC1234"
        full_rec.pmid = "1234"
        full_rec.doi = "10.1234"
        full_rec.title = "The Title"
        full_rec.has_ft_xml = True
        full_rec.in_epmc = True
        full_rec.aam = True
        full_rec.is_oa = True
        full_rec.licence_type = "CC0"
        full_rec.licence_source = "publisher"
        full_rec.journal_type = "hybrid"
        full_rec.confidence = 0.9
        full_rec.add_provenance("test", "provenance", now)
        full_rec.upload_id = job.id
        full_rec.upload_pos = 1
        full_rec.journal = "Journal of Science"
        full_rec.issn = ["1234-5678", "9876-5432"]
        full_rec.save()

        # record identified by PMCID alone
        pmcid_rec = models.Record()
        pmcid_rec.pmcid = "PMC9876"
        pmcid_rec.upload_id = job.id
        pmcid_rec.upload_pos = 2
        pmcid_rec.save()

        # record identified by PMID, with blank title/licence and two provenance notes
        pmid_rec = models.Record()
        pmid_rec.pmid = "9876"
        pmid_rec.upload_id = job.id
        pmid_rec.upload_pos = 3
        pmid_rec.title = None
        pmid_rec.licence_type = ""
        pmid_rec.add_provenance("test", "provenance", now)
        pmid_rec.add_provenance("test", "more", now)
        pmid_rec.save()

        # refresh the index ready for querying
        models.SpreadsheetJob.refresh()
        models.Record.refresh()

        # run the export and read it back as a list of CSV rows
        csv_text = workflow.output_csv(job)
        rows = list(csv.reader(StringIO(csv_text)))

        # expected output: header first, then the records in upload order
        expected = [
            [
                'PMCID', 'PMID', 'DOI', "Journal title", "ISSN",
                'Article title', "Fulltext in EPMC?", 'XML Fulltext?', 'AAM?',
                'Open Access?', 'Licence', 'Licence Source', 'Journal Type',
                'Correct Article Confidence', 'Standard Compliance?',
                'Deluxe Compliance?', 'Compliance Processing Ouptut',
            ],
            [
                'PMC1234', '1234', '10.1234', "Journal of Science",
                "1234-5678, 9876-5432", 'The Title', "True", 'True', 'True',
                'True', 'CC0', 'publisher', 'hybrid', '0.9', "True", "True",
                '[' + now + ' test] provenance',
            ],
            [
                "PMC9876", "", "", "", "", "", "", "", "unknown", "",
                "unknown", "", "", "", "False", "False", "",
            ],
            [
                "", "9876", "", "", "", "", "", "", "unknown", "", "unknown",
                "", "", "", "False", "False",
                '[' + now + ' test] provenance\n\n[' + now + ' test] more',
            ],
        ]

        assert len(rows) == 4
        # compare row by row so a failure stops at the first row that differs
        for actual_row, expected_row in zip(rows, expected):
            assert actual_row == expected_row
Ejemplo n.º 6
0
    # Store the supplied identifier on the record field selected by args.type
    # (compared case-insensitively). An unrecognised type sets no identifier,
    # but the record is saved regardless.
    if args.type.lower() == "pmcid":
        record.pmcid = args.identifier
    elif args.type.lower() == "pmid":
        record.pmid = args.identifier
    elif args.type.lower() == "doi":
        record.doi = args.identifier
    record.save()

    # NOTE(review): presumably waits for the saved record to become visible
    # to later lookups - confirm
    time.sleep(2)

    # The same list object is handed to the message and then to process_oag.
    # NOTE(review): presumably process_record fills it via the message - confirm
    oag_register = []
    msg = workflow.WorkflowMessage(job, record, oag_register)
    workflow.process_record(msg)
    workflow.process_oag(oag_register, job)

    time.sleep(2)

    # Poll the job every two seconds until it reports 100% complete.
    # There is no iteration limit: if the job never reaches 100 this loops forever.
    i = 0
    while True:
        i += 1
        # pc_complete is read twice per pass: once for the exit check below
        # and once more for the progress display
        pcc = job.pc_complete
        # trailing comma: Python 2 print without a newline, so progress stays
        # on one line; the explicit flush makes it appear immediately
        print i, job.pc_complete, "%",
        sys.stdout.flush()
        if int(pcc) == 100:
            break
        time.sleep(2)

    # Dump the finished job's CSV export to stdout
    out = workflow.output_csv(job)
    print out