def test_01_spreadsheet(self):
    """Set the basic SpreadsheetJob attributes and read each one back."""
    # (attribute name, value) pairs, applied and verified in this order
    expected = (
        ("filename", "myfile.csv"),
        ("contact_email", "*****@*****.**"),
        ("status_code", "processing"),
        ("status_message", "currently working on it!"),
    )

    job = SpreadsheetJob()

    # assign everything first ...
    for attr, value in expected:
        setattr(job, attr, value)

    # ... then confirm every attribute returns what was stored
    for attr, value in expected:
        assert getattr(job, attr) == value
def test_04_pc_complete(self):
    """Check ``job.pc_complete`` as the completion flags on its records change.

    Three records are attached to one saved job via ``upload_id``. The
    asserted values (50, 66, 100) are consistent with the percentage being
    the share of set ``epmc_complete``/``oag_complete`` flags across the
    records (3 of 6, 4 of 6, 6 of 6) -- NOTE(review): confirm against the
    ``pc_complete`` implementation.
    """
    job = SpreadsheetJob()
    job.save()

    # record with neither completion flag set
    untouched = Record()
    untouched.upload_id = job.id
    untouched.save()

    # record with only the epmc flag set
    epmc_only = Record()
    epmc_only.upload_id = job.id
    epmc_only.epmc_complete = True
    epmc_only.save()

    # record with both flags set
    finished = Record()
    finished.upload_id = job.id
    finished.epmc_complete = True
    finished.oag_complete = True
    finished.save()

    # presumably gives the backing store time to make the saves visible
    # before the job is queried -- TODO confirm
    time.sleep(1)
    assert int(job.pc_complete) == 50

    # set the epmc flag on the first record
    untouched.epmc_complete = True
    untouched.save()

    time.sleep(1)
    assert int(job.pc_complete) == 66

    # set the remaining oag flags so every record has both
    untouched.oag_complete = True
    epmc_only.oag_complete = True
    untouched.save()
    epmc_only.save()

    time.sleep(1)
    assert int(job.pc_complete) == 100
def test_06_duplicates(self):
    """Check ``job.list_duplicate_identifiers()`` over a mix of records.

    Records with unique and repeated pmcid, pmid and doi values are saved
    against one job; the result is expected to hold exactly one duplicated
    value under each of the "pmcid", "pmid" and "doi" keys.
    """
    # the job that all of the records below are attached to
    job = SpreadsheetJob()
    job.save()

    def save_record(pmcid=None, pmid=None, doi=None):
        # Build and save one record for the job, setting only the
        # identifiers that were supplied (always in pmcid, pmid, doi order).
        rec = Record()
        rec.upload_id = job.id
        if pmcid is not None:
            rec.pmcid = pmcid
        if pmid is not None:
            rec.pmid = pmid
        if doi is not None:
            rec.doi = doi
        rec.save()

    # pmcid: one unique, then a duplicated pair
    save_record(pmcid="PMCunique")
    save_record(pmcid="PMCdupe")
    save_record(pmcid="PMCdupe")

    # pmid: one unique, then a duplicated pair
    save_record(pmid="unique")
    save_record(pmid="dupe")
    save_record(pmid="dupe")

    # doi: one unique, then a duplicated pair
    save_record(doi="10.unique")
    save_record(doi="10.dupe")
    save_record(doi="10.dupe")

    # a record repeating all three duplicated identifiers
    save_record(pmcid="PMCdupe", pmid="dupe", doi="10.dupe")

    # a record repeating the pmcid and pmid but carrying a different doi
    save_record(pmcid="PMCdupe", pmid="dupe", doi="10.notdupe")

    # presumably gives the backing store time to make the saves visible
    # before the job is queried -- TODO confirm
    time.sleep(2)

    found = job.list_duplicate_identifiers()

    # (key in the response, the single value expected under it)
    expected = (
        ("pmcid", "PMCdupe"),
        ("pmid", "dupe"),
        ("doi", "10.dupe"),
    )

    # check the structure of the response
    for key, _ in expected:
        assert key in found

    # check the contents
    for key, value in expected:
        assert len(found[key]) == 1
        assert value in found[key]