def testFetchWithLargeList(self): f = open('prokaryotes.txt') bioprojects = CSVBioProjectParser.csv_parse_stream(f) bioprojects = bioprojects[1:200] f.close() testlist = map(lambda bioproject: bioproject.bioproject_id, bioprojects) handler = BioProjectSaxHandler(self.logger) results, errors = self.efetch.fetch_uids(handler, testlist) # Removed because the CSV file potentially contains duplicates #self.assertEquals(len(results) + len(errors), len(testlist)) for value in testlist: if( not results.has_key(value) ): self.assertIn(value, errors)
def testFetchWithLargeList(self): f = open('prokaryotes.txt') bioprojects = CSVBioProjectParser.csv_parse_stream(f) bioprojects = bioprojects[1:200] f.close() testlist = map(lambda bioproject: bioproject.bioproject_id, bioprojects) handler = BioProjectSaxHandler(self.logger) results, errors = self.efetch.fetch_uids(handler, testlist) # Removed because the CSV file potentially contains duplicates #self.assertEquals(len(results) + len(errors), len(testlist)) for value in testlist: if (not results.has_key(value)): self.assertIn(value, errors)
def testParseCSV(self): f = open('test.csv', 'wb') #I apologize for the ugliness here... test_str = '''#Organism/Name\tBioProject Accession\tBioProject ID\tGroup\tSubGroup\tSize (Mb)\tGC%\tChromosomes/RefSeq\tChromosomes/INSDC\tPlasmids/RefSeq\tPlasmids/INSDC\tWGS\tScaffolds\tGenes\tProteins\tRelease Date\tModify Date\tStatus\tCenter Campylobacter jejuni subsp. jejuni DFVF1099\tPRJNA41639\t41639\tProteobacteria\tdelta/epsilon subdivisions\t1.73386\t30.4\t-\t-\t-\t-\tADHK01\t71\t1964\t1920\t2011/01/03\t2011/09/16\tScaffolds or contigs\tFaculty of Life Sciences, Department of Food Science, University of Copenhagen Campylobacter jejuni subsp. jejuni 305\tPRJNA41641\t41641\tProteobacteria\tdelta/epsilon subdivisions\t1.80827\t30.4\t-\t-\t-\t-\tADHL01\t333\t2268\t2138\t2011/01/03\t2011/09/16\tScaffolds or contigs\tFaculty of Life Sciences, Department of Food Science, University of Copenhagen Campylobacter jejuni subsp. jejuni 327\tPRJNA41643\t41643\tProteobacteria\tdelta/epsilon subdivisions\t1.61861\t30.5\t-\t-\t-\t-\tADHM01\t48\t1786\t1711\t2011/01/03\t2011/05/12\tScaffolds or contigs\tFaculty of Life Sciences, Department of Food Science, University of Copenhagen Campylobacter jejuni subsp. jejuni 414\tPRJNA43389\t43389\tProteobacteria\tdelta/epsilon subdivisions\t1.71012\t30\tNZ_CM000855.1\tCM000855.1\t-\t-\tADGM01\t1\t1840\t1800\t2010/01/12\t2010/06/16\tComplete\tUniversity of Liverpool''' f.write(test_str) f.close() t = open('test.csv', 'rb') bioprojects = CSVBioProjectParser.csv_parse_stream(t) t.close() self.assertEqual( len(bioprojects) , 4) self.assertIsInstance( bioprojects[0], BioProject) for bioproject in bioprojects: self.assertIn( bioproject.bioproject_id, [41639, 41641, 41643, 43389] )