def testMulticoreAnnotate(self):
    """Test a (too) simple annotating exercise from GAF on 2 cores.

    Pickles the GAF datasource to ``out/testGAFPickle.pkl``, then annotates
    two single-base mutations at adjacent positions on chromosome 3 through
    a two-process pool.  Each result must expose ``transcript_id`` and
    ``gene`` annotations, with ``gene`` equal to ``PIK3CA``, and the two
    results must keep their distinct start positions.
    """
    gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

    # Test pickling.  The file is opened in binary mode and closed
    # deterministically so the handle is not leaked if dump() raises.
    with open('out/testGAFPickle.pkl', 'wb') as pickleFile:
        dump(gafDatasource, pickleFile)

    # Two mutations that differ only in their position.
    mutations = []
    for position in ('178866811', '178866812'):
        m = MutationDataFactory.default_create()
        m.chr = '3'
        m.start = position
        m.end = position
        m.ref_allele = "A"
        m.alt_allele = "C"
        m.build = "hg19"
        mutations.append(m)

    p = LoggingPool(processes=2)
    try:
        result = p.map(annotate_mutation_global,
                       [(gafDatasource, m) for m in mutations])
    finally:
        # Always release the pool, even when map() propagates a failure.
        p.close()
        p.join()

    for r in result:
        self.assertTrue("transcript_id" in r.keys())
        self.assertTrue("gene" in r.keys())
        self.assertEqual(r["gene"], "PIK3CA")
    self.assertNotEqual(result[0].start, result[1].start)
def testMulticoreAnnotate(self):
    """Test a (too) simple annotating exercise from GAF on 2 cores.

    Pickles the GAF datasource to ``out/testGAFPickle.pkl``, then annotates
    two single-base mutations at adjacent positions on chromosome 3 through
    a two-process pool.  Each result must expose ``transcript_id`` and
    ``gene`` annotations, with ``gene`` equal to ``PIK3CA``, and the two
    results must keep their distinct start positions.
    """
    # NOTE(review): a definition with this same name appears immediately
    # before this one in the file; if both live in the same class body only
    # this later definition takes effect -- confirm and drop the extra copy.
    gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

    # Test pickling.  Binary mode plus a context manager: the handle is
    # closed even if dump() raises.
    with open('out/testGAFPickle.pkl', 'wb') as pickleFile:
        dump(gafDatasource, pickleFile)

    # Two mutations that differ only in their position.
    mutations = []
    for position in ('178866811', '178866812'):
        m = MutationDataFactory.default_create()
        m.chr = '3'
        m.start = position
        m.end = position
        m.ref_allele = "A"
        m.alt_allele = "C"
        m.build = "hg19"
        mutations.append(m)

    p = LoggingPool(processes=2)
    try:
        result = p.map(annotate_mutation_global,
                       [(gafDatasource, m) for m in mutations])
    finally:
        # Always release the pool, even when map() propagates a failure.
        p.close()
        p.join()

    for r in result:
        self.assertTrue("transcript_id" in r.keys())
        self.assertTrue("gene" in r.keys())
        self.assertEqual(r["gene"], "PIK3CA")
    self.assertNotEqual(result[0].start, result[1].start)
def testMulticoreAnnotateFromChunkedFile(self):
    """Annotate a maflite file in chunks across a process pool.

    Mutations are read from ``testdata/maflite/Patient0.snp.maf.txt`` and
    grouped into rounds of ``numChunks`` chunks holding up to ``chunkSize``
    mutations each.  Every round is mapped over the pool with
    ``annotate_mutations_global``, the flattened results are rendered to
    ``out/testGAFMulticorePatient0.snp.maf.txt``, and the rendered file is
    read back and expected to contain 730 rows.
    """
    #TODO: Add unit test that Mutation data is pickle-able
    inputFile = "testdata/maflite/Patient0.snp.maf.txt"
    outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
    chunkSize = 200
    numChunks = 4

    gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
    ic = MafliteInputMutationCreator(inputFile)
    oc = SimpleOutputRenderer(outputFile)

    # createChunks
    muts = ic.createMutations()

    allAnnotatedChunksFlat = []
    are_mutations_remaining = True
    p = LoggingPool(processes=numChunks)
    try:
        while are_mutations_remaining:
            chunks = []
            for _ in xrange(numChunks):
                chunk = []
                for _ in xrange(chunkSize):
                    try:
                        chunk.append(next(muts))
                    except StopIteration:
                        # Input exhausted: finish this round (remaining
                        # chunks stay empty) and then stop looping.
                        are_mutations_remaining = False
                        break
                chunks.append((chunk, gafDatasource))

            annotatedChunks = p.map(annotate_mutations_global, chunks)
            allAnnotatedChunksFlat.append(self._flattenChunks(annotatedChunks))
    finally:
        # Always release the worker processes, even if annotation fails.
        p.close()
        p.join()

    annotatedMuts = chain.from_iterable(allAnnotatedChunksFlat)
    oc.renderMutations(annotatedMuts, Metadata())

    # Count the rows that were actually written out.
    tsvReader = GenericTsvReader(outputFile)
    ctr = sum(1 for _ in tsvReader)
    self.assertEqual(ctr, 730, "Should have read 730 variants, but read " + str(ctr))