def test_error_single_pos_single(self):
    """Check that single-position lookups agree with range lookups.

    Uses ``self.args_single`` with the positions read from
    ``unit_test/positions_missed.pos``. For entries whose index-1 and
    index-2 values are equal, tuples are fetched from the bam, index 1 is
    decremented, and tuples are fetched again; tuples sharing the same
    first element must compare equal.

    NOTE(review): the indentation of this method was lost in the text it
    was reconstructed from; the nesting below is a best guess -- confirm
    against the original file.
    """
    self.args_single.interval = None
    self.args_single.positions_file = "unit_test/positions_missed.pos"
    self.classifier = bamutils.Classifier(self.args_single)
    self.classifier.get_positions()
    self.target_positions = self.classifier.target_positions
    temp_target_positions = []
    for target_position in self.target_positions:
        # Stop at the first entry that has neither index 1 nor index 2 set.
        if target_position[1] == None and target_position[2] == None:
            break
        # Only entries whose index 1 and index 2 coincide are examined.
        if target_position[1] == target_position[2]:
            temp_target_positions.append(target_position)
            tuples = self.classifier.bam.get_tuples(temp_target_positions)
            for it in tuples:
                temp_target_positions = []
                # NOTE(review): this mutates the shared entry in place and
                # runs once per tuple, so index 1 keeps decreasing while
                # iterating -- confirm that this is intended.
                target_position[1] = target_position[1] - 1
                temp_target_positions.append(target_position)
                newtuples = self.classifier.bam.get_tuples(
                    temp_target_positions)
                for new_it in newtuples:
                    # Compare only tuples whose first elements match.
                    if it[0] == new_it[0]:
                        self.assertEqual(
                            new_it, it, 'Error reading this'
                            ' sequence from the bam file(' + str(it[0]) + ')'
                            ', please use range instead')
def test_error_single_pos_paired(self):
    """Check that single-position lookups agree with range lookups (paired).

    Uses ``self.args_paired`` with the positions read from
    ``unit_test/positions_missed.pos``. Each item yielded by
    ``bam.get_tuples`` is unpacked into two tuples (``tt`` and ``nt``);
    both must be equal between the single-position fetch and the fetch
    made after index 1 of the entry has been decremented.

    NOTE(review): the indentation of this method was lost in the text it
    was reconstructed from; the nesting below is a best guess -- confirm
    against the original file.
    """
    self.args_paired.interval = None
    self.args_paired.positions_file = "unit_test/positions_missed.pos"
    self.classifier = bamutils.Classifier(self.args_paired)
    self.classifier.get_positions()
    self.target_positions = self.classifier.target_positions
    # Get the alignment for the single position (n), then for the
    # range (n-1, n), and compare the two.
    temp_target_positions = []
    for target_position in self.target_positions:
        # Stop at the first entry that has neither index 1 nor index 2 set.
        if target_position[1] == None and target_position[2] == None:
            break
        # Only entries whose index 1 and index 2 coincide are examined.
        if target_position[1] == target_position[2]:
            temp_target_positions.append(target_position)
            tuples = self.classifier.bam.get_tuples(temp_target_positions)
            for tt, nt in tuples:
                temp_target_positions = []
                # NOTE(review): this mutates the shared entry in place and
                # runs once per tuple pair, so index 1 keeps decreasing
                # while iterating -- confirm that this is intended.
                target_position[1] = target_position[1] - 1
                temp_target_positions.append(target_position)
                newtuples = self.classifier.bam.get_tuples(
                    temp_target_positions)
                for new_tt, new_nt in newtuples:
                    # Compare only pairs whose first elements both match.
                    if nt[0] == new_nt[0] and tt[0] == new_tt[0]:
                        self.assertEqual(
                            new_tt, tt, 'Error reading this'
                            ' sequence from the bam file(' + str(tt[0]) + ')'
                            ', please use range instead')
                        self.assertEqual(
                            new_nt, nt, 'Error reading this'
                            ' sequence from the bam file(' + str(tt[0]) + ')'
                            ', please use range instead')
def run_classifier(self, args):
    """Run the classification pipeline for ``args`` and print the results.

    Builds a ``bamutils.Classifier`` from ``args``, resolves its positions,
    extracts features, optionally exports them (when
    ``args.export_features`` is not ``None``), predicts and prints.

    Args:
        args: argument object handed to ``bamutils.Classifier``; must expose
            an ``export_features`` attribute.
    """
    classifier = bamutils.Classifier(args)
    classifier.get_positions()
    features = classifier.get_features()
    if args.export_features is not None:
        # Export through the classifier built for these args. Going through
        # ``self.classifier`` would fail when that attribute is unset, or
        # silently use an instance built from different arguments.
        # NOTE(review): if get_features() returns a one-shot iterator,
        # exporting may exhaust it before predict() -- confirm.
        classifier.export_features(features)
    probabilities = classifier.predict(features)
    classifier.print_results(probabilities)
def test_get_positions_case7(self):
    """Whole genome: no target position may carry a region.

    Runs ``get_positions`` on the unmodified ``self.args_paired`` and
    requires index 1 of every resulting target position to be ``None``.
    (Presumably the fixture leaves interval, positions file and manifest
    unset -- verify against the test setup.)
    """
    classifier = bamutils.Classifier(self.args_paired)
    classifier.get_positions()
    for val in classifier.target_positions:
        # Identity check against None, reported with the value that failed.
        self.assertIsNone(val[1], 'Region specified in whole genome')
def test_reference_base_single(self):
    """Probe reference-base lookups over a small grid (single mode only).

    Does nothing unless ``self.args_single.single`` is truthy. Otherwise
    queries ``bam.get_reference_base(..., index=True)`` for chromosome ids
    0-24 and positions 0-99 and checks the stringified result against the
    pattern below. Lookups that raise ``RuntimeError`` are skipped.
    """
    if not self.args_single.single:
        return

    base_pattern = '[0-4]|[ACGTN]'
    failure_message = 'Invalid Trinucleotide_context'
    clf = bamutils.Classifier(self.args_single)

    for chrom in xrange(25):
        for pos in xrange(100):
            try:
                # Raises RuntimeError when the base cannot be fetched.
                base = clf.bam.get_reference_base(chrom, pos, index=True)
                self.assertRegexpMatches(
                    str(base), base_pattern, failure_message)
            except RuntimeError:
                pass
def test_get_positions_case9(self):
    """Interval given as a bare chromosome.

    interval: 1
    expected: the single entry ['1', None, None]

    (Presumably the fixture leaves manifest and positions file unset --
    verify against the test setup.)
    """
    config = self.args_paired
    config.interval = '1'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    expected = [['1', None, None]]
    self.assertListEqual(sorted(clf.target_positions), sorted(expected))
def test_get_positions_case2(self):
    """Interval given as a range, combined with a positions file.

    interval:       1:1-1000
    positions file: ./unit_test/get_positions_posfile
    expected:       1:1-1000 (the region common to interval and positions)
    """
    config = self.args_paired
    config.interval = '1:1-1000'
    config.positions_file = './unit_test/get_positions_posfile'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    expected = [['1', 1, 1000]]
    self.assertListEqual(sorted(clf.target_positions), sorted(expected))
def test_get_positions_case8(self):
    """Interval given as a range, combined with a manifest (deep mode).

    interval: 1:1-100
    manifest: ./unit_test/get_positions_manifest
    expected: 1:1-90 and 1:92-100 (the region common to interval and
              manifest)
    """
    config = self.args_paired
    config.interval = '1:1-100'
    config.deep = True
    config.manifest = './unit_test/get_positions_manifest'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    expected = [['1', 1, 90], ['1', 92, 100]]
    self.assertListEqual(sorted(clf.target_positions), sorted(expected))
def test_get_positions_case1(self):
    """Interval given as a chromosome, combined with a positions file.

    interval:       1
    positions file: ./unit_test/get_positions_posfile
    expected:       1:1-20000 and 1:21000-30000 (the positions that fall
                    on the requested chromosome)
    """
    config = self.args_paired
    config.interval = '1'
    config.positions_file = './unit_test/get_positions_posfile'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    expected = [['1', 1, 20000], ['1', 21000, 30000]]
    self.assertEqual(sorted(clf.target_positions), sorted(expected))
def test_get_positions_case10(self):
    """Interval chromosome that shares nothing with the positions file.

    interval:       11
    positions file: ./unit_test/get_positions_posfile
    expected:       [] (empty intersection)
    """
    config = self.args_paired
    config.interval = '11'
    config.positions_file = './unit_test/get_positions_posfile'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    self.assertListEqual(clf.target_positions, [])
def test_trinucleotide_context_paired(self):
    """Probe trinucleotide-context lookups over a small grid (paired mode).

    Fails immediately if ``self.args_paired.single`` is set. Otherwise
    queries ``bam.get_trinucleotide_context`` for chromosome ids 0-24 and
    positions 0-999 and checks each result against the pattern below.
    Lookups that raise ``RuntimeError`` are skipped. Only needs to run
    once (not repeated for the single-sample arguments).
    """
    if self.args_paired.single:
        self.assertEqual(True, False, 'Single flag set')
        return

    # NOTE(review): the three alternatives are identical, so this matches
    # any string containing one of ACGTN -- confirm whether a stricter
    # three-letter match was intended.
    context_pattern = '[ACGTN]|[ACGTN]|[ACGTN]'
    failure_message = 'Invalid Trinucleotide_context'
    clf = bamutils.Classifier(self.args_paired)

    for chrom in xrange(25):
        for pos in xrange(1000):
            try:
                # Raises RuntimeError when the context cannot be fetched.
                context = clf.bam.get_trinucleotide_context(chrom, pos)
                self.assertRegexpMatches(
                    context, context_pattern, failure_message)
            except RuntimeError:
                pass
def test_get_positions_case3(self):
    """Chromosome interval plus positions file plus manifest (deep mode).

    interval:       1
    positions file: ./unit_test/get_positions_posfile
    manifest:       ./unit_test/get_positions_manifest
    expected:       1:1-90, 1:92-130 and 1:500-650 (the region common to
                    all three inputs)
    """
    config = self.args_paired
    config.interval = '1'
    config.deep = True
    config.positions_file = './unit_test/get_positions_posfile'
    config.manifest = './unit_test/get_positions_manifest'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    expected = [['1', 1, 90], ['1', 92, 130], ['1', 500, 650]]
    self.assertListEqual(sorted(clf.target_positions), sorted(expected))
def main():
    """Command-line entry point: configure logging and run the classifier.

    Reads the parsed arguments from ``classifyui.args``, sets up file
    logging (DEBUG when ``args.verbose`` is set, WARNING otherwise), then
    builds a ``bamutils.Classifier`` and runs it: positions, features,
    optional feature export, and finally either a feature dump
    (``args.features_only``) or prediction plus result printing.
    """
    args = classifyui.args

    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(
        filename=args.log_file,
        format='%(asctime)s %(message)s',
        level=log_level)

    logging.warning("<<< mutationSeq_" + mutationSeq_version + " started >>>")

    # Imported only after logging is configured.
    logging.info("importing required modules")
    import bamutils
    logging.info(args)

    # ---- main body ----
    logging.info("initializing a Classifier")
    clf = bamutils.Classifier(args)

    logging.info("getting positions")
    clf.get_positions()

    logging.info("generating features iterator")
    feature_iter = clf.get_features()

    if args.export_features is not None:
        logging.info("exporting features")
        # The export call hands back the features to use from here on.
        feature_iter = clf.export_features(feature_iter)

    if not args.features_only:
        predictions = clf.predict(feature_iter)
        clf.print_results(predictions)
    else:
        clf.print_features(feature_iter)

    logging.warning("successfully completed.\n")
def test_get_positions_case11(self):
    """Single position from the positions file combined with a manifest.

    positions file: ./unit_test/get_positions_posfile_case11
    manifest:       ./unit_test/get_positions_manifest_case11
    expected:       1:1000-1000

    Regression test for a point-lookup bug: slicing the manifest with
    self.manifest[val[0]][val[1]:val[2]] returned [] when
    val[1] == val[2], whereas self.manifest[val[0]][val[1]] did not.
    """
    config = self.args_paired
    config.deep = True
    config.positions_file = './unit_test/get_positions_posfile_case11'
    config.manifest = './unit_test/get_positions_manifest_case11'

    clf = bamutils.Classifier(config)
    clf.get_positions()

    expected = [['1', 1000, 1000]]
    self.assertListEqual(sorted(clf.target_positions), sorted(expected))
def get_tuples(self, args):
    """Build a classifier for ``args`` and fetch tuples for its positions.

    Args:
        args: argument object handed to ``bamutils.Classifier``.

    Returns:
        A 2-tuple ``(tuples, classifier)``: the result of
        ``classifier.bam.get_tuples`` on the classifier's target positions,
        and the classifier instance itself.
    """
    clf = bamutils.Classifier(args)
    clf.get_positions()
    fetched = clf.bam.get_tuples(clf.target_positions)
    return fetched, clf
def run_classifier(arguments, reffiles):
    """Run the classifier once per reference file and collect the VCF paths.

    Each reference file mixes header lines and position lines:

    * header lines start with ``#`` and read ``# <key> <path>`` where
      ``<key>`` is one of ``tumour``, ``normal``, ``reference``,
      ``manifest``;
    * every other non-blank line contributes ``<field0>:<field1>`` to a
      temporary positions file.

    For each file, ``arguments`` is updated in place (``out``, ``interval``,
    ``positions_file``, ``samples``, ``manifest``) before the classifier
    runs. The temporary positions file (``<out>.tmp``) is removed afterwards
    even if the classifier raises.

    Args:
        arguments: argument object; ``out`` is used as the output prefix and
            ``model`` / ``export_features`` are read.
        reffiles: iterable of reference-file paths.

    Returns:
        List of output VCF paths, one per reference file that was processed.
        A file missing any of the tumour/normal/reference paths is logged
        as an error and skipped.
    """
    output_vcf = []
    output_folder = arguments.out

    for reference_file in reffiles:
        tfile = nfile = rfile = manfile = None
        positions = []

        # Parse the reference file: header paths and position lines.
        with open(reference_file, 'r') as file_stream:
            for line in file_stream:
                fields = line.strip().split()
                if not fields:
                    # Blank lines carry no information.
                    continue
                if line[0] == '#':
                    if len(fields) < 3:
                        # Plain comment or incomplete header: nothing to read.
                        continue
                    key, path = fields[1], fields[2]
                    if key == 'tumour':
                        tfile = path
                    elif key == 'normal':
                        nfile = path
                    elif key == 'reference':
                        rfile = path
                    elif key == 'manifest':
                        manfile = path
                else:
                    positions.append(fields[0] + ':' + fields[1] + '\n')

        # Without all three paths the sample strings cannot be built; skip
        # this file before touching ``arguments`` or creating a temp file.
        if not all((tfile, nfile, rfile)):
            logging.error('Invalid input (one of paths is missing)')
            continue

        # Output path is the prefix joined directly with the file's basename.
        arguments.out = output_folder + reference_file.strip().split(
            '/')[-1] + '.vcf'
        positions_path = arguments.out + '.tmp'

        # Create a positions file for the classifier.
        with open(positions_path, 'w') as file_stream_w:
            file_stream_w.writelines(positions)

        output_vcf.append(arguments.out)

        try:
            arguments.interval = None
            arguments.positions_file = positions_path
            arguments.samples = [
                'tumour:' + tfile, 'normal:' + nfile, 'reference:' + rfile,
                'model:' + arguments.model
            ]
            arguments.manifest = manfile

            logging.info("initializing a Classifier")
            classifier = bamutils.Classifier(arguments)

            logging.info("getting positions")
            classifier.get_positions()

            logging.info("generating features iterator")
            features = classifier.get_features()

            if arguments.export_features is not None:
                logging.info("exporting features")
                classifier.export_features(features)

            probabilities = classifier.predict(features)
            classifier.print_results(probabilities)
            logging.warning("successfully completed.\n")
        finally:
            # Remove the positions file even when classification fails.
            os.remove(positions_path)

    return output_vcf