def processBamFiles(genome):
    """Sort, index, pile up, call and filter variants for one genome's BAM file.

    The input BAM is read from INPUT_DIR when the name of the calling
    function contains "main", and from OUTPUT_DIR otherwise.  Six shell
    commands are then run in order (samtools sort, samtools index,
    samtools mpileup, bcftools call, vcfutils.pl varFilter, mv), writing
    intermediate files under OUTPUT_DIR + "mpileup/" and OUTPUT_DIR + "vcf/".
    Finally getAnnotations(genome) is called.

    A command that exits with a non-zero status is reported on stderr.  The
    remaining commands and getAnnotations are still run, exactly as before;
    only the reporting is new.

    Args:
        genome: Base name (without the ".bam" extension) of the genome to
            process; it is also used to name every derived file.
    """
    # inspect.stack() has to be called directly in this frame so that
    # index 1 is the function that called processBamFiles.
    caller_name = inspect.stack()[1][3]
    source_dir = INPUT_DIR if "main" in caller_name else OUTPUT_DIR
    filename = source_dir + genome + ".bam"

    sorted_bam = genome + ".sorted.bam"
    mpileup_path = OUTPUT_DIR + "mpileup/" + genome + "_mpileup"
    unfiltered_vcf = OUTPUT_DIR + "vcf/" + genome + "_unfiltered.vcf"
    filtered_vcf = OUTPUT_DIR + "vcf/" + genome + ".vcf"

    # The commands rely on shell features (">" redirection, "*" globbing),
    # so they are passed to the shell as plain strings.
    commands = [
        # NOTE(review): "samtools sort <in.bam> <out.prefix>" looks like the
        # legacy positional form -- confirm against the installed samtools.
        "samtools sort " + filename + " " + genome + ".sorted",
        "samtools index " + sorted_bam,
        "samtools mpileup -uD -f ref_files/TB_H37Rv_sequence_validated.fa "
        + sorted_bam + " > " + mpileup_path,
        "bcftools call -mv -Ov -Vindels " + mpileup_path
        + " > " + unfiltered_vcf,
        "vcfutils.pl varFilter -d10 -Q20 " + unfiltered_vcf
        + " > " + filtered_vcf,
        # Move the sorted BAM and its index out of the working directory.
        "mv " + sorted_bam + "* " + OUTPUT_DIR,
    ]

    for command in commands:
        status = os.system(command)
        if status != 0:
            # Surface the failure instead of silently discarding it; later
            # steps consume this step's output and may therefore fail too.
            sys.stderr.write(
                'Command failed with status ' + str(status) + ' for genome '
                + genome + ': ' + command + '\n'
            )

    getAnnotations(genome)
def main():
    """Process every genome named in the list file given on the command line.

    sys.argv[1] is a text file with one genome name per line.  For each
    name, the first entry of INPUT_DIR whose file name contains the genome
    name and whose extension (the text between the first and second dot)
    is a key of EXT_DICT decides the starting step:

        1 -> mapFastqFiles, 2 -> processSamFiles,
        3 -> processBamFiles, 4 -> getAnnotations

    Genomes without such a file are reported on stderr and skipped.  After
    every 100 processed genomes a progress line is printed and appended to
    'Mutation-analysis.log'.

    Exits with status 1 after printing a usage message when no list file
    is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('USAGE: python processGenomes.py <Genome List>\n')
        sys.exit(1)

    in_file = sys.argv[1]
    files = os.listdir(INPUT_DIR)

    genome_list = {}
    with open(in_file, 'r') as infile:
        for line in infile:
            genome = line.strip()
            if not genome:
                # A blank line would match every file name ('' is a
                # substring of any string) and schedule a bogus genome.
                continue
            # NOTE(review): this is a substring match, so genome "A1" also
            # matches a file named "A10.fastq" -- confirm this is intended.
            for name in files:
                if genome not in name:
                    continue
                parts = name.split('.')
                if len(parts) < 2:
                    # Entry without any extension (e.g. a directory).
                    continue
                ext = parts[1].strip()
                if ext in EXT_DICT:
                    genome_list[genome] = EXT_DICT[ext]
                    break  # the first usable file wins
            else:
                sys.stderr.write('No corresponding input file found for genome ' + genome + '!\n')

    count = 0
    # The context manager closes the log even if a processing step raises.
    with open('Mutation-analysis.log', 'w') as outfile:
        for genome, step in genome_list.items():
            if step == 1:
                mapFastqFiles(genome)
            elif step == 2:
                processSamFiles(genome)
            elif step == 3:
                processBamFiles(genome)
            elif step == 4:
                getAnnotations(genome)
            count = count + 1
            if count % 100 == 0:
                write_data = 'Processed ' + str(count) + ' genomes ...'
                # Single-argument call form: same output on Python 2 and 3.
                print(write_data)
                outfile.write(write_data + '\n')