def mapFastqFiles(genome):
    """Align a genome's FASTQ reads with ``bwa mem`` and produce one SAM file.

    The paired-end files ``<genome>_1.fastq`` / ``<genome>_2.fastq`` are
    always mapped.  If a single-end file ``<genome>.fastq`` also exists it is
    mapped separately and both alignments are merged with
    ``run_picard MergeSamFiles``; otherwise the paired-end alignment is simply
    renamed.  The result is ``OUTPUT_DIR + genome + ".sam"``, after which
    ``processSamFiles(genome)`` is called.

    Args:
        genome: Genome identifier used as the base name of the input and
            output files.

    Note:
        Exit statuses of the shell commands are not inspected; a failing
        external tool does not stop the pipeline.
    """
    # The read directory depends on who called us: when the immediate
    # caller's function name contains "main" the reads come from INPUT_DIR,
    # otherwise from OUTPUT_DIR.
    caller_name = inspect.stack()[1][3]
    if "main" in caller_name:
        source_dir = INPUT_DIR
    else:
        source_dir = OUTPUT_DIR

    single_fastq = source_dir + genome + ".fastq"
    mate1_fastq = source_dir + genome + "_1.fastq"
    mate2_fastq = source_dir + genome + "_2.fastq"

    se_sam = OUTPUT_DIR + genome + "_se.sam"
    pe_sam = OUTPUT_DIR + genome + "_pe.sam"
    final_sam = OUTPUT_DIR + genome + ".sam"

    # NOTE(review): "-t#cpus" is handed to bwa verbatim; it looks like a
    # placeholder for the thread count -- confirm before relying on it.
    bwa_prefix = "bwa mem -t#cpus ref_files/TB_H37Rv_sequence_validated.fa "

    se_exists = os.path.isfile(single_fastq)
    if se_exists:
        print("Single End mapping")
        os.system(bwa_prefix + single_fastq + " > " + se_sam)

    # Paired-end mapping runs unconditionally.
    os.system(bwa_prefix + mate1_fastq + " " + mate2_fastq + " > " + pe_sam)

    if se_exists:
        # The original had a bare string expression here, so the message was
        # never shown; print it as intended.
        print("Merging sams")
        os.system(
            "run_picard MergeSamFiles INPUT=" + se_sam
            + " INPUT=" + pe_sam
            + " OUTPUT=" + final_sam
        )
        # The intermediate alignments are no longer needed once merged.
        os.system("rm -rf " + se_sam)
        os.system("rm -rf " + pe_sam)
    else:
        # Only the paired-end alignment exists: it becomes the final SAM.
        os.system("mv " + pe_sam + " " + final_sam)

    processSamFiles(genome)
def main():
    """Run the pipeline for every genome named in the list file.

    The first command-line argument is a text file with one genome name per
    line.  For each genome, the entries of ``INPUT_DIR`` are searched for a
    name that contains the genome name and whose second dot-separated
    component is a key of ``EXT_DICT``; the mapped value selects the pipeline
    step to start from (1: ``mapFastqFiles``, 2: ``processSamFiles``,
    3: ``processBamFiles``, 4: ``getAnnotations``).  Genomes without a
    matching input file are reported on stderr and skipped.

    A progress line is printed and appended to ``Mutation-analysis.log``
    after every 100 genomes.

    Exits with status 1 (after printing usage) when no list file is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('USAGE: python processGenomes.py <Genome List>\n')
        sys.exit(1)

    in_file = sys.argv[1]
    genome_list = {}
    files = os.listdir(INPUT_DIR)

    with open(in_file, 'r') as infile:
        for line in infile:
            genome = line.strip()
            if not genome:
                # A blank line would substring-match every file name and
                # register a bogus empty genome.
                continue
            for name in files:
                # NOTE(review): substring match, so genome "G1" also matches
                # "G10.fastq" -- confirm this is acceptable for the naming
                # scheme in use.
                if genome not in name:
                    continue
                parts = name.split('.')
                if len(parts) < 2:
                    # Entry without any extension (e.g. a directory);
                    # indexing [1] would raise IndexError.
                    continue
                ext = parts[1].strip()
                if ext in EXT_DICT:
                    genome_list[genome] = EXT_DICT[ext]
                    break
            else:
                sys.stderr.write('No corresponding input file found for genome ' + genome + '!\n')

    count = 0
    # The context manager closes the log even if a processing step raises.
    with open('Mutation-analysis.log', 'w') as outfile:
        for genome, step in genome_list.items():
            if step == 1:
                mapFastqFiles(genome)
            elif step == 2:
                processSamFiles(genome)
            elif step == 3:
                processBamFiles(genome)
            elif step == 4:
                getAnnotations(genome)
            count = count + 1
            if count % 100 == 0:
                write_data = 'Processed ' + str(count) + ' genomes ...'
                print(write_data)
                outfile.write(write_data + '\n')