# Align the short reads with GMAP; the shell redirection stores the
# alignments in "<output_path><SR_filename>.psl".
gmap_SR_cmd = "".join([
    gmap_path, " ", gmap_option, " ",
    " -D ", gmap_folder,
    " -d ", gmap_index,
    " ", SR_pathfilename,
    " > ", output_path, SR_filename, ".psl",
])
print(gmap_SR_cmd)
log_command(gmap_SR_cmd)

# Run blat_best.py over the GMAP PSL file. The literal "0" argument means no
# lines of the PSL file are skipped.
bestblat_SR_cmd = "".join([
    python_path, " ", bin_path2, "blat_best.py ",
    output_path, SR_filename, ".psl 0 > ", output_pathfilename,
])
log_command(bestblat_SR_cmd)

# Remove the intermediate PSL file now that blat_best.py has consumed it.
rm_SRpsl_cmd = "".join(["rm ", output_path, SR_filename, ".psl "])
print(rm_SRpsl_cmd)
log_command(rm_SRpsl_cmd)
def print_run(cmd, ignorefail=False):
    """Echo *cmd*, print a blank separator line, then pass it to log_command.

    Args:
        cmd: the command to echo and hand to ``log_command``.
        ignorefail: forwarded unchanged as ``log_command``'s second argument.
    """
    for text in (cmd, ""):
        print(text)
    log_command(cmd, ignorefail)
# One two-letter suffix per thread: ".aa", ".ab", ..., ".az", ".ba", ...
# These are the names the blat commands below expect the split chunks to have.
# ascii_lowercase is used instead of string.lowercase because the latter is
# locale-dependent, while the suffixes must be plain ASCII letters.
# An IndexError is still raised if Nthread1 exceeds 26 * 26 suffixes.
ext_ls = [
    '.' + string.ascii_lowercase[n // 26] + string.ascii_lowercase[n % 26]
    for n in range(Nthread1)
]

print("===split SR:===")
# Cut the short-read file into chunks of Nsplitline lines each, written as
# "<output_path><SR_filename>.<suffix>".
# NOTE(review): assumes Nsplitline was chosen so that split produces exactly
# Nthread1 chunks -- confirm against the code that computes it.
splitSR_cmd = "split -l " + str(Nsplitline) + " " + SR_pathfilename + " " + output_path + SR_filename + "."
print(splitSR_cmd)
log_command(splitSR_cmd)

##########################################
# NOTE(review): the banner says "compress", but the commands launched below
# run blat on every chunk; the text is left unchanged in case logs are parsed.
print("===compress SR.aa:===")

# Start one blat command per chunk, each on its own thread, then wait for
# all of them to finish.
T_blat_SR_ls = []
for ext in ext_ls:
    blat_SR_cmd = blat_path + " " + blat_option + ' ' + output_path + SR_filename + ext + ' ' + output_path + SR_filename + ext + ".psl"
    print(blat_SR_cmd)
    T = threading.Thread(target=log_command, args=(blat_SR_cmd,))
    T.start()
    T_blat_SR_ls.append(T)

for T in T_blat_SR_ls:
    T.join()
def _split_reads_round_robin(reads_filename, shard_filenames):
    """Deal the lines of *reads_filename* out to the shard files in turn."""
    with open(reads_filename, 'r') as reads_file:
        shards = []
        try:
            for shard_filename in shard_filenames:
                shards.append(open(shard_filename, 'w'))
            shard_idx = 0
            for line in reads_file:
                shards[shard_idx].write(line)
                shard_idx = (shard_idx + 1) % len(shards)
        finally:
            # Close every shard that was opened, even if the copy failed.
            for shard in shards:
                shard.close()


def _run_parse_commands(commands):
    """Print each command, hand each to log_command on its own thread, and wait."""
    workers = []
    for cmd in commands:
        print(cmd)
        worker = threading.Thread(target=log_command, args=(cmd,))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()


def _read_gene_record(shard_output, gene_line):
    """Read the remainder of the record whose first line is *gene_line*.

    Returns ``(text, counts)``: *text* is the record up to, but excluding,
    its final line; *counts* is that final line parsed as a list of ints.
    """
    isoforms_line = shard_output.readline()
    pieces = [gene_line, isoforms_line]
    # Before the counts line come four fixed lines, one line per
    # whitespace-separated token of the isoforms line, and one more line.
    for _ in range(4 + len(isoforms_line.split()) + 1):
        pieces.append(shard_output.readline())
    counts = [int(token) for token in shard_output.readline().split()]
    return ''.join(pieces), counts


def _merge_parse_outputs(shard_output_filenames, output_filename):
    """Merge the per-thread output files into *output_filename*.

    The first line of every per-thread file is an integer; the merged file
    starts with their sum. Records are keyed by their first line: the text of
    the first occurrence is kept, and the trailing count lines of all
    occurrences are summed element-wise. Records are written in sorted key
    order, each count left-justified in a 20-character column.
    """
    shard_outputs = []
    try:
        header = 0
        for shard_output_filename in shard_output_filenames:
            shard_output = open(shard_output_filename, 'r')
            shard_outputs.append(shard_output)
            header += int(shard_output.readline())

        genes_str_map = {}
        genes_reads_count_map = {}
        for shard_output in shard_outputs:
            while True:
                gene_line = shard_output.readline()
                if gene_line == '':
                    break
                text, counts = _read_gene_record(shard_output, gene_line)
                if gene_line not in genes_str_map:
                    genes_str_map[gene_line] = text
                    genes_reads_count_map[gene_line] = counts
                else:
                    totals = genes_reads_count_map[gene_line]
                    for idx, count in enumerate(counts):
                        totals[idx] += count

        with open(output_filename, 'w') as output_file:
            output_file.write(str(header) + '\n')
            for gene in sorted(genes_str_map):
                output_file.write(genes_str_map[gene])
                output_file.write(''.join(
                    str(count).ljust(20)
                    for count in genes_reads_count_map[gene]))
                output_file.write('\n')
    finally:
        # Close every per-thread file that was opened, even on failure.
        for shard_output in shard_outputs:
            shard_output.close()


def main():
    """Run ``parseSAM.py`` over the reads in parallel and merge the results.

    Positional command-line arguments (``sys.argv[1:]``), in order:
    regions file, reads file, number of threads, Python interpreter path,
    read length, minimum junction overlap length.

    The reads file is dealt out line by line, round-robin, into one shard per
    thread (``<reads>.<idx>``). One ``parseSAM.py`` command per shard is
    handed to ``log_command`` on its own thread and writes
    ``refSeq_MLE_input.txt.<idx>``. Those outputs are merged into
    ``refSeq_MLE_input.txt`` and the temporary files are then removed.

    Raises:
        ValueError: if the requested number of threads is less than 1.
    """
    # Read input parameters
    bin_path, _ = GetPathAndName(sys.argv[0])
    regions_filename = sys.argv[1]
    reads_filename = sys.argv[2]
    num_threads = int(sys.argv[3])
    python_path = sys.argv[4]
    # Kept as strings: they are only spliced into the command line.
    read_len = sys.argv[5]
    min_junction_overlap_len = sys.argv[6]
    output_filename = 'refSeq_MLE_input.txt'

    if num_threads < 1:
        raise ValueError(
            "number of threads must be at least 1, got %d" % num_threads)

    shard_reads = [reads_filename + '.' + str(idx)
                   for idx in range(num_threads)]
    shard_outputs = [output_filename + '.' + str(idx)
                     for idx in range(num_threads)]

    _split_reads_round_robin(reads_filename, shard_reads)

    ##############################
    commands = [
        (python_path + " " + bin_path + 'parseSAM.py ' + regions_filename
         + ' ' + shard_read + ' ' + shard_output
         + ' ' + read_len + ' ' + min_junction_overlap_len)
        for shard_read, shard_output in zip(shard_reads, shard_outputs)
    ]
    _run_parse_commands(commands)

    _merge_parse_outputs(shard_outputs, output_filename)

    # Temporary files are removed only after a successful merge, so they
    # remain available for inspection if anything above failed.
    for shard_read, shard_output in zip(shard_reads, shard_outputs):
        rm_cmnd = "rm " + shard_output + ' ' + shard_read
        log_command(rm_cmnd)
def _split_regions_round_robin(input_filename, shard_filenames):
    """Distribute the records of *input_filename* over the shard files.

    The first line of the input is copied to the top of every shard. Each
    following record starts with a line whose second whitespace-separated
    field is an integer n, followed by 6 + n + 2 further lines. Whole records
    go to the shards in round-robin order.
    """
    with open(input_filename, 'r') as input_file:
        header = input_file.readline()
        shards = []
        try:
            for shard_filename in shard_filenames:
                shard = open(shard_filename, 'w')
                shards.append(shard)
                shard.write(header)

            shard_idx = 0
            while True:
                line = input_file.readline()
                if line == "":
                    break
                num_isoforms = int(line.split()[1])
                shard = shards[shard_idx]
                shard.write(line)
                # Six fixed lines, one line per isoform, two trailing lines.
                for _ in range(6 + num_isoforms + 2):
                    shard.write(input_file.readline())
                shard_idx = (shard_idx + 1) % len(shards)
        finally:
            # Close every shard that was opened, even if the split failed.
            for shard in shards:
                shard.close()


def _run_mle_commands(commands):
    """Print each command, hand each to log_command on its own thread, and wait."""
    workers = []
    for cmd in commands:
        print(cmd)
        worker = threading.Thread(target=log_command, args=(cmd,))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()


def main():
    """Run ``MLE_regions.py`` over the input in parallel and join the results.

    Positional command-line arguments (``sys.argv[1:]``), in order:
    input file, output file, number of threads, Python interpreter path and,
    optionally, a penalty file that is appended to every worker command.

    The input is split record by record into ``<input>.<idx>`` shards. One
    ``MLE_regions.py`` command per shard is handed to ``log_command`` on its
    own thread and writes ``<output>.<idx>``. The per-thread outputs are
    concatenated in index order into the output file with ``cat``, and the
    temporary files are removed with ``rm``.

    Raises:
        ValueError: if the requested number of threads is less than 1.
    """
    # Read input parameters
    bin_path, _ = GetPathAndName(sys.argv[0])
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    num_threads = int(sys.argv[3])
    python_path = sys.argv[4]
    penalty_filename = ''
    if len(sys.argv) > 5:
        penalty_filename = sys.argv[5]

    # Without any shard the final command would be "cat  > <output>", i.e.
    # cat with no file operands, which reads standard input instead.
    if num_threads < 1:
        raise ValueError(
            "number of threads must be at least 1, got %d" % num_threads)

    shard_inputs = [input_filename + '.' + str(idx)
                    for idx in range(num_threads)]
    shard_outputs = [output_filename + '.' + str(idx)
                     for idx in range(num_threads)]

    _split_regions_round_robin(input_filename, shard_inputs)

    ##############################
    commands = []
    for shard_input, shard_output in zip(shard_inputs, shard_outputs):
        cmd = (python_path + " " + bin_path + 'MLE_regions.py '
               + shard_input + ' ' + shard_output)
        if penalty_filename != '':
            cmd += ' ' + penalty_filename
        commands.append(cmd)
    _run_mle_commands(commands)

    # Concatenate the per-thread outputs in index order, then delete every
    # temporary input and output shard.
    cat_cmnd = ('cat ' + ''.join(name + " " for name in shard_outputs)
                + ' > ' + output_filename)
    rm_cmnd = "rm " + ''.join(
        shard_output + " " + shard_input + " "
        for shard_input, shard_output in zip(shard_inputs, shard_outputs))
    log_command(cat_cmnd)
    log_command(rm_cmnd)