Ejemplo n.º 1
0
# Path of the intermediate PSL file: written by the first command, read by the
# second and deleted by the third.
SR_psl_pathfilename = output_path + SR_filename + ".psl"

# Step 1: run the program at gmap_path on the short-read file and redirect its
# standard output into the intermediate PSL file.
gmap_SR_cmd = "".join([
    gmap_path, " ", gmap_option, " ",
    " -D ", gmap_folder,
    " -d ", gmap_index,
    " ", SR_pathfilename,
    " > ", SR_psl_pathfilename,
])
print(gmap_SR_cmd)
log_command(gmap_SR_cmd)

# Step 2: feed the PSL file to blat_best.py. The literal 0 in the command means
# no lines of the gmap PSL output are skipped.
bestblat_SR_cmd = "".join([
    python_path, " ", bin_path2, "blat_best.py ",
    SR_psl_pathfilename, " 0 > ", output_pathfilename,
])
log_command(bestblat_SR_cmd)

# Step 3: delete the intermediate PSL file.
rm_SRpsl_cmd = "rm " + SR_psl_pathfilename + " "

print(rm_SRpsl_cmd)
log_command(rm_SRpsl_cmd)
Ejemplo n.º 2
0
def print_run(cmd, ignorefail=False):
    """Echo a command, print a blank separator line, then pass it to log_command.

    cmd        -- the command to echo and hand to log_command.
    ignorefail -- forwarded unchanged as log_command's second argument.
    """
    # Parenthesised single-argument form prints exactly what `print cmd` does.
    print(cmd)
    print("")
    log_command(cmd, ignorefail)
Ejemplo n.º 3
0
# Build one two-letter suffix per worker ('.aa', '.ab', ..., '.az', '.ba', ...),
# matching the names `split` gives its output pieces when no suffix length is
# requested.
# string.ascii_lowercase is used rather than string.lowercase: the latter is
# locale-dependent (and does not exist in Python 3), whereas the suffixes on
# disk are always plain a-z.
suffix_letters = string.ascii_lowercase
ext_ls = []
for piece_idx in range(Nthread1):
    # First letter advances once the second letter has cycled through a-z.
    first_idx, second_idx = divmod(piece_idx, len(suffix_letters))
    ext_ls.append('.' + suffix_letters[first_idx] + suffix_letters[second_idx])

print("===split SR:===")
# Cut the short-read file into pieces of Nsplitline lines each, named
# <output_path><SR_filename>.<suffix>.
splitSR_cmd = "split -l " + str(Nsplitline) + " " + SR_pathfilename + " " + output_path + SR_filename + "."
print(splitSR_cmd)
log_command(splitSR_cmd)

##########################################
# NOTE(review): the banner says "compress", but the commands issued below invoke
# the program at blat_path on each piece -- confirm the intended label.
print("===compress SR.aa:===")

# Launch one thread per piece; each thread hands its command to log_command and
# the piece's result goes to <piece>.psl.
T_blat_SR_ls = []
for ext in ext_ls:
    SR_piece = output_path + SR_filename + ext
    blat_SR_cmd = blat_path + " " + blat_option + ' ' + SR_piece + ' ' + SR_piece + ".psl"
    print(blat_SR_cmd)
    blat_thread = threading.Thread(target=log_command, args=(blat_SR_cmd,))
    blat_thread.start()
    T_blat_SR_ls.append(blat_thread)

# Wait for every piece to finish before continuing.
for T in T_blat_SR_ls:
    T.join()
Ejemplo n.º 4
0
def _read_gene_record(part_file, gene_line):
    """Read the remainder of one gene record from a partial output file.

    `gene_line` is the record's first line, already consumed by the caller.
    The record continues with an isoforms line, four more lines, one line per
    whitespace-separated token on the isoforms line, one further line, and
    finally a line of integer read counts.

    Returns (record_text, reads_counts): the text of every line of the record
    except the counts line, and the counts parsed as a list of ints.
    """
    isoforms_line = part_file.readline()
    record_lines = [gene_line, isoforms_line]
    for _ in range(4 + len(isoforms_line.split()) + 1):
        record_lines.append(part_file.readline())
    reads_counts = [int(field) for field in part_file.readline().split()]
    return ''.join(record_lines), reads_counts


def main():
    """Split the reads file, run parseSAM.py on each part, and merge the results.

    Positional command-line arguments (sys.argv):
      1  regions file, passed through to parseSAM.py
      2  reads file; its lines are dealt round-robin into <reads>.<idx> parts
      3  number of parts / threads
      4  Python interpreter used to launch parseSAM.py
      5  read length, passed through to parseSAM.py
      6  minimum junction overlap length, passed through to parseSAM.py

    The merged result is written to 'refSeq_MLE_input.txt' in the current
    directory. Its first line is the sum of the integer first lines of the
    partial outputs; gene records follow in sorted order of their first line,
    with the read counts of records sharing the same first line added
    element-wise. The per-part input and output files are removed at the end.
    """
    # Read input parameters
    bin_path, _command = GetPathAndName(sys.argv[0])
    regions_filename = sys.argv[1]
    reads_filename = sys.argv[2]
    num_threads = int(sys.argv[3])
    python_path = sys.argv[4]
    read_len = sys.argv[5]
    min_junction_overlap_len = sys.argv[6]
    output_filename = 'refSeq_MLE_input.txt'

    part_reads_filenames = [reads_filename + '.' + str(idx) for idx in range(num_threads)]
    part_output_filenames = [output_filename + '.' + str(idx) for idx in range(num_threads)]

    # Deal the reads round-robin into one file per thread. All handles are
    # closed even if a write fails part-way through.
    with open(reads_filename, 'r') as reads_file:
        reads_files = []
        try:
            for part_name in part_reads_filenames:
                reads_files.append(open(part_name, 'w'))

            thread_idx = 0
            for line in reads_file:
                reads_files[thread_idx].write(line)
                thread_idx = (thread_idx + 1) % num_threads
        finally:
            for part_file in reads_files:
                part_file.close()

    ##############################

    # One thread per part; each hands its parseSAM.py command to log_command.
    threads_list = []
    for part_reads, part_output in zip(part_reads_filenames, part_output_filenames):
        cmd = ' '.join([python_path, bin_path + 'parseSAM.py', regions_filename,
                        part_reads, part_output, read_len, min_junction_overlap_len])
        print(cmd)
        worker = threading.Thread(target=log_command, args=(cmd,))
        worker.start()
        threads_list.append(worker)

    for worker in threads_list:
        worker.join()

    genes_str_map = {}           # gene line -> record text (without counts line)
    genes_reads_count_map = {}   # gene line -> accumulated read counts

    with open(output_filename, 'w') as output_file:
        output_files = []
        try:
            # The first line of every partial output is an integer; the merged
            # file starts with their sum.
            header = 0
            for part_name in part_output_filenames:
                part_file = open(part_name, 'r')
                output_files.append(part_file)
                header += int(part_file.readline())
            output_file.write(str(header) + '\n')

            for part_file in output_files:
                # readline() returns '' only at end of file.
                for gene_line in iter(part_file.readline, ''):
                    record_text, reads_counts = _read_gene_record(part_file, gene_line)
                    if gene_line not in genes_str_map:
                        # First occurrence: keep the record text and its counts.
                        genes_str_map[gene_line] = record_text
                        genes_reads_count_map[gene_line] = reads_counts
                    else:
                        # Seen before: only the counts are accumulated.
                        merged_counts = genes_reads_count_map[gene_line]
                        for i, count in enumerate(reads_counts):
                            merged_counts[i] += count
        finally:
            for part_file in output_files:
                part_file.close()

        for gene in sorted(genes_str_map):
            output_file.write(genes_str_map[gene])
            for count in genes_reads_count_map[gene]:
                output_file.write(str(count).ljust(20))
            output_file.write('\n')

    # Remove the per-part intermediate files.
    for part_reads, part_output in zip(part_reads_filenames, part_output_filenames):
        rm_cmnd = "rm " + part_output + ' ' + part_reads
        log_command(rm_cmnd)
Ejemplo n.º 5
0
def main():
    """Split the input by record, run MLE_regions.py on each part, and join the outputs.

    Positional command-line arguments (sys.argv):
      1  input file; its first line is a header copied into every part
      2  output file, produced by concatenating the per-part outputs
      3  number of parts / threads
      4  Python interpreter used to launch MLE_regions.py
      5  (optional) penalty file, appended to each MLE_regions.py command

    Each record consists of a first line whose second whitespace-separated
    field is the isoform count, followed by six lines, one line per isoform,
    and two more lines. Records are dealt round-robin into <input>.<idx>.
    The per-part input and output files are removed at the end.
    """
    # Read input parameters
    bin_path, _command = GetPathAndName(sys.argv[0])
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    num_threads = int(sys.argv[3])
    python_path = sys.argv[4]
    penalty_filename = sys.argv[5] if len(sys.argv) > 5 else ''

    part_input_filenames = [input_filename + '.' + str(idx) for idx in range(num_threads)]
    part_output_filenames = [output_filename + '.' + str(idx) for idx in range(num_threads)]

    # Split the input; every handle is closed even if a malformed record
    # raises part-way through.
    with open(input_filename, 'r') as input_file:
        header = input_file.readline()
        input_files = []
        try:
            for part_name in part_input_filenames:
                part_file = open(part_name, 'w')
                input_files.append(part_file)
                part_file.write(header)

            thread_idx = 0
            # readline() returns '' only at end of file.
            for line in iter(input_file.readline, ''):
                num_isoforms = int(line.split()[1])
                part_file = input_files[thread_idx]
                part_file.write(line)

                # Copy the rest of the record: 6 lines, one per isoform, 2 lines.
                for _ in range(6):
                    part_file.write(input_file.readline())
                for _ in range(num_isoforms):
                    part_file.write(input_file.readline())
                for _ in range(2):
                    part_file.write(input_file.readline())

                thread_idx = (thread_idx + 1) % num_threads
        finally:
            for part_file in input_files:
                part_file.close()

    ##############################
    # One thread per part; each hands its MLE_regions.py command to log_command.
    threads_list = []
    for part_input, part_output in zip(part_input_filenames, part_output_filenames):
        cmd = python_path + " " + bin_path + 'MLE_regions.py ' + part_input + ' ' + part_output
        if penalty_filename != '':
            cmd += ' ' + penalty_filename
        print(cmd)
        worker = threading.Thread(target=log_command, args=(cmd,))
        worker.start()
        threads_list.append(worker)

    for worker in threads_list:
        worker.join()

    # Concatenate the per-part outputs in index order, then delete all
    # per-part files.
    cat_cmnd = 'cat ' + ''.join(name + " " for name in part_output_filenames)
    cat_cmnd += ' > ' + output_filename
    rm_cmnd = "rm " + ''.join(
        part_output + " " + part_input + " "
        for part_input, part_output in zip(part_input_filenames, part_output_filenames)
    )
    log_command(cat_cmnd)
    log_command(rm_cmnd)