def wig_pipe(infile, outfile):
    '''
    (infile, outfile)

    Run the bedfile -> wig conversion pipeline from wig_scripts.

    infile is handed to wig_scripts.read_tab_data; the rows it yields are
    turned into read positions with wig_scripts.bedfile_to_read_pos, tallied
    with wig_scripts.count_reads, and the tally is passed together with
    outfile to wig_scripts.wig_out.  Nothing is returned by this function
    itself.

    NOTE(review): judging by the helper names, infile is a tab-delimited
    .bedfile alignment and the output is .wig formatted -- confirm against
    wig_scripts.
    '''
    import wig_scripts as ws

    bed_rows = ws.read_tab_data(infile)
    positions = ws.bedfile_to_read_pos(bed_rows)
    tallies = ws.count_reads(positions)
    ws.wig_out(outfile, tallies)
def process_bedfile_for_HMM(infile1, infile2, outfile1):
    '''
    (total_TA_pos.wig, TnSeq_read_pos.bedfile, tnseq_data.wig)
        -> write to tnseq_data.wig

    Combine a list of 'TA' positions with TnSeq read positions and write,
    for every 'TA' position, the number of reads found at that position.

    infile1  -- open file parsed with wig_scripts.read_space_data.  Its
                first row is treated as a header; the first field of every
                following row must be convertible to int (the 'TA'
                position).
    infile2  -- open file parsed with wig_scripts.read_tab_data and then
                converted with wig_scripts.bedfile_to_read_pos; every
                resulting item must be convertible to int (a read
                position).
    outfile1 -- open, writable file.  Receives the first two header fields
                joined by a single space, followed by one
                "<position> <count>" line per 'TA' position, in the order
                the positions appear in infile1.

    All three file objects are closed before returning, including when an
    error occurs while reading or writing.  Nothing is returned.

    NOTE(review): per the original description, infile1 lists the 'TA'
    sites of an Rlv3841 replicon and the output is meant for a downstream
    tool (presumably an HMM, going by the function name; the original
    description was cut off) -- confirm with the caller.
    '''
    from collections import Counter

    import wig_scripts

    try:
        ta_rows = wig_scripts.read_space_data(infile1)
        bed_rows = wig_scripts.read_tab_data(infile2)
        read_positions = wig_scripts.bedfile_to_read_pos(bed_rows)

        # The first row of the TA file is its header, not a position.
        header = ta_rows.pop(0)
        ta_sites = [int(row[0]) for row in ta_rows]

        # Tally the reads once so that each TA site is a constant-time
        # lookup instead of a full scan of the read list; sites without
        # any read look up as 0.
        reads_per_pos = Counter(int(pos) for pos in read_positions)

        outfile1.write(header[0] + ' ' + header[1] + '\n')
        outfile1.writelines(
            str(site) + ' ' + str(reads_per_pos[site]) + '\n'
            for site in ta_sites
        )
    finally:
        # The function has always closed the handles it was given; doing
        # it here keeps them from leaking when a step above raises.
        outfile1.close()
        infile1.close()
        infile2.close()