def read_sorted_bedgraphs(directory, transcript_dict):
    """Load the stranded ``*_CNAGsort.bedgraph`` files found in *directory*.

    Files are grouped by sample name, taken as the part of the file name
    that precedes ``_plus`` or ``_minus``.

    Parameters
    ----------
    directory : str
        Directory to scan (not recursive).
    transcript_dict : dict
        Passed unchanged to ``SP.read_CNAGsort_bedgraph2``.

    Returns
    -------
    dict
        ``{sample_name: [plus, minus]}`` where each slot holds the value
        returned by ``SP.read_CNAGsort_bedgraph2`` for that strand's file,
        or ``None`` when no file for that strand was seen.
    """
    stranded_bedgraphs = {}
    for name in os.listdir(directory):
        if not name.endswith("_CNAGsort.bedgraph"):
            continue

        # "plus" is tested before "minus", so a name containing both words
        # is treated as a plus-strand file.
        if "plus" in name:
            sample, slot = name.split('_plus')[0], 0
        elif 'minus' in name:
            sample, slot = name.split('_minus')[0], 1
        else:
            continue

        # os.listdir() yields bare file names; join them with the directory
        # so the reader also works when `directory` is not the current
        # working directory.
        path = os.path.join(directory, name)

        pair = stranded_bedgraphs.setdefault(sample, [None, None])
        pair[slot] = SP.read_CNAGsort_bedgraph2(
            path, transcript_dict, organism='pombe')
    return stranded_bedgraphs
def find_polyA_sites(
        transcript_dict, window=220,
        polyA_bedgraph='/home/jordan/GENOMES/POMBE/polyA_sites_CNAGsort.bedgraph'):
    """Build a window around the strongest polyA signal of each transcript.

    Parameters
    ----------
    transcript_dict : dict
        Keyed by transcript name. Element ``[2]`` of each value is compared
        against ``'+'`` / ``'-'`` (the strand); element ``[3]`` is copied
        into the result unchanged (presumably the chromosome -- verify
        against the code that builds this dict).
    window : int
        Distance added on each side of the chosen site.
    polyA_bedgraph : str
        Path of the polyA-site bedgraph to read. Defaults to the location
        that used to be hard-coded in this function.

    Returns
    -------
    dict
        ``{transcript: [site - window, site + window, strand, element_3]}``
        for every transcript that has at least one value > 0 and a strand of
        ``'+'`` or ``'-'``.
    """
    polyA_bg = SP.read_CNAGsort_bedgraph2(
        polyA_bedgraph, transcript_dict, organism='pombe')

    pA_dict = {}
    # .items() works on Python 2 and 3 dicts and on pandas objects, whereas
    # .iteritems() is gone from Python 3 dicts and from pandas >= 2.0.
    for tx, s in polyA_bg.items():
        s = s[s > 0]
        if len(s) == 0:
            continue

        strand = transcript_dict[tx][2]
        if strand not in ('+', '-'):
            continue

        # Index label of the largest value. Equivalent to sorting in
        # descending order and taking the first label, but without mutating
        # a filtered slice in place; on ties the first occurrence wins.
        pA_site = s.idxmax()
        pA_dict[tx] = [
            pA_site - window, pA_site + window,
            strand, transcript_dict[tx][3]
        ]
    return pA_dict
def read_sorted_bedgraphs(directory, transcript_dict):
    """Read every stranded ``*_CNAGsort.bedgraph`` file in *directory*.

    Args:
        directory: Directory whose entries are scanned (no recursion).
        transcript_dict: Forwarded as-is to ``SP.read_CNAGsort_bedgraph2``.

    Returns:
        A dict mapping each sample name (the file name up to ``_plus`` or
        ``_minus``) to a two-item list ``[plus, minus]``. Each item is the
        result of ``SP.read_CNAGsort_bedgraph2`` for that strand's file, or
        ``None`` if that file was not present.
    """
    # (keyword searched for in the file name, slot in the [plus, minus] pair)
    # Order matters: "plus" is checked first, as in an if/elif chain.
    strand_slots = (('plus', 0), ('minus', 1))

    stranded_bedgraphs = {}
    for entry in os.listdir(directory):
        if not entry.endswith("_CNAGsort.bedgraph"):
            continue
        for keyword, slot in strand_slots:
            if keyword not in entry:
                continue
            sample = entry.split('_' + keyword)[0]
            if sample not in stranded_bedgraphs:
                stranded_bedgraphs[sample] = [None, None]
            # os.listdir() returns names relative to `directory`; build the
            # full path so reading does not depend on the current working
            # directory.
            stranded_bedgraphs[sample][slot] = SP.read_CNAGsort_bedgraph2(
                os.path.join(directory, entry), transcript_dict,
                organism='pombe')
            break
    return stranded_bedgraphs
def find_polyA_sites(
        transcript_dict, window=220,
        polyA_bedgraph='/home/jordan/GENOMES/POMBE/polyA_sites_CNAGsort.bedgraph'):
    """Return a +/- ``window`` interval around each transcript's top polyA site.

    Args:
        transcript_dict: Mapping keyed by transcript name. ``value[2]`` is
            read as the strand (``'+'`` or ``'-'``) and ``value[3]`` is
            copied through to the output (NOTE(review): looks like the
            chromosome name -- confirm with the producer of this dict).
        window: Offset subtracted from / added to the selected site.
        polyA_bedgraph: Bedgraph file handed to
            ``SP.read_CNAGsort_bedgraph2``. The default is the path this
            function previously had hard-coded.

    Returns:
        dict of ``transcript -> [site - window, site + window, strand,
        value[3]]``. Transcripts with no value above zero, or with a strand
        other than ``'+'`` / ``'-'``, are omitted.
    """
    polyA_bg = SP.read_CNAGsort_bedgraph2(
        polyA_bedgraph, transcript_dict, organism='pombe')

    pA_dict = {}
    # Use .items(): available on Python 2/3 dicts and pandas containers,
    # unlike .iteritems().
    for tx, signal in polyA_bg.items():
        positive = signal[signal > 0]
        if len(positive) > 0:
            info = transcript_dict[tx]
            strand = info[2]
            # Both strands are handled identically: the site is the index
            # label carrying the highest value (first one on ties).
            if strand == '+' or strand == '-':
                pA_site = positive.idxmax()
                pA_dict[tx] = [pA_site - window, pA_site + window,
                               strand, info[3]]
    return pA_dict