Ejemplo n.º 1
0
def read_sorted_bedgraphs(directory, transcript_dict):
    """Load paired plus/minus strand bedgraphs found in ``directory``.

    Only entries whose name ends with ``_CNAGsort.bedgraph`` are considered.
    A name containing ``plus`` fills slot 0 and a name containing ``minus``
    fills slot 1 of the sample's entry; the sample key is the part of the
    file name preceding ``_plus`` / ``_minus``.

    Args:
        directory: Directory to scan for bedgraph files.
        transcript_dict: Passed unchanged to ``SP.read_CNAGsort_bedgraph2``.

    Returns:
        dict mapping sample key -> ``[plus, minus]`` where each element is
        the value returned by ``SP.read_CNAGsort_bedgraph2`` for that strand,
        or ``None`` when no file for that strand was seen.
    """
    stranded_bedgraphs = {}
    for file_name in os.listdir(directory):
        if not file_name.endswith("_CNAGsort.bedgraph"):
            continue

        # Plus is tested first, matching the original precedence when a name
        # happens to contain both words.
        if "plus" in file_name:
            marker, slot = '_plus', 0
        elif 'minus' in file_name:
            marker, slot = '_minus', 1
        else:
            continue

        sample = file_name.split(marker)[0]
        if sample not in stranded_bedgraphs:
            stranded_bedgraphs[sample] = [None, None]

        # os.listdir yields bare names, so the directory has to be joined
        # back on; otherwise the file is only found when the current working
        # directory happens to be ``directory``.
        bedgraph_path = os.path.join(directory, file_name)
        stranded_bedgraphs[sample][slot] = SP.read_CNAGsort_bedgraph2(
            bedgraph_path, transcript_dict, organism='pombe')
    return stranded_bedgraphs
Ejemplo n.º 2
0
def find_polyA_sites(
        transcript_dict,
        window=220,
        polyA_bedgraph='/home/jordan/GENOMES/POMBE/polyA_sites_CNAGsort.bedgraph'):
    """Build a window around the strongest poly(A) signal of each transcript.

    The bedgraph is read with ``SP.read_CNAGsort_bedgraph2``; for every
    transcript, positions with a value > 0 are kept and the position with
    the highest value is taken as the poly(A) site.

    Args:
        transcript_dict: Mapping of transcript name -> indexable record.
            Index 2 is compared against ``'+'`` / ``'-'`` (the strand) and
            index 3 is copied into the result unchanged
            (presumably the chromosome -- TODO confirm against the caller).
        window: Half-width added to / subtracted from the selected site.
        polyA_bedgraph: Path of the poly(A) bedgraph to read. Defaults to
            the location that was previously hard-coded.

    Returns:
        dict mapping transcript name ->
        ``[site - window, site + window, transcript_dict[tx][2],
        transcript_dict[tx][3]]``. Transcripts with no positive signal, or
        whose strand is neither ``'+'`` nor ``'-'``, are omitted.
    """
    polyA_bg = SP.read_CNAGsort_bedgraph2(
        polyA_bedgraph, transcript_dict, organism='pombe')
    pA_dict = {}
    # NOTE(review): ``iteritems`` only exists on Python 2 dicts and older
    # pandas objects -- confirm the runtime before porting this call.
    for tx, s in polyA_bg.iteritems():
        # ``s`` is used like a pandas Series (boolean mask, sort_values,
        # .index) -- assumed to be indexed by position.
        positive = s[s > 0]
        if len(positive) == 0:
            continue

        record = transcript_dict[tx]
        strand = record[2]
        # Both strands use the same rule: the highest-signal position wins.
        if strand in ('+', '-'):
            ranked = positive.sort_values(ascending=False)
            pA_site = ranked.index[0]
            pA_dict[tx] = [
                pA_site - window, pA_site + window, strand, record[3]
            ]
    return pA_dict
Ejemplo n.º 3
0
def read_sorted_bedgraphs(directory, transcript_dict):
    """Collect plus/minus strand bedgraphs from ``directory`` per sample.

    Files are selected by the ``_CNAGsort.bedgraph`` suffix. The text before
    ``_plus`` or ``_minus`` in the file name is used as the sample key; the
    plus-strand data is stored at index 0 and the minus-strand data at
    index 1.

    Args:
        directory: Directory to scan for bedgraph files.
        transcript_dict: Passed unchanged to ``SP.read_CNAGsort_bedgraph2``.

    Returns:
        dict mapping sample key -> ``[plus, minus]``; each element is what
        ``SP.read_CNAGsort_bedgraph2`` returned for that strand, or ``None``
        if no matching file was encountered.
    """
    stranded_bedgraphs = {}
    for file_name in os.listdir(directory):
        if not file_name.endswith("_CNAGsort.bedgraph"):
            continue

        # Same precedence as before: "plus" is checked ahead of "minus".
        if "plus" in file_name:
            marker, slot = '_plus', 0
        elif 'minus' in file_name:
            marker, slot = '_minus', 1
        else:
            continue

        sample = file_name.split(marker)[0]
        if sample not in stranded_bedgraphs:
            stranded_bedgraphs[sample] = [None, None]

        # os.listdir returns names without the directory component; join it
        # back so the read does not depend on the current working directory.
        bedgraph_path = os.path.join(directory, file_name)
        stranded_bedgraphs[sample][slot] = SP.read_CNAGsort_bedgraph2(
            bedgraph_path, transcript_dict, organism='pombe')
    return stranded_bedgraphs
Ejemplo n.º 4
0
def find_polyA_sites(transcript_dict, window=220,
                     polyA_bedgraph='/home/jordan/GENOMES/POMBE/polyA_sites_CNAGsort.bedgraph'):
    """Return a +/- ``window`` interval around each transcript's top poly(A) site.

    The poly(A) bedgraph is loaded through ``SP.read_CNAGsort_bedgraph2``.
    Per transcript, only positions with a value > 0 are considered and the
    position carrying the largest value is chosen as the site.

    Args:
        transcript_dict: Mapping of transcript name -> indexable record.
            Index 2 is the strand (compared with ``'+'`` / ``'-'``); index 3
            is copied into the output as-is
            (presumably the chromosome -- TODO confirm against the caller).
        window: Distance subtracted from and added to the chosen site.
        polyA_bedgraph: Bedgraph file to read; the default is the path that
            used to be hard-coded in the function body.

    Returns:
        dict mapping transcript name ->
        ``[site - window, site + window, transcript_dict[tx][2],
        transcript_dict[tx][3]]``. Transcripts without any positive value,
        or with a strand other than ``'+'`` / ``'-'``, are left out.
    """
    polyA_bg = SP.read_CNAGsort_bedgraph2(polyA_bedgraph, transcript_dict, organism='pombe')
    pA_dict = {}
    # NOTE(review): ``iteritems`` is a Python 2 dict / legacy pandas method --
    # confirm the runtime before changing it.
    for tx, s in polyA_bg.iteritems():
        # ``s`` is treated as a pandas-style Series (mask, sort_values,
        # .index) -- assumed to be indexed by position.
        positive = s[s > 0]
        if len(positive) == 0:
            continue

        record = transcript_dict[tx]
        strand = record[2]
        # The two strand branches were identical, so they share one path.
        if strand in ('+', '-'):
            ranked = positive.sort_values(ascending=False)
            pA_site = ranked.index[0]
            pA_dict[tx] = [pA_site - window, pA_site + window, strand, record[3]]
    return pA_dict