Ejemplo n.º 1
0
def ribosome_cut(species):
    """Read the depth files for *species* and return their rows cut into chunks.

    Every file in the species' file list (under ``original_data/``) is read
    with ``csv.reader``.  The first comma-separated field of a row is split on
    a tab into an identifier and the first depth value; rows whose first depth
    value is ``'NA'`` are skipped.  The first depth value, followed by
    ``line[1:len(fna_dic[id]) + 1]``, is joined with ``';'`` and the resulting
    string is sliced into pieces of 5000 characters (the slicing is purely
    character based, so a piece may start or end in the middle of a value).

    Args:
        species: Species code; ``"hs"`` selects the ten-file list, any other
            value selects the single ``SRR3208406.fdp`` file.  It is also
            forwarded to ``sqlcommon.as_coor`` / ``sqlcommon.as_fna``.

    Returns:
        dict mapping ``"<id>_<chunk index>"`` to a list of chunk strings.
        List position ``n`` holds the chunk from the ``n``-th file; positions
        for earlier files that did not provide the key are filled with ``''``.
        Lists are not padded for files processed after the last one that
        provided the key.

    Raises:
        ValueError: if the same identifier appears twice in one file.  (The
            previous ``while f_count != len(...)`` padding loop never
            terminated in that situation.)
        KeyError: if a row's identifier is missing from the dictionary
            returned by ``sqlcommon.as_fna``.
    """
    chunk_size = 5000

    coor_dic = sqlcommon.as_coor(species)
    fna_dic = sqlcommon.as_fna(coor_dic, species)

    if species == "hs":
        file_array = ['SRR970538.fdp', 'SRR970490.fdp', 'SRR970565.fdp',
                      'SRR970561.fdp', 'SRR970587.fdp', 'SRR970588.fdp',
                      'H_Rep1.fdp', 'H_Rep2.fdp', 'N_Rep1.fdp', 'N_Rep2.fdp']
    else:
        file_array = ['SRR3208406.fdp']
    f_ri_depth = "original_data/"

    ri_depth_dic = {}

    # f_count is the index of the file being read; it doubles as the list
    # position at which this file's chunks must be stored.
    for f_count, file_name in enumerate(file_array):
        with open(f_ri_depth + file_name, 'r') as fh:
            for line in csv.reader(fh):
                # Progress output kept from the original implementation.
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(file_name)
                print(line[0])

                temp_array = line[0].split('\t')
                if temp_array[1] == 'NA':
                    continue

                seq_id = temp_array[0]
                seq_len = len(fna_dic[seq_id])

                depth_values = [temp_array[1]]
                depth_values.extend(line[1:seq_len + 1])
                depth_str = ';'.join(depth_values)

                # len // chunk_size full chunks plus one trailing remainder
                # (possibly empty).  Slicing past the end of the string simply
                # returns the remainder, so one loop covers both cases.
                for k in range(len(depth_str) // chunk_size + 1):
                    temp_key = seq_id + "_" + str(k)
                    per_file = ri_depth_dic.setdefault(temp_key, [])

                    if len(per_file) > f_count:
                        # Slot for this file is already taken: the identifier
                        # occurred earlier in the same file.
                        raise ValueError(
                            "duplicate entry %r in %s" % (seq_id, file_name))

                    # Pad with '' for earlier files that lacked this key.
                    per_file.extend([''] * (f_count - len(per_file)))
                    per_file.append(
                        depth_str[k * chunk_size:(k + 1) * chunk_size])

                # Report rows whose field count differs from the sequence length.
                if len(line) != seq_len:
                    print(line)

    return ri_depth_dic
Ejemplo n.º 2
0
        if not coor_dic[q][0] in gene_uorf_list:
            gene_uorf_list.append(coor_dic[q][0])

    return uorf_dic


# ---------------- ---------------- ---------------- ---------------- #
# Main Program
# ---------------- ---------------- ---------------- ---------------- #
if __name__ == "__main__":
    # Script entry point.  Example invocation: ./sql_main.py mouse

    # The species identifier is the first command-line argument.
    species = sys.argv[1]
    # Distinct coor_dic[nm][0] values of the entries that survive the filter below.
    gene_list = []

    coor_dic = sqlcommon.as_coor(species)
    # Collects coor_dic[nm][1] of the surviving entries (see review note below).
    nm_list = []

    # NOTE(review): entries are deleted from coor_dic while looping over
    # coor_dic.keys().  Assuming coor_dic is a plain dict, this is only safe
    # when keys() returns a list snapshot (Python 2); with a Python 3 key view
    # it raises RuntimeError.  Iterate over list(coor_dic.keys()) if porting.
    for nm in coor_dic.keys():
        # Drop every entry whose second field is 0.
        if coor_dic[nm][1] == 0:
            del coor_dic[nm]
        else:
            if not coor_dic[nm][0] in gene_list:
                gene_list.append(coor_dic[nm][0])
            # NOTE(review): the membership test uses the key `nm`, but the value
            # appended is coor_dic[nm][1].  Unless keys and second fields share
            # values, the test never filters anything and nm_list may contain
            # duplicates -- confirm which of the two was intended.
            if not nm in nm_list:
                nm_list.append(coor_dic[nm][1])

    # Build the per-species data sets from the filtered coordinate dictionary.
    IREZone_dic = data_IRES_IREZone(species, coor_dic)
    uorf_dic = data_uorf_sort(coor_dic, species)
    all_data_dic = {}