def read_assoc(assoc_loc, gene_name,gene_region_dict):
    """Return the lowest-p SNP found inside one gene's region of a table.

    The first line of ``assoc_loc`` is passed to
    ``pc_toolbox.read_assoc_titles`` (together with the module-level
    ``c_interval``) to obtain the column indices.  Every later line is
    split on whitespace and considered only when its p-value field is not
    ``'NA'``, its chromosome equals the region's chromosome, and
    ``pc_toolbox.in_interval`` accepts its position.

    Args:
        assoc_loc: Path of the association table to scan.
        gene_name: Key into ``gene_region_dict``; also copied into the
            returned record.
        gene_region_dict: Maps a gene name to ``(chromosome, start, end)``.
            The chromosome is compared against ``int(<chr field>)``.

    Returns:
        A ``(best_out, sig_list)`` pair.  ``best_out`` is the list
        ``[chr, start, end, gene_name, snp, lz_snp, p, pos, note]`` (all
        strings) for the retained SNP, or ``[]`` when no line qualified.
        ``note`` is ``'--'`` unless other SNPs share the same p-value, in
        which case it reads ``'<count>SNPs(p=<p>)'``.  ``sig_list`` holds a
        ``(snp, lz_snp, p)`` tuple for every SNP tied at the retained
        p-value and is empty when there is no tie.

    Raises:
        KeyError: If ``gene_name`` is not in ``gene_region_dict``.
    """
    reg_chr, reg_start, reg_end = gene_region_dict[gene_name]
    best_p = 1
    best_out = []
    tie_count = 1
    sig_list = []
    assoc_i = None
    header_seen = False
    with open(assoc_loc, mode="r") as assoc:
        for line in assoc:
            if not header_seen:
                assoc_i = pc_toolbox.read_assoc_titles(line, c_interval)
                header_seen = True
                continue
            line_list = line.strip().split()
            if not line_list:
                # A blank line carries no fields; indexing it would fail.
                continue
            snp_pos = int(line_list[assoc_i['pos']])
            if line_list[assoc_i['p']] == 'NA':
                continue
            if reg_chr != int(line_list[assoc_i['chr']]):
                continue
            if not pc_toolbox.in_interval(snp_pos, reg_start, reg_end):
                continue
            cur_p = float(line_list[assoc_i['p']])
            cur_snp, lz_snp = pc_toolbox.correct_snp(assoc_i, line_list)
            # The second clause covers a first hit whose p-value equals the
            # starting bound of 1: there is no earlier SNP to tie with, so
            # it must become the retained SNP instead of being reported as
            # a tie against an empty record.
            if cur_p < best_p or (cur_p == best_p and not best_out):
                tie_count = 1
                best_p = cur_p
                best_out = [str(reg_chr), str(reg_start), str(reg_end),
                            gene_name, cur_snp, lz_snp, str(cur_p),
                            str(snp_pos), '--']
                sig_list = []
            elif cur_p == best_p:
                tie_count = tie_count + 1
                cur_tup = (cur_snp, lz_snp, str(cur_p))
                old_tup = (best_out[4], best_out[5], str(best_p))
                sig_list.append(cur_tup)
                if old_tup not in sig_list:
                    sig_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(best_out[4],cur_snp, cur_p))
                print('There are no Z-scores with which to decide between them, so I arbitrarily retain {0}.'.format(best_out[4]))
                best_out[8]='{0}SNPs(p={1})'.format(tie_count, best_p)
                print(best_out[8])

    return best_out, sig_list
# Example #2
# 0
def create_index_dict(table_loc, table_type):
    """Build the column-index dictionary from a table's header line.

    Only the first line of the file is read; it is handed to the
    ``pc_toolbox`` title reader selected by ``table_type``.

    Args:
        table_loc: Path of the table whose header should be parsed.
        table_type: ``'family'`` selects ``read_fam_titles``, ``'assoc'``
            selects ``read_assoc_titles`` (called with ``c_interval=None``
            and ``plink_test='logistic'``); any other value selects
            ``read_meta_titles``.

    Returns:
        Whatever the selected title reader returns for the header line.

    Raises:
        ValueError: If the file is empty and therefore has no header.
    """
    with open(table_loc, mode="r") as table:
        header = table.readline()
    if not header:
        raise ValueError(
            'Table {0} is empty; there is no header line to index.'.format(
                table_loc))
    if table_type == 'family':
        return pc_toolbox.read_fam_titles(header)
    if table_type == 'assoc':
        return pc_toolbox.read_assoc_titles(header, c_interval=None,
                                            plink_test='logistic')
    return pc_toolbox.read_meta_titles(header)
def filter_result(result_file, snpstar):
    """Look up one SNP in a result table and return its association tuple.

    The header (first line) is parsed with
    ``pc_toolbox.read_assoc_titles(header, .95, 'logistic')``.  The
    remaining lines are scanned in order; the first one whose ``'snp'``
    column equals ``snpstar`` is converted with ``pc_toolbox.assoc_tuple``
    and returned.

    Returns:
        The tuple for the first matching line, or ``None`` when the file is
        empty or no line matches.
    """
    with open(result_file, mode="r") as result:
        header = next(result, None)
        if header is None:
            return None
        index_dict = pc_toolbox.read_assoc_titles(header, .95, 'logistic')
        for record in result:
            fields = record.strip().split()
            if fields[index_dict['snp']] != snpstar:
                continue
            return pc_toolbox.assoc_tuple(record, index_dict)
    return None
# Example #4
# 0
def edit_table(table_loc, table_type):
    """Index a table's columns and collect the rows whose p-value is zero.

    The header line is parsed by a title reader chosen from ``table_type``.
    Every later row whose ``'p'`` field is not ``'NA'`` and parses to
    exactly 0 is recorded as a ``ZTup(lz, chro, pos, abs_z)``.

    Args:
        table_loc: Path of the table to read.
        table_type: ``"assoc"`` uses ``pc_toolbox.read_assoc_titles``,
            ``"eqtl"`` uses ``achilleas_yank.read_eqtl_titles`` with the
            module-level ``perm``; anything else uses
            ``pc_toolbox.read_meta_titles``.  The last two also print the
            resulting index dictionary.

    Returns:
        ``(index_dict, ztup_sort, table_loc)`` where ``ztup_sort`` is the
        list of recorded tuples ordered by ``abs_z``, largest first, and
        ``table_loc`` is returned unchanged.

    Raises:
        ValueError: If the table is empty and has no header line.
    """
    global perm
    # One record type for all rows rather than a new class per row.
    ZTup = namedtuple("ZTup", "lz,chro,pos,abs_z")
    index_dict = None
    header_seen = False
    ztup_list = []
    with open(table_loc, mode="r") as table:
        for line in table:
            if not header_seen:
                header_seen = True
                if table_type == "assoc":
                    index_dict = pc_toolbox.read_assoc_titles(line, c_interval=None, plink_test="logistic")
                elif table_type == "eqtl":
                    index_dict = achilleas_yank.read_eqtl_titles(line, perm)
                    print(index_dict)
                else:
                    index_dict = pc_toolbox.read_meta_titles(line)
                    print(index_dict)
                continue
            line_split = line.strip().split()
            cur_chr = line_split[index_dict["chr"]]
            # The position is parsed on every data row, so a non-integer
            # position raises ValueError even for rows that are not kept.
            int(line_split[index_dict["pos"]])
            p_field = line_split[index_dict["p"]]
            if p_field == "NA" or float(p_field) != 0:
                continue
            # NOTE(review): ``assoc`` is not a parameter or local here, so it
            # must come from module scope; it may have been meant to be
            # ``table_type == "assoc"`` -- confirm before changing.
            if assoc:
                z_or_t = abs(float(line_split[index_dict["t"]]))
            else:
                z_or_t = abs(float(line_split[index_dict["z"]]))
            ztup_list.append(
                ZTup(
                    lz=line_split[index_dict["snp"]],
                    chro=cur_chr,
                    pos=line_split[index_dict["pos"]],
                    abs_z=z_or_t,
                )
            )
    if not header_seen:
        raise ValueError(
            "Table {0} is empty; there is no header line to index.".format(
                table_loc))
    ztup_sort = sorted(ztup_list, key=lambda z: z.abs_z, reverse=True)
    return index_dict, ztup_sort, table_loc
def make_snp_pos_list(input_assoc, output_name):
    """Extract (snp, pos) pairs from an association table and save them.

    The first line of ``input_assoc`` is parsed with
    ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')``; for each later
    line the ``'snp'`` and ``'pos'`` fields are taken as strings.  The pairs
    are written to ``output_name`` one per line, tab-separated, and the
    output file is created even when there are no pairs.

    Returns:
        The list of ``(snp, pos)`` string tuples in file order.
    """
    snplist = []
    with open(input_assoc, mode="r") as assoc:
        title_line = next(assoc, None)
        if title_line is not None:
            index_dict = pc_toolbox.read_assoc_titles(title_line,
                                                      .95, 'logistic')
            rows = (aline.strip().split() for aline in assoc)
            snplist = [(row[index_dict['snp']], row[index_dict['pos']])
                       for row in rows]
    with open(output_name, mode="w") as output:
        output.writelines('\t'.join(pair) + '\n' for pair in snplist)
    return snplist
def read_table(table_loc, region_list, assoc, annot_dict):
    """Collect table rows that pass the p-value bound and lie outside all regions.

    The header line is parsed with ``pc_toolbox.read_assoc_titles`` when
    ``assoc`` is truthy and with ``pc_toolbox.read_meta_titles`` otherwise.
    A data row is kept when its p-value field is numeric, is not greater
    than the module-level ``P_BOUND``, and its (chromosome, position) does
    not fall inside any region.  Kept rows are converted with
    ``store_line(line, table_indices, assoc, annot_dict)``.

    Args:
        table_loc: Path of the table to read.
        region_list: Re-iterable collection of objects exposing ``chro``,
            ``start`` and ``end``.  ``start``/``end`` are multiplied by 1e6
            before comparison (presumably megabases to bases -- confirm
            with the caller).
        assoc: Selects the title reader; also forwarded to ``store_line``.
        annot_dict: Forwarded untouched to ``store_line``.

    Returns:
        The kept records sorted by their ``chro`` and then ``p`` attributes.
        The unsorted and sorted lists are also printed to stdout.
    """
    standardizer = 1e6
    # Region bounds do not depend on the table rows, so convert them once
    # instead of once per row.
    region_bounds = [(int(region.chro),
                      int(float(region.start) * standardizer),
                      int(float(region.end) * standardizer))
                     for region in region_list]
    snp_list = []
    header_seen = False
    with open(table_loc, mode="r") as table:
        for line in table:
            if not header_seen:
                header_seen = True
                if assoc:
                    table_indices = pc_toolbox.read_assoc_titles(line, .95, 'logistic')
                else:
                    table_indices = pc_toolbox.read_meta_titles(line)
                continue
            line_list = line.strip().split()
            snp_pos = int(line_list[table_indices['pos']])
            chro = int(line_list[table_indices['chr']])
            p_str = line_list[table_indices['p']]
            if not pc_toolbox.is_number(p_str) or float(p_str) > P_BOUND:
                continue
            # any() stops at the first region containing the SNP.
            in_reg = any(
                reg_chr == chro
                and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
                for reg_chr, reg_start, reg_end in region_bounds)
            if not in_reg:
                snp_list.append(store_line(line, table_indices, assoc, annot_dict))
    print(snp_list)
    sys.stdout.flush()
    tuple_list = sorted(snp_list, key=attrgetter('chro', 'p'))
    print(snp_list)
    print(tuple_list)
    sys.stdout.flush()
    return tuple_list
def edit_table(table_loc, fix):
    """Index a table, rename non-rs SNPs, and list the rows whose p-value is zero.

    The header line is parsed with ``pc_toolbox.read_assoc_titles`` when the
    module-level ``assoc`` is truthy, otherwise with
    ``pc_toolbox.read_meta_titles``.  On every data row a SNP name that does
    not start with ``'rs'`` is replaced by ``'chr<chr>:<pos>'``.  Rows whose
    ``'p'`` field is not ``'NA'`` and parses to exactly 0 are recorded as
    ``ZTup(lz, chro, pos)``.

    Args:
        table_loc: Path of the table to read.
        fix: When truthy, every line (header included) is re-written,
            tab-joined, to the path returned by ``new_table_loc(table_loc)``.

    Returns:
        ``(index_dict, ztup_list, table_loc)``; ``table_loc`` is the
        re-written file's path when ``fix`` is truthy, otherwise the input
        path.

    Raises:
        ValueError: If the table is empty and has no header line.
    """
    # One record type for all rows rather than a new class per row.
    ZTup = namedtuple('ZTup','lz,chro,pos')
    ztup_list = []
    header_seen = False
    index_dict = None
    lz_table_loc = new_table_loc(table_loc)
    lz_table = None
    with open(table_loc, mode = 'r') as table:
        try:
            if fix:
                lz_table = open(lz_table_loc, mode='w')
            for line in table:
                line_split = line.strip().split()
                if not header_seen:
                    header_seen = True
                    if assoc:
                        index_dict = pc_toolbox.read_assoc_titles(line, c_interval=None,plink_test='logistic')
                    else:
                        index_dict = pc_toolbox.read_meta_titles(line)
                else:
                    cur_chr = line_split[index_dict['chr']]
                    cur_pos = int(line_split[index_dict['pos']])
                    if not line_split[index_dict['snp']].startswith('rs'):
                        line_split[index_dict['snp']]='chr'+cur_chr+':'+str(cur_pos)
                    p_field = line_split[index_dict['p']]
                    if p_field != 'NA' and float(p_field) == 0:
                        ztup_list.append(ZTup(lz=line_split[index_dict['snp']],
                                              chro=cur_chr,
                                              pos=line_split[index_dict['pos']]))
                if fix:
                    lz_table.write('\t'.join(line_split)+'\n')
        finally:
            # Close the output even when a malformed row raises mid-loop.
            if lz_table is not None:
                lz_table.close()
    if not header_seen:
        raise ValueError(
            'Table {0} is empty; there is no header line to index.'.format(
                table_loc))
    if fix:
        table_loc = lz_table_loc
    return index_dict, ztup_list, table_loc
def read_table(table_loc, chromosome, index_dict):
    """Find the smallest and largest positions listed for one chromosome.

    The table's header line is parsed again here (with
    ``pc_toolbox.read_assoc_titles`` when the module-level ``assoc`` is
    truthy, otherwise ``pc_toolbox.read_meta_titles``), and the result
    replaces the ``index_dict`` argument for the data rows.

    Args:
        table_loc: Path of the table to scan.
        chromosome: Value compared for equality with each row's raw
            ``'chr'`` field (a string as read from the file).
        index_dict: Column indices; overwritten once a header is read.

    Returns:
        ``(low_pos, high_pos)``.  When no row matches, the starting values
        ``(5000000000, 0)`` are returned.
    """
    lowest = 5000000000
    highest = 0
    with open(table_loc, mode='r') as table:
        for row_number, raw_line in enumerate(table):
            fields = raw_line.strip().split()
            if row_number == 0:
                if assoc:
                    index_dict = pc_toolbox.read_assoc_titles(raw_line, c_interval=None,plink_test='logistic')
                else:
                    index_dict = pc_toolbox.read_meta_titles(raw_line)
                continue
            row_chr = fields[index_dict['chr']]
            row_pos = int(fields[index_dict['pos']])
            if row_chr != chromosome:
                continue
            lowest = min(lowest, row_pos)
            highest = max(highest, row_pos)
    return (lowest, highest)
def read_assoc(assoc_loc, SNP_loc,c_interval,plink_test, exp):
    """Return the association tuples whose p-value is below the scaled bound.

    On the header line: if the module-level ``firstloop`` flag is set, the
    module-level ``index_dict`` is rebuilt with
    ``pc_toolbox.read_assoc_titles`` and the flag is cleared; if the
    module-level ``loopcount`` equals 0 and ``SNP_loc`` already exists, that
    file is truncated.  Each later line is converted with
    ``pc_toolbox.assoc_tuple`` and kept when its ``p`` is a number smaller
    than ``float(p_bound) * 10 ** exp``.

    Returns:
        The kept tuples in file order (an empty list for an empty file).
    """
    global p_bound, index_dict, firstloop
    kept = []
    with open(assoc_loc, mode = 'r') as assoc_file:
        header = next(assoc_file, None)
        if header is None:
            return kept
        if firstloop:
            index_dict = pc_toolbox.read_assoc_titles(header, c_interval, plink_test)
            firstloop = False
        if loopcount == 0 and os.path.exists(SNP_loc):
            # Opening for writing and closing again empties the file.
            with open(SNP_loc, mode='w'):
                pass
        for assoc_line in assoc_file:
            tuplet = pc_toolbox.assoc_tuple(assoc_line, index_dict)
            if not is_number(tuplet.p):
                continue
            if tuplet.p < float(p_bound)* math.pow(10,exp):
                kept.append(tuplet)
    return kept
                  'a1':a1_index,
    old_p = 1
    old_out={}
    old_t = 0
    reg_chr = gene.chro
    reg_start = int(float(gene.start) *standardizer)
    reg_end = int(float(gene.end) * standardizer)
    sym = gene.sym
    band = gene.band
    assoc_counter = 0
    multi_counter = 1
    zero_list = []
    with open(assoc_loc, mode="r") as assoc:
        for line in meta:
            if line1:
                  assoc_indices = pc_toolbox.read_assoc_titles(line)
                  assoc_counter = 1
                  line1 = False
                  old_out = {'chr':'','band':'','start':'',
                             'end':'','sym':'','snp':'',
                             'p':'1','pos':'', 'or':'0',
                             'hi':'0', 'lo':'0','a1':'--',
                             't':'0','note':'--','checkme':'0'}
            else:
                line_info = read_assoc(line,.95, 'logistic')
##
##
##
##
##NOTE NOTE NOTE THIS IS WHERE EDITING STOPPED!!!! RESUME CHANGES HERE!!!!!!!!!!!!!
##