예제 #1
0
def retrieve_fam_data(snp_list, fam_loc, out_loc, annot_dict):
    """Copy the family-table rows of selected SNPs into a tab-separated file.

    The first line of ``fam_loc`` is handed to ``pc_toolbox.read_fam_titles``
    to locate the columns and a fixed title row is written to ``out_loc``.
    Every later row whose SNP identifier is in ``snp_list`` is written out
    together with the ``rs`` and ``name`` attributes of its ``annot_dict``
    entry.

    Args:
        snp_list: SNP identifiers to keep, spelled as in the table's SNP
            column.
        fam_loc: path of the whitespace-delimited family table.
        out_loc: path of the output file; it is overwritten.
        annot_dict: mapping from SNP identifier to an object exposing
            ``rs`` and ``name``.

    Raises:
        KeyError: if a selected SNP has no entry in ``annot_dict``.
    """
    # Build the lookup once so each row costs a hash probe, not a list scan.
    wanted = frozenset(snp_list)
    # Both handles live in one ``with`` so the output is flushed and closed
    # even when a row fails to parse.
    with open(out_loc, mode="w") as out, open(fam_loc, mode="r") as fam:
        for line_no, line in enumerate(fam):
            if line_no == 0:
                index_dict = pc_toolbox.read_fam_titles(line)
                title_list = ['lz_snp', 'rs_snp', 'imchip_snp',
                              FAM_CHR_TITLE, FAM_POSITION_TITLE,
                              FAM_P_TITLE, FAM_Z_TITLE, FAM_WEIGHT_TITLE,
                              FAM_A1_TITLE, FAM_A2_TITLE]
                out.write('\t'.join(title_list) + '\n')
                continue
            line_list = line.strip().split()
            snp = line_list[index_dict['snp']]
            if snp not in wanted:
                continue
            annot = annot_dict[snp]
            new_list = [snp, annot.rs, annot.name,
                        line_list[index_dict['chr']],
                        line_list[index_dict['pos']],
                        line_list[index_dict['p']],
                        line_list[index_dict['z']],
                        line_list[index_dict['weight']],
                        line_list[index_dict['a1']],
                        line_list[index_dict['a2']]]
            out.write('\t'.join(new_list) + '\n')
예제 #2
0
def repair_for_zs(z_list, lw_list, table_loc, build, table_type):
    """Write a copy of a results table with unusable p-values replaced.

    The copy is written next to ``table_loc`` with ``_noZs`` inserted before
    the extension. The title line is copied verbatim; data rows are
    re-joined with tabs after these substitutions:

    * a p-value of exactly ``'0'`` becomes ``'1e-101'`` when the SNP is in
      ``z_list`` and ``'1e-100'`` otherwise;
    * any other row whose SNP is in ``lw_list`` gets a p-value of ``'NA'``.

    Args:
        z_list: SNP identifiers that receive the smaller placeholder.
        lw_list: SNP identifiers whose p-value is blanked to ``'NA'``.
        table_loc: path of the whitespace-delimited input table.
        build: not used by this function.
        table_type: ``'FAMILY'`` or ``'META'``; selects the title parser.
    """
    print ('''
LW_LIST is:''')
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(lw_list)
    base, ext = os.path.splitext(table_loc)
    new_loc = base + '_noZs' + ext
    # One ``with`` for both files: the copy is closed even if a row raises.
    with open(new_loc, mode="w") as noZ, open(table_loc, mode="r") as tabby:
        for line_no, line in enumerate(tabby):
            if line_no == 0:
                if table_type == 'FAMILY':
                    index_dict = pc_toolbox.read_fam_titles(line)
                if table_type == 'META':
                    index_dict = pc_toolbox.read_meta_titles(line)
                noZ.write(line)
                continue
            line_list = line.strip().split()
            snp = line_list[index_dict['snp']]
            p = line_list[index_dict['p']]
            if p == '0':
                if snp in z_list:
                    line_list[index_dict['p']] = '1e-101'
                else:
                    line_list[index_dict['p']] = '1e-100'
            elif snp in lw_list:
                print('''Changing p-value of {0} from {1} to NA due to low weight
coupled with high p-value.'''.format(snp, line_list[index_dict['p']]))
                line_list[index_dict['p']] = 'NA'
            noZ.write('\t'.join(line_list) + '\n')
예제 #3
0
def filter_table(meta_loc, list_loc, table_type='META'):
    """Write the rows of a table whose SNP appears in a list file.

    The result is written next to ``meta_loc`` with ``_intersect`` inserted
    before the extension. The title line and every kept row are copied
    byte-for-byte from the input.

    Args:
        meta_loc: path of the whitespace-delimited input table.
        list_loc: path handed to ``read_list`` to obtain the SNPs to keep.
        table_type: ``'META'`` (default) or ``'FAMILY'``; selects the title
            parser.
    """
    # A set gives one hash probe per row and guarantees a row is written at
    # most once, even if the list file names the same SNP more than once.
    wanted = set(read_list(list_loc))
    base, ext = os.path.splitext(meta_loc)
    new_meta_loc = base + '_intersect' + ext
    with open(meta_loc, mode="r") as meta, \
            open(new_meta_loc, mode="w") as new_meta:
        for line_no, line in enumerate(meta):
            if line_no == 0:
                if table_type == 'META':
                    index_dict = pc_toolbox.read_meta_titles(line)
                elif table_type == 'FAMILY':
                    index_dict = pc_toolbox.read_fam_titles(line)
                new_meta.write(line)
                continue
            line_list = line.strip().split()
            if line_list[index_dict['snp']] in wanted:
                new_meta.write(line)
예제 #4
0
def create_index_dict(table_loc, table_type):
    """Return the column-index dictionary for a table's title line.

    Only the first line of ``table_loc`` is read; the rest of the file is
    never touched.

    Args:
        table_loc: path of the table whose first line holds column titles.
        table_type: ``'family'`` uses ``pc_toolbox.read_fam_titles``,
            ``'assoc'`` uses ``pc_toolbox.read_assoc_titles`` (with
            ``c_interval=None`` and ``plink_test='logistic'``); anything
            else falls back to ``pc_toolbox.read_meta_titles``.

    Returns:
        Whatever the selected ``pc_toolbox`` title reader returns.

    Raises:
        ValueError: if the file is empty and so has no title line.
    """
    with open(table_loc, mode="r") as table:
        header = table.readline()
    if not header:
        raise ValueError(
            "{0} is empty; there is no title line to index.".format(table_loc))
    if table_type == 'family':
        return pc_toolbox.read_fam_titles(header)
    if table_type == 'assoc':
        return pc_toolbox.read_assoc_titles(header, c_interval=None,
                                            plink_test='logistic')
    return pc_toolbox.read_meta_titles(header)
예제 #5
0
def list_info(table_loc, table_type):
    """Collect (snp, chro, pos) records for every data row of a table.

    Args:
        table_loc: path of the whitespace-delimited table.
        table_type: ``'MAP'`` / ``'HAPMAP'`` use
            ``pc_toolbox.identify_map_indices()``, ``'META'`` uses
            ``pc_toolbox.read_meta_titles``, ``'FAMILY'`` uses
            ``pc_toolbox.read_fam_titles``.

    Returns:
        A list of ``SnpFo`` namedtuples in file order; the fields are kept
        as the strings read from the file.
    """
    SnpFo = namedtuple('SnpFo', 'snp,chro,pos')
    fo_list = []
    with open(table_loc, mode="r") as table:
        for line_no, line in enumerate(table):
            if line_no == 0:
                # NOTE(review): the first line is always consumed here, also
                # for MAP/HAPMAP where the indices do not come from the line
                # itself -- confirm those files really start with a title row.
                if table_type in ['MAP', 'HAPMAP']:
                    index_dict = pc_toolbox.identify_map_indices()
                elif table_type in ['META']:
                    index_dict = pc_toolbox.read_meta_titles(line)
                elif table_type == 'FAMILY':
                    index_dict = pc_toolbox.read_fam_titles(line)
                continue
            ls = line.strip().split()
            fo_list.append(SnpFo(chro=ls[index_dict['chr']],
                                 snp=ls[index_dict['snp']],
                                 pos=ls[index_dict['pos']]))
    return fo_list
예제 #6
0
def read_table(table_loc, gene, table_type, annot_dict=None):
    """Pick the most significant adequately-weighted SNP inside a gene region.

    Rows are considered only when their chromosome equals ``gene.chro`` and
    ``pc_toolbox.in_interval`` accepts their position for the region
    ``gene.start`` .. ``gene.end`` (both multiplied by 1e6 first). Among
    those rows:

    * a row with a smaller p-value than the current best replaces it,
      provided its weight is not below the module-level ``weight_min``;
    * a row with the same p-value as the current best is recorded as a tie
      and replaces the best only if its |z| is larger and its weight is
      adequate;
    * a low-weight row that beats both the current best and the current
      low-weight candidate is remembered as the low-weight candidate, which
      is dropped again when an adequately weighted row with a smaller
      p-value shows up.

    Args:
        table_loc: path of the whitespace-delimited results table.
        gene: object exposing ``chro``, ``start``, ``end``, ``sym`` and
            ``ID``.
        table_type: ``'META'`` or ``'FAMILY'``; selects the title parser.
        annot_dict: mapping from the table's SNP identifier to an object
            exposing ``rs`` and ``name``. Despite the ``None`` default it is
            indexed for every in-region row, so it must be supplied whenever
            the region contains rows.

    Returns:
        A 3-tuple ``(best, ties, low_weight_lz)``: ``best`` is the dict
        describing the retained SNP (blank template if none qualified),
        ``ties`` is the list of ``ZTup`` records sharing the best p-value
        ordered by descending |z|, and ``low_weight_lz`` is the identifier
        of the low-weight candidate or ``None``.
    """
    global weight_min, sex_weight
    # gene.start / gene.end are scaled by this factor before comparison with
    # table positions (presumably Mb -> bp; confirm against the callers).
    standardizer = 1e6
    reg_chr = gene.chro
    reg_start = int(float(gene.start) * standardizer)
    reg_end = int(float(gene.end) * standardizer)
    ZTup = namedtuple('ZTup', 'snp,lz,p,z,w')
    multi_counter = 1
    zero_list = []
    # Templates are created up front so an empty table returns the blank
    # result instead of failing on an unbound name at the return statement.
    old_out = {'chr': '', 'ID': '', 'start': '',
               'end': '', 'sym': '', 'snp': '', 'im': '',
               'lz': '', 'p': '1', 'pos': '',
               '|z|': '0', 'z': '0', 'note': '--',
               'weight': '', 'maf': '', 'meta_a1': '',
               'meta_a2': '', 'checkme': ''}
    lw_blank = {'chr': '', 'ID': '', 'start': '',
                'end': '', 'sym': '', 'snp': None, 'im': None,
                'lz': None, 'p': '1', 'pos': '',
                '|z|': '0', 'z': '0', 'note': '--',
                'weight': '', 'maf': '', 'meta_a1': '',
                'meta_a2': ''}
    # Work on a copy: updating the candidate must not dirty the template.
    low_weight_out = dict(lw_blank)
    with open(table_loc, mode="r") as table:
        for line_no, line in enumerate(table):
            if line_no == 0:
                if table_type == 'META':
                    index_dict = pc_toolbox.read_meta_titles(line)
                if table_type == 'FAMILY':
                    index_dict = pc_toolbox.read_fam_titles(line)
                continue
            line_list = line.strip().split()
            snp_pos = int(line_list[index_dict['pos']])
            if not (int(reg_chr) == int(line_list[index_dict['chr']])
                    and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)):
                continue

            old_p = float(old_out['p'])
            cur_p = float(line_list[index_dict['p']])
            lz_snp = line_list[index_dict['snp']]
            cur_w = float(line_list[index_dict['weight']])
            cur_a1 = line_list[index_dict['a1']]
            cur_a2 = line_list[index_dict['a2']]
            cur_z = line_list[index_dict['z']]
            cur_abs_z = abs(float(cur_z))
            cur_snp = annot_dict[lz_snp].rs
            cur_im = annot_dict[lz_snp].name
            cur_out = {'chr': gene.chro, 'ID': gene.ID, 'start': gene.start,
                       'end': gene.end, 'sym': gene.sym, 'snp': cur_snp,
                       'im': cur_im, 'lz': lz_snp, 'p': str(cur_p),
                       'pos': str(snp_pos), '|z|': str(cur_abs_z),
                       'z': cur_z, 'note': '--', 'weight': str(cur_w),
                       'meta_a1': cur_a1, 'meta_a2': cur_a2}
            # '1' flags a row whose weight is too low to be trusted.
            cur_out['checkme'] = '1' if cur_w < weight_min else '0'

            if cur_p == old_p:
                multi_counter = multi_counter + 1
                cur_tup = ZTup(snp=cur_out['snp'], lz=cur_out['lz'],
                               p=cur_out['p'], z=cur_out['|z|'],
                               w=cur_out['weight'])
                old_tup = ZTup(snp=old_out['snp'], lz=old_out['lz'],
                               p=old_out['p'], z=old_out['|z|'],
                               w=old_out['weight'])
                zero_list.append(cur_tup)
                if old_tup not in zero_list:
                    zero_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(old_out['snp'],cur_snp, cur_p))
                print('{0} has a z-score of: {1}'.format(old_out['snp'],old_out['z']))
                print('{0} has a z-score of: {1}'.format(cur_out['snp'],cur_out['z']))
                # Compare |z| numerically; the dicts hold it as text and a
                # text comparison would rank '9.5' above '10.2'.
                if (cur_abs_z > float(old_out['|z|'])
                        and cur_out['checkme'] == '0'):
                    old_p = cur_p
                    old_out.update(cur_out)
                print('Retaining {0} as the significant SNP based on z-score value.'.format(old_out['snp']))
                old_out['note'] = '{0}SNPs(p={1})'.format(multi_counter, old_p)
            elif cur_p < old_p:
                if cur_out['checkme'] == '0':
                    multi_counter = 1
                    old_p = cur_p
                    old_out.update(cur_out)
                    zero_list = []
                    if not low_weight_out['p'] == '1':
                        if cur_p < float(low_weight_out['p']):
                            # A trusted SNP now beats the low-weight
                            # candidate, so forget the candidate entirely.
                            low_weight_out = dict(lw_blank)
                            print("Wiping low weight snp!")
                elif cur_p < float(low_weight_out['p']):
                    low_weight_out.update(cur_out)
                    print('''Adding {0} to low weight SNP list due to
weight = {1}
p-value = {2}'''.format(cur_snp, low_weight_out['weight'],low_weight_out['p']))

    # Order ties by numeric |z|, largest first.
    sorted_list = sorted(zero_list, key=lambda member: float(member.z),
                         reverse=True)
    return old_out, sorted_list, low_weight_out['lz']
예제 #7
0
def _recode_chromosome(label):
    """Replace X / Y / XY / M chromosome labels with numeric codes."""
    # 'XY' has to be handled before 'X' and 'Y'; otherwise it has already
    # been rewritten to '2324' by the time its own rule is tried.
    return (label.replace('XY', '23').replace('X', '23')
            .replace('Y', '24').replace('M', '26'))


def fix_table(table_loc, annot_dict, purpose, build):
    """Rewrite a table with chromosome, position and SNP name from annot_dict.

    For ``purpose`` in ``'MAP'``, ``'HAPMAP'`` or ``'FAMMAP'`` the input is
    first copied to a backup name (via ``rename_as_necessary``) and the
    corrected table is written over ``table_loc``; every line is treated as
    data with chromosome, SNP and position in columns 0, 1 and 3. For any
    other purpose the result goes to ``locate_fixed_meta(table_loc, build)``,
    the first line is treated as a title row, and an ``annotation`` column
    (from ``get_lz_annot``) is appended to every row.

    Rows whose SNP is missing from ``annot_dict`` keep their coordinates and
    are listed in ``<input base>_NAMEERRORS.txt``, where the input is the
    backup copy in the MAP-style case.

    Args:
        table_loc: path of the whitespace-delimited table to fix.
        annot_dict: mapping from SNP identifier to an object exposing
            ``hg18_chr``, ``hg18_pos``, ``hg19_chr``, ``hg19_pos`` and
            ``lz``.
        purpose: ``'MAP'``, ``'HAPMAP'``, ``'FAMMAP'``, ``'META'`` or
            ``'FAMILY'``.
        build: ``'hg18'`` or ``'hg19'``; any other value leaves the
            coordinates untouched.
    """
    headerless = purpose in ['MAP', 'HAPMAP', 'FAMMAP']
    if headerless:
        (basepath, ext) = os.path.splitext(table_loc)
        orig_rename = basepath + '~'
        new_loc = str(rename_as_necessary(orig_rename, ext)) + ext
        shutil.copy(table_loc, new_loc)
        # Read from the backup and overwrite the original in place.
        output_loc = table_loc
        table_loc = new_loc
    else:
        output_loc = locate_fixed_meta(table_loc, build)

    line1 = True
    counter = 0
    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}
    error_list = []
    with open(table_loc, mode="r") as table, \
            open(output_loc, mode="w") as output:
        for line in table:
            if counter > 5000:
                # Progress trace: echo the previous row once the counter
                # passes 5000 rows (it is reset further down).
                print(line_list)
            if line1 and not headerless:
                if purpose == "META":
                    index_dict = pc_toolbox.read_meta_titles(line)
                if purpose == "FAMILY":
                    index_dict = pc_toolbox.read_fam_titles(line)
                print(index_dict)
                line_list = line.strip().split()
                line_list.append('annotation')
                output.write('\t'.join(line_list) + '\n')
                index_dict['annot'] = len(line_list) - 1
                print(index_dict)
                line1 = False
                continue

            line_list = line.strip().split()
            snp = line_list[index_dict['snp']]
            if purpose in ['FAMILY', 'FAMMAP'] and snp in FAM_DIFF:
                print("MADE IT INTO CASE")
                print(snp)
                snp = FAM_DIFF[snp]
                print(snp)
            if not headerless:
                line_list.append(get_lz_annot(annot_dict, snp))
            if build in ('hg18', 'hg19'):
                try:
                    line_list[index_dict['chr']] = _recode_chromosome(
                        getattr(annot_dict[snp], build + '_chr'))
                    line_list[index_dict['pos']] = getattr(
                        annot_dict[snp], build + '_pos')
                except KeyError:
                    # Unknown SNP: keep the row as it is and report it.
                    error_list.append(list(line_list))
            if not purpose == 'HAPMAP':
                try:
                    line_list[index_dict['snp']] = annot_dict[snp].lz
                except KeyError:
                    print("{0} is missing from the dictionary!".format(line_list[index_dict['snp']]))
            if counter > 5000:
                counter = 0
            counter = 1 + counter
            output.write('\t'.join(line_list) + '\n')

    base, ext = os.path.splitext(table_loc)
    error_loc = base + '_NAMEERRORS.txt'
    with open(error_loc, mode="w") as efile:
        efile.write('\t'.join(['CHR', 'RS', 'cM', 'POS']) + '\n')
        for error in error_list:
            efile.write('\t'.join(error) + '\n')
예제 #8
0
def fix_table(table_loc, annot_dict, purpose, build):
    """Rewrite a table with chromosome, position and SNP name from annot_dict.

    For ``purpose`` ``'HAPMAP'`` or ``'MAP'`` the input is first copied to a
    backup name (via ``rename_as_necessary``) and the corrected table is
    written over ``table_loc``; otherwise the result goes to
    ``locate_fixed_table(table_loc, build)``. The first line is always
    copied through unchanged; for ``'META'`` and ``'FAMILY'`` it is also
    parsed to find the columns, while other purposes use chromosome, SNP
    and position in columns 0, 1 and 3.

    Rows whose SNP is missing from ``annot_dict`` keep their coordinates and
    are listed in ``<input base>_NAMEERRORS.txt``, where the input is the
    backup copy in the MAP-style case.

    Args:
        table_loc: path of the whitespace-delimited table to fix.
        annot_dict: mapping from SNP identifier to an object exposing
            ``hg18_chr``, ``hg18_pos``, ``hg19_chr``, ``hg19_pos`` and
            ``lz``.
        purpose: ``'HAPMAP'``, ``'MAP'``, ``'META'`` or ``'FAMILY'``.
        build: ``'hg18'`` or ``'hg19'``; any other value leaves the
            coordinates untouched.
    """
    if purpose in ["HAPMAP", "MAP"]:
        (basepath, ext) = os.path.splitext(table_loc)
        orig_rename = basepath + '~'
        new_loc = str(rename_as_necessary(orig_rename, ext)) + ext
        shutil.copy(table_loc, new_loc)
        # Read from the backup and overwrite the original in place.
        output_loc = table_loc
        table_loc = new_loc
    else:
        output_loc = locate_fixed_table(table_loc, build)

    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}
    error_list = []
    with open(table_loc, mode="r") as table, \
            open(output_loc, mode="w") as output:
        for line_no, line in enumerate(table):
            if line_no == 0:
                # NOTE(review): for MAP/HAPMAP the first line is passed
                # through unmodified as if it were a title row -- confirm
                # these files really have one, otherwise the first SNP is
                # never corrected.
                if purpose == "META":
                    index_dict = pc_toolbox.read_meta_titles(line)
                elif purpose == "FAMILY":
                    index_dict = pc_toolbox.read_fam_titles(line)
                output.write(line)
                continue
            line_list = line.strip().split()
            snp = line_list[index_dict['snp']]
            if build in ('hg18', 'hg19'):
                try:
                    line_list[index_dict['chr']] = getattr(
                        annot_dict[snp], build + '_chr')
                    line_list[index_dict['pos']] = getattr(
                        annot_dict[snp], build + '_pos')
                except KeyError:
                    # Unknown SNP: keep the row as it is and report it.
                    error_list.append(list(line_list))
            try:
                line_list[index_dict['snp']] = annot_dict[snp].lz
            except KeyError:
                print("{0} is missing from the dictionary!".format(line_list[index_dict['snp']]))
            output.write('\t'.join(line_list) + '\n')

    base, ext = os.path.splitext(table_loc)
    error_loc = base + '_NAMEERRORS.txt'
    with open(error_loc, mode="w") as efile:
        efile.write('\t'.join(['CHR', 'RS', 'cM', 'POS']) + '\n')
        for error in error_list:
            efile.write('\t'.join(error) + '\n')