def main(argv):
    """Run the least-P SNP search for every region and post-process the hits.

    ``cl_arguments(argv)`` is called first; it presumably populates the
    module-level settings read below (``region_loc``, ``build``,
    ``out_base``, ``meta_loc``, ``table_type``, ``freq_loc``,
    ``weight_min``, ``bfile``, ``covar_loc``) -- TODO confirm.

    The meta table at ``meta_loc`` is used as-is (no "fixed" copy is
    located).  ``plink_it`` is only invoked when both ``bfile`` and
    ``covar_loc`` were supplied.
    """
    cl_arguments(argv)

    regions = pc_toolbox.create_region_list(region_loc)
    annot_dict = fix_it.build_annot_dict('LOG',
                                         fix_it.locate_annot_dict(build))

    # Both output paths are derived from the output prefix.
    keep_loc = out_base + '_plink_keep.txt'
    plink_out = out_base + '_sigs_inCC'

    meta_table = meta_loc
    index_dict = create_index_dict(meta_table, table_type)
    z_list, lw_list = find_leastP_SNPs(
        regions, meta_table, index_dict,
        out_base, freq_loc, keep_loc,
        weight_min, table_type, annot_dict)

    # plink needs both the binary fileset and the covariate file.
    if not (bfile is None or covar_loc is None):
        plink_it(keep_loc, bfile, covar_loc, plink_out)

    repair_for_zs(z_list, lw_list, meta_table, index_dict, build)
def main(argv):
    """Run ``fix_joe_map`` on ``table_loc`` with the hg18 'MAP' annotation.

    After ``cl_arguments(argv)`` has run, the module-level ``build`` is
    overwritten with ``'hg18'``, so any build given on the command line is
    ignored.  Finishes by calling ``look_up_rs_joe.main()``.
    """
    global build
    cl_arguments(argv)
    build = 'hg18'

    annot_dict = fix_it.build_annot_dict('MAP',
                                         fix_it.locate_annot_dict(build))
    # The return value of fix_joe_map is not used here.
    fix_joe_map(table_loc, annot_dict, build)
    look_up_rs_joe.main()
def main():
    """Summarize the log folder named on the command line.

    Parses ``sys.argv[1:]`` via ``cl_arguments``; when no ``log_folder``
    was set, prints usage and exits with status 2.  Otherwise changes the
    working directory to ``log_folder`` and calls ``summarize_folder``.
    """
    cli_args = sys.argv[1:]
    cl_arguments(cli_args)

    if log_folder is None:
        usage()
        sys.exit(2)

    os.chdir(log_folder)
    # NOTE(review): build_annot_dict is called with only the purpose here
    # (no dictionary location) -- confirm that signature is supported.
    annot_dict = fix_it.build_annot_dict('LOG')
    summarize_folder(log_folder, freq_loc, map_loc,
                     summary_name, run_info, repair_loc,
                     annot_dict)
def main(argv):
    """Call ``read_meta`` once for each region listed at ``REGION_LOC``.

    All inputs come from the module constants ``REGION_LOC``, ``BUILD``,
    ``META_LOC`` and ``OUTFOLDER``; ``argv`` is not used in the body.
    """
    regions = pc_toolbox.create_region_list(REGION_LOC)
    annot_dict = fix_it.build_annot_dict('LOG',
                                         fix_it.locate_annot_dict(BUILD))
    fixed_meta = fix_it.locate_fixed_meta(META_LOC, BUILD)

    for region in regions:
        read_meta(fixed_meta, region, OUTFOLDER, BUILD, annot_dict)
def main(argv):
    """Write the '_JB' multi-table for ``table_loc`` and index it.

    After ``cl_arguments(argv)``, the table is treated as a permutation
    table when its path contains the substring ``'perm'``.  The output
    path is the input path with ``_JB`` inserted before the extension.
    """
    cl_arguments(argv)

    # A path containing 'perm' marks a permutation table.
    perm = 'perm' in table_loc

    achilleas_dict = build_key(ACHILLEAS_KEY_LOC, 'IM')
    annot_dict_loc = fix_it.locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict('MAP', annot_dict_loc)

    stem, suffix = os.path.splitext(table_loc)
    multi_loc = stem + '_JB' + suffix
    write_multi_table(table_loc, multi_loc, achilleas_dict, annot_dict)

    # Neither result below is used in the visible part of this function.
    index_dict = create_eqtl_index(multi_loc, perm)
    log_dict = fix_it.build_annot_dict('LOG', annot_dict_loc)
def main(argv):
    """Call ``append_assoc`` on ``ASSOC_LOC`` with the intersect SNP list.

    Builds the 'MAP' annotation for ``BUILD`` and the hg19 'LOG'
    annotation; the latter is handed to ``read_list`` together with
    ``INTERSECT_LOC``.  An earlier inline export of the map/key table is
    disabled, so ``OUT_FILE`` is read but not written.  ``argv`` is not
    used in the body.
    """
    out_file = OUT_FILE  # only the disabled map/key export used this
    outfolder = '/home/jkb4y/work/data/Joe/HapMapCorrect/'

    annot_dict = fix_it.build_annot_dict('MAP',
                                         fix_it.locate_annot_dict(BUILD))
    hg19_annot_loc = fix_it.locate_annot_dict('hg19')
    hg19_annot_dict = fix_it.build_annot_dict('LOG', hg19_annot_loc)

    intersect_list = read_list(INTERSECT_LOC, hg19_annot_dict)
    append_assoc(ASSOC_LOC, outfolder, annot_dict, intersect_list)
def main(argv):
    """Build the intersect list, then filter the meta and family tables.

    Uses the hg19 'LOG' annotation for ``make_list`` and then runs
    ``filter_table`` twice against ``LIST_LOC``: once on ``META_LOC``
    tagged 'META' and once on ``FAM_TABLE`` tagged 'FAMILY'.  ``argv`` is
    not used in the body.
    """
    chr_table = CHR_TABLE  # its filter_table call is disabled; unused

    hg19_annot_loc = fix_it.locate_annot_dict('hg19')
    annot_dict = fix_it.build_annot_dict('LOG', hg19_annot_loc)

    # The returned list is not consumed in this function.
    intersect_list = make_list(CC_LOC, FAM_LOC, BASE_LIST_LOC, annot_dict)

    filter_table(META_LOC, LIST_LOC, 'META')
    filter_table(FAM_TABLE, LIST_LOC, 'FAMILY')
def main(argv):
    """Fix ``table_loc`` using the manifest and 'HAPMAP' annotation dicts.

    When a ``manifest_loc`` was supplied, the manifest dictionary is
    (re)written first.  After ``fix_table`` runs, ``create_key`` is called
    with the key path returned by ``locate_table_key``.
    """
    cl_arguments(argv)

    if manifest_loc is not None:
        print("WRITING MANIFEST DICT!")
        write_manifest_dict(manifest_loc)

    manifest_dict = build_manifest_dict(locate_manifest_dict())
    annot_dict = fix_it.build_annot_dict('HAPMAP',
                                         fix_it.locate_annot_dict(build))

    fix_table(table_loc, build, manifest_dict, annot_dict)
    create_key(table_loc, manifest_dict, annot_dict,
               locate_table_key(table_loc))
def main():
    """Write a tab-separated key table for every SNP in ``MAP_FILE``.

    Each map line yields one output row with the columns named in
    ``title_list``.  SNPs found by ``look_up_snp`` get their annotated
    names and positions, and ``intersect`` is 'yes' when the annotated
    name is in the intersect list.  SNPs with no annotation are written
    under their original name with '??' for the hg19 position and a
    hard-coded 'chr16:' prefix for the lz column.

    The output file is managed by a ``with`` block so it is closed even if
    a lookup or a malformed line raises.  Blank lines in the map file are
    skipped.
    """
    build = BUILD
    out_file = OUT_FILE
    map_file = MAP_FILE
    intersect_loc = INTERSECT_LOC
    title_list = ['imchip_name','rs_name','hg18_pos','hg19_pos','hg18_lz','intersect']

    annot_dict_loc = fix_it.locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict('MAP',annot_dict_loc)
    intersect_annot_loc = fix_it.locate_annot_dict('hg19')
    intersect_annot_dict = fix_it.build_annot_dict('LOG',intersect_annot_loc)

    intersect_list = read_list(intersect_loc, intersect_annot_dict)
    # Column positions of the fields read from each map line.
    index_dict = {'chr':0,'pos':3,'snp':1}

    with open(out_file, mode="w") as out:
        out.write('\t'.join(title_list)+'\n')
        with open(map_file, mode="r") as mappy:
            for line in mappy:
                line_list = line.strip().split()
                if not line_list:
                    # A blank line would otherwise raise IndexError.
                    continue
                im = line_list[index_dict['snp']]
                info = look_up_snp(im, annot_dict)
                intersect = 'no'

                if info is not None:
                    if info.name in intersect_list:
                        intersect = 'yes'
                    out.write('\t'.join([im,info.rs, info.hg18_pos, info.hg19_pos,
                                         info.lz,intersect]) +'\n')
                else:
                    # No annotation: fall back to the raw map position.
                    pos = line_list[index_dict['pos']]
                    out.write('\t'.join([im,im,pos,
                                         '??','chr16:'+pos,intersect])+'\n')
def fix_eur(orig_table, out_table, fix_dict):
    """Copy ``orig_table`` to ``out_table``, patching rows named in ``fix_dict``.

    The first line of ``orig_table`` is treated as the header: it is parsed
    with ``pc_toolbox.read_meta_titles`` to get column indices and is
    written out with an extra ``eurlowP_SNP`` column.  For each later row
    whose SNP is a key of ``fix_dict`` (first occurrence only), the z,
    weight and a1 columns are replaced by the ``.z``, ``.w`` and ``.a1``
    attributes of the dict value, a2 and direction are set to '.', and the
    flag column is 'yes'.  All other rows get '.' in the flag column.

    Keys of ``fix_dict`` never seen in the table are reported on stdout and
    appended as new rows, with chromosome and position taken from the hg18
    'MAP' annotation dictionary.

    Parameters:
        orig_table: path of the whitespace-delimited input table.
        out_table: path of the tab-delimited output table (overwritten).
        fix_dict: mapping of SNP name to an object with ``a1``, ``w`` and
            ``z`` string attributes.
    """
    # A set gives O(1) membership and removal.  The previous code called
    # .remove() on fix_dict.keys(), which is a view without .remove() on
    # Python 3.
    remaining = set(fix_dict)
    index_dict = None

    # Managing the output with `with` closes it even if a row raises.
    with open(out_table, mode="w") as out:
        with open(orig_table, mode="r") as orig:
            for line_no, line in enumerate(orig):
                lsplit = line.strip().split()
                if line_no == 0:
                    # Header row: learn the column layout, add the flag title.
                    index_dict = pc_toolbox.read_meta_titles(line)
                    lsplit.append('eurlowP_SNP')
                    out.write('\t'.join(lsplit)+'\n')
                    continue

                lp_flag = '.'
                snp = lsplit[index_dict['snp']]
                if snp in remaining:
                    fix = fix_dict[snp]
                    lsplit[index_dict['z']] = fix.z
                    lsplit[index_dict['weight']] = fix.w
                    lsplit[index_dict['a1']] = fix.a1
                    lsplit[index_dict['a2']] = '.'
                    lsplit[index_dict['direction']] = '.'
                    # Only the first row for a SNP is patched.
                    remaining.discard(snp)
                    lp_flag = 'yes'
                lsplit.append(lp_flag)
                out.write('\t'.join(lsplit) + '\n')

        if remaining:
            print("{0} keys not found.".format(len(remaining)))
            annot_dict_loc = fix_it.locate_annot_dict('hg18')
            annot_dict = fix_it.build_annot_dict('MAP', annot_dict_loc)
            # Walk fix_dict (not the set) so leftovers keep the dict's order.
            for key in fix_dict:
                if key not in remaining:
                    continue
                print(key)
                fix = fix_dict[key]
                a1 = fix.a1
                a2 = '.'
                weight = fix.w
                z = fix.z
                p = '0'
                direction = '.'
                chro = annot_dict[key].hg18_chr
                pos = annot_dict[key].hg18_pos
                lp_flag = 'yes'
                key_info = [key,a1,a2,weight,z,p,direction,chro,pos,lp_flag]
                out.write('\t'.join(key_info)+'\n')
def main():
    """Collect SNPs from ``table_loc`` via ``read_table`` and write them out.

    Parses ``sys.argv[1:]`` with ``cl_arguments``, builds the region list
    and the 'LOG' annotation dictionary for ``build``, reports how many
    SNPs ``read_table`` returned, and passes them to ``write_results``.
    """
    cl_arguments(sys.argv[1:])

    region_list = pc_toolbox.create_region_list(region_loc)
    annot_dict = fix_it.build_annot_dict('LOG',
                                         fix_it.locate_annot_dict(build))

    snp_list = read_table(table_loc, region_list, assoc, annot_dict)
    found = len(snp_list)
    print("inter_region_yank finds {0} SNPs of significance!".format(found))

    write_results(snp_list, out_file, assoc, annot_dict)
def main(argv):
    """Fix ``table_loc`` with the annotation dictionary for ``build``.

    After ``cl_arguments(argv)``, the annotation dictionary is rewritten
    first when ``annot_loc`` is set.  The purpose passed to
    ``build_annot_dict`` and ``fix_table`` is chosen from the flags in
    priority order: ``meta`` -> 'META', ``family`` -> 'FAMILY',
    ``hapmap`` -> 'HAPMAP', otherwise 'MAP'.
    """
    global annot_loc, table_loc, meta, build, hapmap, family
    cl_arguments(argv)
    # Function-call form prints the same on Python 2 and parses on
    # Python 3, matching the other print calls below.
    print(annot_loc)
    if annot_loc is not None:
        print("WRITING ANNOT!")
        write_annot_dict(annot_loc, build)
    if meta:
        purpose = 'META'
    elif family:
        purpose = 'FAMILY'
    elif hapmap:
        purpose = 'HAPMAP'
    else:
        purpose = 'MAP'
    dict_loc = locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict(purpose, dict_loc)
    fix_table(table_loc,annot_dict,purpose,build)
def main(argv):
    """Report duplicate SNP names and duplicate SNP positions in a table.

    Three list files are written next to ``table_loc`` (same stem):
    ``_dupNames.list`` for duplicate names, ``_imDups.list`` for the
    non-rs duplicates returned by ``find_dups``, and ``_allDups.list``,
    which is passed to ``count_dups``.  The annotation dictionary is keyed
    as 'HAPMAP' for HAPMAP tables and as 'LOG' for every other table type.
    """
    cl_arguments(argv)

    stem, _ext = os.path.splitext(table_loc)
    im_dup_loc = stem + '_imDups.list'
    all_dup_loc = stem + '_allDups.list'
    dup_names_loc = stem + '_dupNames.list'

    # Look up the annotation dictionary, keyed by imchip names.
    annot_dict_loc = fix_it.locate_annot_dict(build)
    key_type = table_type if table_type == 'HAPMAP' else 'LOG'
    annot_dict = fix_it.build_annot_dict(key_type, annot_dict_loc)

    # Extract SNP info from the table.
    print("Now gathering info from table....")
    snpFo_list = list_info(table_loc, table_type)

    # Duplicates by current SNP name.
    print("Now searching list for duplicate snp names....")
    dup_name_count, dup_name_list = find_dup_names(snpFo_list)
    print("FOUND {0} DUPLICATE SNP NAMES!!".format(dup_name_count))
    write_snp_list(dup_name_list, dup_names_loc)

    # Duplicates by current SNP position: sort by chromosome, then position.
    print("Now sorting list for snp positions....")
    by_position = list(snpFo_list)
    by_position.sort(key=attrgetter('chro', 'pos'))
    non_rs_dups, all_dups, _rs_dup = find_dups(by_position, annot_dict)
    write_snp_list(non_rs_dups, im_dup_loc)

    # Count how many of the duplicates carry rs names.
    all_count, rs_count = count_dups(all_dups, all_dup_loc)
    print("\nTHERE ARE {0} TOTAL DUPLICATES --- {1} ORIGINALLY WITH RS-ID NAMES".format(all_count, rs_count))

    # Not used in the visible part of this function.
    repair_dict = fix_it.DUPLICATE_FIX_DICT
# Example #14
# 0
def main(argv):
    """Pull family data for the meta-yank SNPs, then run ``meta_yank``.

    All paths are hard-coded below and the build is fixed at hg19; ``argv``
    is not used in the body.  The family table is passed to
    ``retrieve_fam_data`` as-is (no "fixed" copy is located).
    """
    build = 'hg19'
    meta_yank_loc = '/home/jkb4y/work/results/Intersection/eurmeta_06062012/RegionYank/eurmeta_yank.tbl'
    fam_loc = '/home/jkb4y/work/data/2012Feb1/Family_data/eurgdtscan_06062012_lz_hg19_intersect.txt'
    out_loc = '/home/jkb4y/work/results/Intersection/family/eurmeta_snp_info.tbl'
    yank_out = '/home/jkb4y/work/results/Intersection/family/RegionYank/family'
    bfile = '/home/jkb4y/work/data/2012Feb1/intersect_fam_uk/hg19/intersect'
    freq = '/home/jkb4y/work/data/2012Feb1/intersect_fam_uk/hg19/intersect_controls.frq'
    region_loc = '/home/jkb4y/work/data/Region_Lists/hg19/T1D_regions_hg19_05242012.txt'
    covar = '/home/jkb4y/work/data/2012Feb1/intersect_fam_uk/hg19/intersect.cov'

    annot_dict = fix_it.build_annot_dict('LOG',
                                         fix_it.locate_annot_dict(build))
    snp_list = get_snp_list(meta_yank_loc)
    retrieve_fam_data(snp_list, fam_loc, out_loc, annot_dict)

    yank_args = [
        '--family', fam_loc,
        '--build', build,
        '--bfile', bfile,
        '--freq', freq,
        '-r', region_loc,
        '--covar', covar,
        '-o', yank_out,
    ]
    meta_yank.main(yank_args)
def create_annot_dict(build, purpose):
    """Return the annotation dictionary for ``build``, built for ``purpose``.

    Thin wrapper: locates the dictionary with ``fix_it.locate_annot_dict``
    and loads it with ``fix_it.build_annot_dict``.
    """
    return fix_it.build_annot_dict(purpose, fix_it.locate_annot_dict(build))
def main(argv):
    """Condition on each SNP in a region and tabulate the SNP* results.

    After ``cl_arguments(argv)``, output folders are created under
    ``outfolder`` (``ResultTables``, ``SummaryTables``, ``chr<N>`` and
    ``chr<N>/<region_id>``).  For each (snp, position) entry returned by
    ``make_snp_pos_list`` a plink script is written and run when ``hit1``
    is set (conditioning string ``hitstring``) or, failing that, when
    ``multi`` is not set (conditioning string ``snpstar``).  The SNP*
    result is then read from the ``.assoc.logistic`` output with
    ``filter_result`` and one row is appended to the region table.
    Finally the table is copied to ``ResultTables`` and summarized into
    ``SummaryTables``.
    """
    def ensure_dir(path):
        # Create the folder (and parents) only when it is missing.
        if not os.path.exists(path):
            os.makedirs(path)

    banner = '''
**********************************************************************
    ************************************************************
    The data will be conditioned on the following SNP:
    %%%             {0}
    This is snp #{1} in a list of {2}.
        ******************************************************
**********************************************************************
'''

    cl_arguments(argv)
    annot_dict = fix_it.build_annot_dict('LOG',
                                         fix_it.locate_annot_dict(build))
    snpstar_im = annot_dict[snpstar].name

    table_folder = os.path.join(outfolder, 'ResultTables')
    summary_folder = os.path.join(outfolder, 'SummaryTables')

    # Hit indices below ten get a leading zero.
    hit_label = str(hit_index)
    if hit_index < 10:
        hit_label = '0' + hit_label

    ensure_dir(table_folder)
    ensure_dir(summary_folder)

    chr_name = 'chr{0}'.format(chromosome)
    chr_folder = os.path.join(outfolder, chr_name)
    ensure_dir(chr_folder)

    reg_folder = os.path.join(chr_folder, region_id)
    # Multi-hit runs tag the table name with SNP* and the hit index.
    placeholder = '_' + snpstar + '_' + hit_label if multi else ''
    ensure_dir(reg_folder)

    super_outbase = os.path.join(reg_folder, region_id)
    list_loc = super_outbase + '_snps.list'

    table_name = region_id + placeholder + '.tbl'
    table_loc = os.path.join(chr_folder, table_name)
    new_table_loc = os.path.join(table_folder, table_name)
    summary_table_loc = os.path.join(summary_folder, table_name)
    print(table_loc)

    snplist = make_snp_pos_list(assoc, list_loc)
    ld_loc = os.path.join(ldfolder, chr_name,
                          '{0}_r2_0.ld'.format(region_id))

    # Start the table fresh with its header; rows are appended per SNP.
    header = ['SNP*', 'SNP*_pos', 'SNP*_im', 'conditional_snp',
              'csnp_im', 'csnp_pos', 'SNP*_pvalue', 'OR', 'ci_lo', 'ci_hi',
              'a1', "r2", "csnp_freq", "csnp_freq_a1"]
    with open(table_loc, mode="w") as table:
        table.write('\t'.join(header) + '\n')

    for index, snp_tuple in enumerate(snplist, 1):
        snp, snp_pos = snp_tuple[0], snp_tuple[1]
        snp_im = annot_dict[snp].name
        # ':' in the SNP name is replaced by '_' for use in file names.
        outbase = super_outbase + '_' + snp.replace(':', '_')
        script_loc = outbase + '.script'
        assoc_out = outbase + '.assoc.logistic'

        # plink is run for first hits, or whenever this is not a multi
        # run; otherwise only the existing output is read below.
        if hit1 or not multi:
            condition_on = hitstring if hit1 else snpstar
            write_script(outbase, script_loc, user_script_loc,
                         snp, single, condition_on)
            plink(script_loc)
            print(banner.format(snp, index, len(snplist)))

        info = filter_result(assoc_out, snpstar)
        snp_freq = pc_toolbox.retrieve_freq(freq_loc, snp)
        r2 = pc_toolbox.retrieve_r2(snpstar, snp, ld_loc)
        if r2 is None:
            r2 = "???"

        row = [snpstar, info.pos, snpstar_im,
               snp, snp_im, snp_pos, str(info.p),
               info.OR, info.lo, info.hi,
               info.a1, r2, snp_freq[0], snp_freq[1]]
        with open(table_loc, mode='a') as table:
            table.write('\t'.join(row) + '\n')

    shutil.copy(table_loc, new_table_loc)
    summarize_table(new_table_loc, summary_table_loc, snpstar)