Ejemplo n.º 1
0
def main():
    """Run PLINK LD for every region and hand each result to the plotter.

    For each r2 threshold a combined tab-separated file
    ``all_regions_r2_<r2>.ld`` is (re)created in the LD folder with only a
    header row. Every region except the skipped MHC band is then passed
    through ``plink_ld``, ``copy_to_tot``, ``ld_for_lz`` and ``main_lz``.

    The LD and output folders are hard-coded here rather than taken from
    module-level settings; the association folder comes from
    ``ASSOC_FOLDER`` and the PLINK fileset from ``BFILE``.
    """
    plot_regions = False     # forwarded to main_lz as its last argument
    summary_only = False     # forwarded to plink_ld as sum_only
    include_mhc = False      # when False, band 6p21.32 is skipped
    region_list = pc_toolbox.create_region_list(REGION_LOC)
    plink_bfile = BFILE
    ld_root = '/home/jkb4y/ubs/work/results/Intersection_06022014/eurmeta_LD/'
    plot_root = '/home/jkb4y/ubs/work/results/Intersection_06022014/CAnalysis_LD/'
    assoc_root = ASSOC_FOLDER
    header_fields = ('CHR_A', 'BP_A', 'SNP_A', 'CHR_B', 'BP_B', 'SNP_B', 'R2')

    # Only a single threshold is active; add further values such as '0.2'
    # or '0.8' to this tuple to process more thresholds.
    for r2 in ('0',):
        combined_path = os.path.join(ld_root, 'all_regions_r2_{0}.ld'.format(r2))
        # Truncate the combined file and write its header row.
        with open(combined_path, mode="w") as combined:
            combined.write('\t'.join(header_fields) + '\n')

        for region in region_list:
            band_id = region.ID
            chrom = region.chro
            # Both folder lookups happen before the skip test, as they
            # always have; chr_folder may have side effects.
            plot_dir = pc_toolbox.chr_folder(plot_root, chrom)
            assoc_dir = pc_toolbox.chr_folder(assoc_root, chrom)
            if not include_mhc and band_id == '6p21.32':
                continue

            ld_path, snp_path = plink_ld(region, plink_bfile, ld_root,
                                         assoc_dir, r2, summary_only)
            copy_to_tot(combined_path, ld_path)
            main_lz(region, plot_dir, assoc_dir, snp_path,
                    ld_for_lz(ld_path), r2, plot_regions)
Ejemplo n.º 2
0
def main():
    """Call ``main_lz`` for every region using existing LD and SNP-list files.

    For each region the per-chromosome output and LD folders are looked up
    with ``pc_toolbox.chr_folder``. The LD file ``<band>_r2_0.8_lz.ld`` and
    the SNP list ``<band>_~leastP_SNPs.txt`` are both expected in the LD
    folder for that chromosome.
    """
    region_list = pc_toolbox.create_region_list(REGION_LOC)
    ld_root = '/home/jkb4y/work/results/2012Feb1/hg18/LD/'
    plot_root = '/home/jkb4y/work/results/2012Feb1/hg18/CAnalysis_Test/'

    for region in region_list:
        band = region.band
        chrom = region.chro
        plot_dir = pc_toolbox.chr_folder(plot_root, chrom)
        ld_name = band + '_r2_0.8_lz.ld'
        ld_dir = pc_toolbox.chr_folder(ld_root, chrom)
        ld_path = os.path.join(ld_dir, ld_name)
        snp_path = os.path.join(ld_dir, '{0}_~leastP_SNPs.txt'.format(band))
        main_lz(region, plot_dir, snp_path, ld_path)
Ejemplo n.º 3
0
def plink_ld(region, bfile, ldfolder, assoc_folder, r2, sum_only=False):
    """Run PLINK's pairwise-LD report for one region and return the file paths.

    Args:
        region: object exposing ``chro``, ``ID``, ``start`` and ``end``.
            ``start``/``end`` are handed to PLINK as ``--from-mb`` and
            ``--to-mb``.
        bfile: value for PLINK's ``--bfile`` option.
        ldfolder: root LD folder; the per-chromosome subfolder is obtained
            from ``pc_toolbox.chr_folder``.
        assoc_folder: folder holding ``<ID>_~leastP_SNPs.txt``, which is
            passed to PLINK as ``--ld-snp-list``.
        r2: value for ``--ld-window-r2``; also embedded in the output name.
        sum_only: when true, PLINK is not run and only the paths are
            returned.

    Returns:
        A ``(ld_path, snp_loc)`` tuple, where ``ld_path`` is the PLINK
        ``--out`` prefix with ``.ld`` appended and ``snp_loc`` is the SNP
        list path.
    """
    import sys  # local import: only needed for the failure report below

    chromosome = region.chro
    chrband = region.ID
    ldfile = chrband + '_r2_{0}'.format(r2)
    ld_sub = pc_toolbox.chr_folder(ldfolder, chromosome)
    ld = os.path.join(ld_sub, ldfile)
    snp_list = '{0}_~leastP_SNPs.txt'.format(chrband)
    snp_loc = os.path.join(assoc_folder, snp_list)

    if not sum_only:
        # Popen only accepts string arguments, so the region-derived values
        # are coerced in case the region object stores them as numbers.
        plink_args = ['plink', '--noweb', '--bfile', bfile,
                      '--chr', str(chromosome),
                      '--from-mb', str(region.start),
                      '--to-mb', str(region.end),
                      '--r2', '--ld-window-r2', r2,
                      '--ld-window', '999999',
                      '--ld-window-kb', '99999',
                      '--ld-snp-list', snp_loc,
                      '--out', ld, '--filter-controls', '--maf', '0.005']
        # bufsize and close_fds are kept explicit because their defaults
        # differ between Python versions; the other options were defaults.
        exit_status = subprocess.Popen(plink_args, bufsize=0,
                                       close_fds=False).wait()
        if exit_status != 0:
            # Report the failure instead of dropping it silently. The paths
            # are still returned so existing callers keep working.
            sys.stderr.write(
                'plink exited with status {0} for region {1}\n'.format(
                    exit_status, chrband))

    return ld + '.ld', snp_loc
Ejemplo n.º 4
0
def main(argv):
    """Drive the repeated PLINK / LocusZoom loop for a single region.

    Steps, in order:
      1. Reset the loop-control globals and call ``cl_arguments(argv)``
         (presumably this fills the remaining globals from the command
         line; confirm against its definition).
      2. Build the file-name ``label`` from the region label, phenotype tag,
         output flag and condition-list indicator, then write a start
         banner to stdout.
      3. Switch the working directory to the per-chromosome output folder.
      4. While ``loopcount < maxloops`` and ``loopagain`` is true: print the
         SNP list, write and run a PLINK script, then either plot against
         the given condition list (first pass only, when a condition list
         was supplied) or call ``find_and_record`` and plot.
      5. Print a termination banner.

    Args:
        argv: argument list forwarded unchanged to ``cl_arguments``.

    All other inputs and outputs are module-level globals (see the
    ``global`` statements below).
    """
    global loopcount, output_folder, plink_test, chromosome, refgene
    global maxloops, loopagain, SNP_loc, range_start_bp, range_end_bp
    global out_flag, condition_list, firstloop, pheno_tag, c_interval
    global chrband, interrupt

    firstloop = True
    loopagain = True
    loopcount = 0
    cl_arguments(argv)

    mb_start = convert_bp_to_mb(range_start_bp)
    mb_end = convert_bp_to_mb(range_end_bp)
    cl_indicator = ""
    rgene = refgene
    cband = chrband
    ptag = ""
    if pheno_tag != "":
        ptag = "_" + pheno_tag
    if chrband is None:
        rangelabel = range_label(chromosome, refgene, range_start_bp, range_end_bp)
        cband = "NA"
    else:
        rangelabel = chrband
    # With a condition list the label gets a "~" marker; on an interrupted
    # run only when the list itself already contains "~".
    if condition_list is not None and (not interrupt or "~" in condition_list):
        cl_indicator = "~"
    if refgene is None:
        rgene = "NA"
    label = rangelabel.replace(":", "_") + ptag + out_flag + "_" + cl_indicator
    of = "--"
    if out_flag != "":
        # NOTE(review): assumes out_flag contains an underscore (e.g.
        # "_flag"); without one the [1] lookup raises IndexError.
        of_pretty_list = out_flag.split("_")
        of = of_pretty_list[1]
    sys.stdout.write(
        """

******************************************************************************
        ***********************************************************
                    Beginning pc_workhorse.py on {0} at
                                    {1}
$$$     {2}     {3}     {4}     {5}     {6}     {7}
        ***********************************************************
******************************************************************************

""".format(
            label, time.strftime("%a,%c"), cband, chromosome, rgene, mb_start, mb_end, of + ptag
        )
    )
    sys.stdout.flush()
    output_folder = str(pc_toolbox.chr_folder(output_folder, chromosome))
    os.chdir(output_folder)
    assoc_extension = determine_extension(plink_test)

    SNP_loc = os.path.join(output_folder, label + "leastP_SNPs.txt")

    # loopagain is a module global; presumably one of the helpers called in
    # the loop clears it to end the iteration (verify against the helpers).
    while loopcount < maxloops and loopagain:
        print_snp_list(SNP_loc)
        if interrupt:
            ref_snp, part_counter = given_condition_list(SNP_loc, condition_list, interrupt)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(
                """
******************************************************
<<<PROCESS INTERRUPTED AFTER IDENTIFYING CONDITIONAL SNP: {0}>>>
    PROCESS RESTARTED and LOGFILE APPENDED TO.
******************************************************
""".format(
                    loopcount
                )
            )
            print(loopcount)
            interrupt = False
        assoc_file_basename = label + str(loopcount) + "_SNPsOut"
        print(assoc_file_basename)
        assoc_file_name = assoc_file_basename + assoc_extension
        script_name = label + "script_" + str(loopcount) + ".txt"
        write_script(assoc_file_basename, script_name)
        plink(script_name)

        if loopcount == 0 and condition_list is not None:
            # First pass with a supplied condition list: plot against the
            # reference SNP it yields, then continue from the counter it
            # returned.
            ref_snp, part_counter = given_condition_list(SNP_loc, condition_list, interrupt)
            title = (
                make_lz_title(rangelabel, pheno_tag, assoc_file_basename, ref_snp)
                .replace("_z", "")
                .replace("_a", "")
                .replace("_b", "")
            )
            pdf_loc = locuszoom(
                assoc_file_name, chromosome, range_start_bp, range_end_bp, assoc_file_basename, title, ref_snp
            )
            fix_pdf(pdf_loc)
            ref_snp = None
            loopcount = part_counter
        else:
            find_and_record(assoc_file_name, SNP_loc, condition_list, plink_test, c_interval)
            sys.stdout.flush()
            title = make_lz_title(rangelabel, pheno_tag, assoc_file_basename)
            pdf_loc = locuszoom(assoc_file_name, chromosome, range_start_bp, range_end_bp, assoc_file_basename, title)
            fix_pdf(pdf_loc)
            loopcount = loopcount + 1
    print(
        """
*****************************************************************
    ****************************************************
    Termination condition reached, and pc_workhorse
    ended at {0} for region {1} at {2}.
    ****************************************************
******************************************************************
""".format(
            time.strftime("%a,%c"), cband, rangelabel
        )
    )