Python PipelineGWAS.countRiskAlleles 예제들

프로그래밍 언어: Python

클래스/타입: PipelineGWAS

메소드/함수: countRiskAlleles

hotexamples.com에서의 예제들: 2

Python PipelineGWAS.countRiskAlleles - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 PipelineGWAS.countRiskAlleles에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

GCTA(2)

GWASResults(2)

FileGroup(2)

Plink2(2)

PlinkDev(2)

ABFScore(1)

parseFlashPCA(1)

getSNPs(1)

makeCredibleSet(1)

mergeQcExclusions(1)

plotPCA(1)

parsePed(1)

plotMapPhenotype(1)

flipRiskAlleles(1)

plotPhenotype(1)

plotRiskFrequency(1)

snpPriorityScore(1)

getEigenScores(1)

flagGender(1)

flagRelated(1)

flagInbred(1)

flagExcessHets(1)

findDuplicateVariants(1)

estimateDistributionParameters(1)

countRiskAlleles(1)

countByVariantAllele(1)

calcPriorsOnSnps(1)

calcPenetrance(1)

calcMaxAlleleFreqDiff(1)

PICSscore(1)

LdRank(1)

summariseResults(1)

예제 #1

파일 보기

파일: snps2architecture.py 프로젝트: MikeDMorgan/gwas_pipeline

def main(argv=None):
    """script main.
    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=["cases_explained", "probability_phenotype"],
                      help="Which results to report, either the proportion "
                      "of cases explained or the probability of the "
                      "phenotype given the number of alleles carried")

    parser.add_option("--map-file",
                      dest="map_file",
                      type="string",
                      help="plink .map file with SNP positions")

    parser.add_option("--ped-file",
                      dest="ped_file",
                      type="string",
                      help="plink ped file with phenotype and "
                      "genotype data - A2 major allele coded")

    parser.add_option("--gwas-file",
                      dest="gwas",
                      type="string",
                      help="gwas results file, assumes Plink "
                      "output format.  Must contain SNP, BP, "
                      "OR column headers.  Assumes results relate "
                      "to the A1 allele")

    parser.add_option("--flip-alleles",
                      dest="flip",
                      action="store_true",
                      help="force alleles to flip if OR < 1")

    parser.add_option("--plot-statistic",
                      dest="plot_stat",
                      type="choice",
                      choices=["frequency", "cumulative"],
                      help="plot either cases frequency or cumulative "
                      "frequency of cases")

    parser.add_option("--plot-path",
                      dest="plot_path",
                      type="string",
                      help="save path for plot")

    parser.add_option("--flag-explained-recessive",
                      dest="explained",
                      action="store_true",
                      help="flag individuals explained by carriage of "
                      "2 risk alleles - NOT IMPLIMENTED")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # required files are .ped file, .map file and gwas results file
    E.info("reading GWAS results file: %s" % options.gwas)
    snp_df = pd.read_table(options.gwas, sep="\t", header=0, index_col=None)
    snp_list = snp_df["SNP"].values

    # parse ped file
    E.info("Reading ped file: %s" % options.ped_file)
    ped_df = gwas.parsePed(options.ped_file, compound_geno=True)

    # parse map file and get SNP indices that correspond to
    # ped file genotypes
    E.info("Fetching SNPs from map file: %s" % options.map_file)
    snp_index = gwas.getSNPs(options.map_file, snp_list)

    E.info("SNPs found: %i" % len(snp_index))
    # extract SNPs and ORs as key, value pairs
    or_dict = snp_df.loc[:, ["SNP", "OR"]].to_dict(orient='list')
    snp_or = dict(zip(or_dict["SNP"], or_dict["OR"]))

    if options.flip:
        E.info("Flipping major alleles to risk alleles")
        flipped_genos = gwas.flipRiskAlleles(snp_index=snp_index,
                                             snp_results=snp_or,
                                             genos=ped_df["GENOS"].tolist())
        # merge flipped genotypes with pedigree frame to get phenotypes
        geno_df = pd.DataFrame(flipped_genos, index=ped_df["FID"])
    else:
        # split genos into a dataframe
        genos = np.array(ped_df["GENOS"].tolist())
        geno_df = pd.DataFrame(genos, index=ped_df["FID"])

    merged = pd.merge(geno_df, ped_df, left_index=True, right_on="FID")

    # need to discount missing genotypes > 1%

    # frequencies of number of risk alleles by trait frequency
    E.info("count #risk alleles per individual")
    risk_results = gwas.countRiskAlleles(ped_frame=merged,
                                         snp_index=snp_index.values(),
                                         report=options.method,
                                         flag=options.explained)
    risk_freqs = risk_results["freqs"]
    cumulative = risk_results["cumulative"]
    # select results upto and including cumulative freq = 1.0
    max_indx = [fx for fx, fy in enumerate(cumulative) if fy == 1.0][0]
    max_freqs = risk_freqs[:max_indx + 1]
    max_cum = cumulative[:max_indx + 1]
    bins = [ix for ix, iy in enumerate(cumulative)][:max_indx + 1]

    # plot!
    if options.plot_stat == "frequency":
        E.info("Generating plot of #risk alleles vs. P(Phenotype)")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_freqs,
                                         savepath=options.plot_path,
                                         ytitle="P(Phenotype)")
    elif options.plot_stat == "cumulative":
        E.info("Generating plot of #risk alleles vs. cumulative frequency")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_cum,
                                         savepath=options.plot_path,
                                         ytitle="Cumulative frequency cases")

    hist_df["freq"] = risk_results["freqs"][:max_indx + 1]
    hist_df["cumulative"] = risk_results["cumulative"][:max_indx + 1]
    hist_df["cases"] = risk_results["cases"][:max_indx + 1]
    hist_df["controls"] = risk_results["controls"][:max_indx + 1]
    hist_df["total"] = hist_df["cases"] + hist_df["controls"]
    hist_df.to_csv(options.stdout, sep="\t", index=None)

    # write footer and output benchmark information.
    E.Stop()

예제 #2

파일 보기

파일: snps2architecture.py 프로젝트: MikeDMorgan/gwas_pipeline

def main(argv=None):
    """script main.
    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--method", dest="method", type="choice",
                      choices=["cases_explained", "probability_phenotype"],
                      help="Which results to report, either the proportion "
                      "of cases explained or the probability of the "
                      "phenotype given the number of alleles carried")

    parser.add_option("--map-file", dest="map_file", type="string",
                      help="plink .map file with SNP positions")

    parser.add_option("--ped-file", dest="ped_file", type="string",
                      help="plink ped file with phenotype and "
                      "genotype data - A2 major allele coded")

    parser.add_option("--gwas-file", dest="gwas", type="string",
                      help="gwas results file, assumes Plink "
                      "output format.  Must contain SNP, BP, "
                      "OR column headers.  Assumes results relate "
                      "to the A1 allele")

    parser.add_option("--flip-alleles", dest="flip", action="store_true",
                      help="force alleles to flip if OR < 1")

    parser.add_option("--plot-statistic", dest="plot_stat", type="choice",
                      choices=["frequency", "cumulative"],
                      help="plot either cases frequency or cumulative "
                      "frequency of cases")

    parser.add_option("--plot-path", dest="plot_path", type="string",
                      help="save path for plot")

    parser.add_option("--flag-explained-recessive", dest="explained",
                      action="store_true",
                      help="flag individuals explained by carriage of "
                      "2 risk alleles - NOT IMPLIMENTED")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # required files are .ped file, .map file and gwas results file
    E.info("reading GWAS results file: %s" % options.gwas)
    snp_df = pd.read_table(options.gwas, sep="\t", header=0,
                           index_col=None)
    snp_list = snp_df["SNP"].values

    # parse ped file
    E.info("Reading ped file: %s" % options.ped_file)
    ped_df = gwas.parsePed(options.ped_file,
                           compound_geno=True)

    # parse map file and get SNP indices that correspond to
    # ped file genotypes
    E.info("Fetching SNPs from map file: %s" % options.map_file)
    snp_index = gwas.getSNPs(options.map_file,
                             snp_list)

    E.info("SNPs found: %i" % len(snp_index))
    # extract SNPs and ORs as key, value pairs
    or_dict = snp_df.loc[:, ["SNP", "OR"]].to_dict(orient='list')
    snp_or = dict(zip(or_dict["SNP"], or_dict["OR"]))

    if options.flip:
        E.info("Flipping major alleles to risk alleles")
        flipped_genos = gwas.flipRiskAlleles(snp_index=snp_index,
                                             snp_results=snp_or,
                                             genos=ped_df["GENOS"].tolist())
        # merge flipped genotypes with pedigree frame to get phenotypes
        geno_df = pd.DataFrame(flipped_genos, index=ped_df["FID"])
    else:
        # split genos into a dataframe
        genos = np.array(ped_df["GENOS"].tolist())
        geno_df = pd.DataFrame(genos, index=ped_df["FID"])

    merged = pd.merge(geno_df, ped_df, left_index=True, right_on="FID")

    # need to discount missing genotypes > 1%

    # frequencies of number of risk alleles by trait frequency
    E.info("count #risk alleles per individual")
    risk_results = gwas.countRiskAlleles(ped_frame=merged,
                                         snp_index=snp_index.values(),
                                         report=options.method,
                                         flag=options.explained)
    risk_freqs = risk_results["freqs"]
    cumulative = risk_results["cumulative"]
    # select results upto and including cumulative freq = 1.0
    max_indx = [fx for fx, fy in enumerate(cumulative) if fy == 1.0][0]
    max_freqs = risk_freqs[:max_indx + 1]
    max_cum = cumulative[:max_indx + 1]
    bins = [ix for ix, iy in enumerate(cumulative)][:max_indx + 1]

    # plot!
    if options.plot_stat == "frequency":
        E.info("Generating plot of #risk alleles vs. P(Phenotype)")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_freqs,
                                         savepath=options.plot_path,
                                         ytitle="P(Phenotype)")
    elif options.plot_stat == "cumulative":
        E.info("Generating plot of #risk alleles vs. cumulative frequency")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_cum,
                                         savepath=options.plot_path,
                                         ytitle="Cumulative frequency cases")

    hist_df["freq"] = risk_results["freqs"][:max_indx + 1]
    hist_df["cumulative"] = risk_results["cumulative"][:max_indx + 1]
    hist_df["cases"] = risk_results["cases"][:max_indx + 1]
    hist_df["controls"] = risk_results["controls"][:max_indx + 1]
    hist_df["total"] = hist_df["cases"] + hist_df["controls"]
    hist_df.to_csv(options.stdout, sep="\t", index=None)

    # write footer and output benchmark information.
    E.Stop()