Esempi in Python per PipelineGWAS.flipRiskAlleles

Linguaggio di programmazione: Python

Classe/tipologia: PipelineGWAS

Metodo/funzione: flipRiskAlleles

Esempi su hotexamples.com: 2

PipelineGWAS.flipRiskAlleles in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per PipelineGWAS.flipRiskAlleles, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

GCTA(2)

GWASResults(2)

FileGroup(2)

Plink2(2)

PlinkDev(2)

ABFScore(1)

parseFlashPCA(1)

getSNPs(1)

makeCredibleSet(1)

mergeQcExclusions(1)

plotPCA(1)

parsePed(1)

plotMapPhenotype(1)

flipRiskAlleles(1)

plotPhenotype(1)

plotRiskFrequency(1)

snpPriorityScore(1)

getEigenScores(1)

flagGender(1)

flagRelated(1)

flagInbred(1)

flagExcessHets(1)

findDuplicateVariants(1)

estimateDistributionParameters(1)

countRiskAlleles(1)

countByVariantAllele(1)

calcPriorsOnSnps(1)

calcPenetrance(1)

calcMaxAlleleFreqDiff(1)

PICSscore(1)

LdRank(1)

summariseResults(1)

Esempio n. 1

Mostra file

File: snps2architecture.py Progetto: MikeDMorgan/gwas_pipeline

def main(argv=None):
    """script main.
    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=["cases_explained", "probability_phenotype"],
                      help="Which results to report, either the proportion "
                      "of cases explained or the probability of the "
                      "phenotype given the number of alleles carried")

    parser.add_option("--map-file",
                      dest="map_file",
                      type="string",
                      help="plink .map file with SNP positions")

    parser.add_option("--ped-file",
                      dest="ped_file",
                      type="string",
                      help="plink ped file with phenotype and "
                      "genotype data - A2 major allele coded")

    parser.add_option("--gwas-file",
                      dest="gwas",
                      type="string",
                      help="gwas results file, assumes Plink "
                      "output format.  Must contain SNP, BP, "
                      "OR column headers.  Assumes results relate "
                      "to the A1 allele")

    parser.add_option("--flip-alleles",
                      dest="flip",
                      action="store_true",
                      help="force alleles to flip if OR < 1")

    parser.add_option("--plot-statistic",
                      dest="plot_stat",
                      type="choice",
                      choices=["frequency", "cumulative"],
                      help="plot either cases frequency or cumulative "
                      "frequency of cases")

    parser.add_option("--plot-path",
                      dest="plot_path",
                      type="string",
                      help="save path for plot")

    parser.add_option("--flag-explained-recessive",
                      dest="explained",
                      action="store_true",
                      help="flag individuals explained by carriage of "
                      "2 risk alleles - NOT IMPLIMENTED")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # required files are .ped file, .map file and gwas results file
    E.info("reading GWAS results file: %s" % options.gwas)
    snp_df = pd.read_table(options.gwas, sep="\t", header=0, index_col=None)
    snp_list = snp_df["SNP"].values

    # parse ped file
    E.info("Reading ped file: %s" % options.ped_file)
    ped_df = gwas.parsePed(options.ped_file, compound_geno=True)

    # parse map file and get SNP indices that correspond to
    # ped file genotypes
    E.info("Fetching SNPs from map file: %s" % options.map_file)
    snp_index = gwas.getSNPs(options.map_file, snp_list)

    E.info("SNPs found: %i" % len(snp_index))
    # extract SNPs and ORs as key, value pairs
    or_dict = snp_df.loc[:, ["SNP", "OR"]].to_dict(orient='list')
    snp_or = dict(zip(or_dict["SNP"], or_dict["OR"]))

    if options.flip:
        E.info("Flipping major alleles to risk alleles")
        flipped_genos = gwas.flipRiskAlleles(snp_index=snp_index,
                                             snp_results=snp_or,
                                             genos=ped_df["GENOS"].tolist())
        # merge flipped genotypes with pedigree frame to get phenotypes
        geno_df = pd.DataFrame(flipped_genos, index=ped_df["FID"])
    else:
        # split genos into a dataframe
        genos = np.array(ped_df["GENOS"].tolist())
        geno_df = pd.DataFrame(genos, index=ped_df["FID"])

    merged = pd.merge(geno_df, ped_df, left_index=True, right_on="FID")

    # need to discount missing genotypes > 1%

    # frequencies of number of risk alleles by trait frequency
    E.info("count #risk alleles per individual")
    risk_results = gwas.countRiskAlleles(ped_frame=merged,
                                         snp_index=snp_index.values(),
                                         report=options.method,
                                         flag=options.explained)
    risk_freqs = risk_results["freqs"]
    cumulative = risk_results["cumulative"]
    # select results upto and including cumulative freq = 1.0
    max_indx = [fx for fx, fy in enumerate(cumulative) if fy == 1.0][0]
    max_freqs = risk_freqs[:max_indx + 1]
    max_cum = cumulative[:max_indx + 1]
    bins = [ix for ix, iy in enumerate(cumulative)][:max_indx + 1]

    # plot!
    if options.plot_stat == "frequency":
        E.info("Generating plot of #risk alleles vs. P(Phenotype)")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_freqs,
                                         savepath=options.plot_path,
                                         ytitle="P(Phenotype)")
    elif options.plot_stat == "cumulative":
        E.info("Generating plot of #risk alleles vs. cumulative frequency")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_cum,
                                         savepath=options.plot_path,
                                         ytitle="Cumulative frequency cases")

    hist_df["freq"] = risk_results["freqs"][:max_indx + 1]
    hist_df["cumulative"] = risk_results["cumulative"][:max_indx + 1]
    hist_df["cases"] = risk_results["cases"][:max_indx + 1]
    hist_df["controls"] = risk_results["controls"][:max_indx + 1]
    hist_df["total"] = hist_df["cases"] + hist_df["controls"]
    hist_df.to_csv(options.stdout, sep="\t", index=None)

    # write footer and output benchmark information.
    E.Stop()

Esempio n. 2

Mostra file

File: snps2architecture.py Progetto: MikeDMorgan/gwas_pipeline

def main(argv=None):
    """script main.
    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--method", dest="method", type="choice",
                      choices=["cases_explained", "probability_phenotype"],
                      help="Which results to report, either the proportion "
                      "of cases explained or the probability of the "
                      "phenotype given the number of alleles carried")

    parser.add_option("--map-file", dest="map_file", type="string",
                      help="plink .map file with SNP positions")

    parser.add_option("--ped-file", dest="ped_file", type="string",
                      help="plink ped file with phenotype and "
                      "genotype data - A2 major allele coded")

    parser.add_option("--gwas-file", dest="gwas", type="string",
                      help="gwas results file, assumes Plink "
                      "output format.  Must contain SNP, BP, "
                      "OR column headers.  Assumes results relate "
                      "to the A1 allele")

    parser.add_option("--flip-alleles", dest="flip", action="store_true",
                      help="force alleles to flip if OR < 1")

    parser.add_option("--plot-statistic", dest="plot_stat", type="choice",
                      choices=["frequency", "cumulative"],
                      help="plot either cases frequency or cumulative "
                      "frequency of cases")

    parser.add_option("--plot-path", dest="plot_path", type="string",
                      help="save path for plot")

    parser.add_option("--flag-explained-recessive", dest="explained",
                      action="store_true",
                      help="flag individuals explained by carriage of "
                      "2 risk alleles - NOT IMPLIMENTED")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # required files are .ped file, .map file and gwas results file
    E.info("reading GWAS results file: %s" % options.gwas)
    snp_df = pd.read_table(options.gwas, sep="\t", header=0,
                           index_col=None)
    snp_list = snp_df["SNP"].values

    # parse ped file
    E.info("Reading ped file: %s" % options.ped_file)
    ped_df = gwas.parsePed(options.ped_file,
                           compound_geno=True)

    # parse map file and get SNP indices that correspond to
    # ped file genotypes
    E.info("Fetching SNPs from map file: %s" % options.map_file)
    snp_index = gwas.getSNPs(options.map_file,
                             snp_list)

    E.info("SNPs found: %i" % len(snp_index))
    # extract SNPs and ORs as key, value pairs
    or_dict = snp_df.loc[:, ["SNP", "OR"]].to_dict(orient='list')
    snp_or = dict(zip(or_dict["SNP"], or_dict["OR"]))

    if options.flip:
        E.info("Flipping major alleles to risk alleles")
        flipped_genos = gwas.flipRiskAlleles(snp_index=snp_index,
                                             snp_results=snp_or,
                                             genos=ped_df["GENOS"].tolist())
        # merge flipped genotypes with pedigree frame to get phenotypes
        geno_df = pd.DataFrame(flipped_genos, index=ped_df["FID"])
    else:
        # split genos into a dataframe
        genos = np.array(ped_df["GENOS"].tolist())
        geno_df = pd.DataFrame(genos, index=ped_df["FID"])

    merged = pd.merge(geno_df, ped_df, left_index=True, right_on="FID")

    # need to discount missing genotypes > 1%

    # frequencies of number of risk alleles by trait frequency
    E.info("count #risk alleles per individual")
    risk_results = gwas.countRiskAlleles(ped_frame=merged,
                                         snp_index=snp_index.values(),
                                         report=options.method,
                                         flag=options.explained)
    risk_freqs = risk_results["freqs"]
    cumulative = risk_results["cumulative"]
    # select results upto and including cumulative freq = 1.0
    max_indx = [fx for fx, fy in enumerate(cumulative) if fy == 1.0][0]
    max_freqs = risk_freqs[:max_indx + 1]
    max_cum = cumulative[:max_indx + 1]
    bins = [ix for ix, iy in enumerate(cumulative)][:max_indx + 1]

    # plot!
    if options.plot_stat == "frequency":
        E.info("Generating plot of #risk alleles vs. P(Phenotype)")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_freqs,
                                         savepath=options.plot_path,
                                         ytitle="P(Phenotype)")
    elif options.plot_stat == "cumulative":
        E.info("Generating plot of #risk alleles vs. cumulative frequency")
        hist_df = gwas.plotRiskFrequency(bins=bins,
                                         frequencies=max_cum,
                                         savepath=options.plot_path,
                                         ytitle="Cumulative frequency cases")

    hist_df["freq"] = risk_results["freqs"][:max_indx + 1]
    hist_df["cumulative"] = risk_results["cumulative"][:max_indx + 1]
    hist_df["cases"] = risk_results["cases"][:max_indx + 1]
    hist_df["controls"] = risk_results["controls"][:max_indx + 1]
    hist_df["total"] = hist_df["cases"] + hist_df["controls"]
    hist_df.to_csv(options.stdout, sep="\t", index=None)

    # write footer and output benchmark information.
    E.Stop()