def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv # setup command line parser parser = E.OptionParser(version="%prog version: $Id$", usage=globals()["__doc__"]) parser.add_option("-t", "--test", dest="test", type="string", help="supply help") parser.add_option("--task", dest="task", type="choice", choices=["merge_exclusions", "flag_hets", "find_inbreds", "flag_relations", "discordant_gender"], help="task to execute on phenotype file(s)") parser.add_option("--gender-check-file", dest="gender_check", type="string", help="output from gender checking " "by Plink, suffix should be .sexcheck") parser.add_option("--relationship-file", dest="relations", type="string", help="output file from IBS " "calculation. Should contain all pairwise " "relationships.") parser.add_option("--inbreeding-coef-file", dest="inbreed_file", type="string", help="file containing either Plink " "or GCTA estimates of F, inbreeding coefficient") parser.add_option("--inbreeding-coefficient", dest="inbred_coeff", type="choice", choices=["Fhat1", "Fhat2", "Fhat3", "F", "ibc"], help="inbreeding coefficient " "to use to identify highly inbred individuals") parser.add_option("--inbred-cutoff", dest="inbred_cutoff", type="float", help="threshold above which individuals are classed " "as inbred.") parser.add_option("--ibs-cutoff", dest="ibs_cutoff", type="float", help="IBS threshold to flag individuals as being " "closely related") parser.add_option("--trimmed-relationships", dest="rel_cutoff", type="string", help="output file from Plink " "--rel-cutoff with trimmed data set of unrelated " "individuals.") parser.add_option("--heterozygotes-file", dest="hets_file", type="string", help="file from heterozygote analysis containing observed " "homozygosity and F coefficients") parser.add_option("--auxillary-file", dest="aux_file", type="string", help="a file of IIDs and FIDs for individuals that are " "to be removed from analysis, unrelated to QC") parser.add_option("--plotting-path", dest="plot_path", type="string", help="PATH to save any plots to") # add common options (-h/--help, ...) and parse command line (options, args) = E.Start(parser, argv=argv) if options.task == "flag_hets": # calculate heterozygosity rates, find and flag # individuals > 3 s.d. away from mean value # rate = (nonissing - homs) / nonmissing # i.e. non-homozygote rate flags = gwas.flagExcessHets(options.hets_file, plot=True, plot_path=options.plot_path) flags.to_csv(options.stdout, index=None, sep="\t") elif options.task == "merge_exclusions": exclusions = gwas.mergeQcExclusions(hets_file=options.hets_file, inbred_file=options.inbreed_file, related_file=options.relations, gender_file=options.gender_check, mask_file=options.aux_file) exclusions.to_csv(options.stdout, index=None, sep="\t") elif options.task == "find_inbreds": inbreds = gwas.flagInbred(inbred_file=options.inbreed_file, inbreeding_coefficient=options.inbred_coeff, ibc_threshold=options.inbred_cutoff, plot=True, plot_path=options.plot_path) inbreds.to_csv(options.stdout, sep="\t", index=None) elif options.task == "flag_relations": # the input file is likely to be huge! Ergo, read the file in chunks # calculate any related individuals and store them, store # an array of IBD values for plotting, drop the rest relate = gwas.flagRelated(ibd_file=options.relations, chunk_size=500000, threshold=options.ibs_cutoff, plot=True, plotting_path=options.plot_path) elif options.task == "discordant_gender": sex_discord = gwas.flagGender(gender_file=options.gender_check, plot=True, plot_path=options.plot_path) sex_discord.to_csv(options.stdout, index=None, sep="\t") else: pass # write footer and output benchmark information. E.Stop()