Python BoundaryCheck.LoadSNPs Examples

Programming Language: Python

Namespace/Package Name: libgwas.boundary

Class/Type: BoundaryCheck

Method/Function: LoadSNPs

Examples at hotexamples.com: 2

Python BoundaryCheck.LoadSNPs - 2 examples found. These are the top rated real world Python examples of libgwas.boundary.BoundaryCheck.LoadSNPs extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

BoundaryCheck(30)

TestBoundary(9)

NoExclusions(8)

set_chrom(4)

LoadSNPs(2)

LoadExclusions(1)

Example #1

Show file

File: test_boundary.py Project: edwards-lab/libGWAS

 def testBoundaryInitBPWithInclusions(self):
     """Check a bp-range boundary on chromosome 1 combined with loaded SNPs.

     The boundary is built from the bp pair [10000, 500000] and then given
     two rsids through LoadSNPs. The assertions verify that positions inside
     the range on chromosome 1 pass, positions outside it or on another
     chromosome fail, and that the two loaded rsids pass even at positions
     beyond the range while an unlisted rsid does not.
     """
     BoundaryCheck.chrom = 1
     boundary = BoundaryCheck(bp=[10000, 500000])
     boundary.LoadSNPs(["rs12345", "rs23456"])

     self.assertFalse(boundary.NoExclusions())
     self.assertTrue(boundary.valid)

     # (expected result, chromosome, position) for lookups with no rsid.
     positional_cases = (
         (False, 1, 500),
         (True, 1, 10000),
         (True, 1, 500000),
         (True, 1, 250000),
         (False, 2, 250000),
         (False, 10, 10000),
     )
     for expected, chrom, pos in positional_cases:
         self.assertEqual(expected, boundary.TestBoundary(chrom, pos, ""))

     # Explicitly loaded rsids are accepted outside the bp range.
     for pos, rsid in ((1000000, "rs12345"), (1200000, "rs23456")):
         self.assertTrue(boundary.TestBoundary(1, pos, rsid))

     # An rsid that was never loaded, outside the range, is rejected.
     self.assertFalse(boundary.TestBoundary(1, 1200011, "rs345678"))

Example #2

Show file

    def LoadCmdLine(self, args=sys.argv[1:]):
        """Parse user arguments using argparse and set up components"""
        parser = argparse.ArgumentParser(description="MV Test: " + __version__,
                                         epilog="""
mvtest.py is uses many of the same arguments as plink, but there are a few
differences, so please consider the list above carefully.
        """)

        parser.add_argument("-v",
                            action='store_true',
                            help="Print version number")
        parser.add_argument(
            "--vall",
            action='store_true',
            help="Print version number along with each dependency")

        parser.add_argument("--chr",
                            type=int,
                            default=-1,
                            metavar="N",
                            help="Select Chromosome")
        parser.add_argument(
            "--snps",
            type=str,
            default="",
            help="Comma-delimited list of SNP(s): rs1,rs2,rs3-rs6")
        parser.add_argument("--from-bp",
                            type=int,
                            metavar="START",
                            help="SNP range start")
        parser.add_argument("--to-bp",
                            type=int,
                            metavar="END",
                            help="SNP range end")
        parser.add_argument("--from-kb",
                            type=int,
                            metavar="START",
                            help="SNP range start")
        parser.add_argument("--to-kb",
                            type=int,
                            metavar="END",
                            help="SNP range end")
        parser.add_argument("--from-mb",
                            type=int,
                            metavar="START",
                            help="SNP range start")
        parser.add_argument("--to-mb",
                            type=int,
                            metavar="END",
                            help="SNP range end")
        parser.add_argument(
            "--exclude",
            type=str,
            default="",
            help="Comma-delimited list of rsids to be excluded")

        # For now, I'm not implementing keep, since we don't have any real meaningful need for analyzing individuals
        # PLINK does, but we don't do the QC stuff they do.
        parser.add_argument(
            "--keep",
            type=str,
            default="",
            help="Comma-delimited list of individuals to be analyzed")
        parser.add_argument(
            "--remove",
            type=str,
            default="",
            help=
            "Comma-delimited list of individuals to be removed from analysis")

        parser.add_argument("--file",
                            type=str,
                            help="Prefix for .ped and .map files")
        parser.add_argument("--ped",
                            type=argparse.FileType('r'),
                            help="PLINK compatible .ped file")
        parser.add_argument("--map",
                            type=argparse.FileType('r'),
                            help="PLINK compatible .map file")
        parser.add_argument("--map3",
                            action='store_true',
                            help="MAP file has only 3 columns")
        parser.add_argument("--no-sex",
                            action='store_true',
                            help="Pedigree file doesn't have column 5 (sex)")
        parser.add_argument(
            "--no-parents",
            action="store_true",
            help="Pedigree file doesn't have columns 3 and 4 (parents)")
        parser.add_argument(
            "--no-fid",
            action="store_true",
            help="Pedigree file doesn't have column 1 (family ID)")
        parser.add_argument(
            "--no-pheno",
            action="store_true",
            help="Pedigree file doesn't have column 6 (phenotype")
        parser.add_argument("--liability",
                            action="store_true",
                            help="Pedigree file has column 7 (liability)")

        parser.add_argument("--bfile",
                            type=str,
                            help="Prefix for .bed, .bim and .fam files")
        parser.add_argument("--bed",
                            type=argparse.FileType('r'),
                            help="Binary Ped file (.bed)")
        parser.add_argument("--bim",
                            type=argparse.FileType('r'),
                            help="Binary ped marker file (.bim)")
        parser.add_argument("--fam",
                            type=argparse.FileType('r'),
                            help="Binary ped family file (.fam)")

        parser.add_argument("--tfile",
                            type=str,
                            help="Prefix for .tped and .tfam files")
        parser.add_argument("--tped",
                            type=argparse.FileType('r'),
                            help="Transposed Pedigree file (.tped)")
        parser.add_argument("--tfam",
                            type=argparse.FileType('r'),
                            help="Transposed pedigre Family file (.tfam)")
        parser.add_argument(
            "--compressed",
            action="store_true",
            help="Ped/TPed compressed with gzip (named .ped.tgz or .tped.tgz)")

        parser.add_argument(
            "--impute",
            type=argparse.FileType('r'),
            help="File containing list of impute output for analysis")
        parser.add_argument(
            "--impute-fam",
            type=argparse.FileType('r'),
            help="File containing family details for impute data")
        parser.add_argument(
            "--impute-offset",
            type=int,
            default=-1,
            help="Impute file index (1 based) to begin analysis")
        parser.add_argument(
            "--impute-count",
            type=int,
            default=-1,
            help="Number of impute files to process (for this node)")
        parser.add_argument(
            "--impute-uncompressed",
            action="store_true",
            help="Indicate that the impute input is not gzipped, but plain text"
        )
        parser.add_argument(
            "--impute-encoding",
            type=str,
            choices=['additive', 'dominant', 'recessive', 'genotype'],
            default='additive',
            help='Genetic model to be used')
        parser.add_argument("--impute-info-ext",
                            type=str,
                            default='info',
                            help="Portion of filename denotes info filename")
        parser.add_argument("--impute-gen-ext",
                            type=str,
                            default='gen.gz',
                            help="Portion of filename that denotes gen file")
        parser.add_argument(
            "--impute-info-thresh",
            type=float,
            default=0.4,
            help="Threshold for filtering imputed SNPs with poor 'info' values"
        )

        parser.add_argument(
            "--mach",
            type=argparse.FileType('r'),
            help="File containing list of MACH output for analysis")
        parser.add_argument("--mach-offset",
                            type=int,
                            default=-1,
                            help="Mach file index (1 based) to begin analysis")
        parser.add_argument(
            "--mach-count",
            type=int,
            default=-1,
            help="Number of mach files to process (for this node)")
        parser.add_argument("--mach-uncompressed",
                            action="store_true",
                            help="Indicate that the mach input is not gzipped")
        parser.add_argument(
            "--mach-chunk-size",
            type=int,
            default=100000,
            help=
            "Max number of loci to load at once (higher increases memory requirements with some speed benefits)"
        )
        parser.add_argument("--mach-info-ext",
                            type=str,
                            default="info.gz",
                            help="Portion of filename denotes info filenames")
        parser.add_argument("--mach-dose-ext",
                            type=str,
                            default="dose.gz",
                            help="Portion of filename that denotes dose files")
        parser.add_argument("--mach-min-rsquared",
                            type=float,
                            default=0.3,
                            help="Filter out loci with RSquared < this value")
        parser.add_argument(
            "--mach-chrpos",
            action="store_true",
            help=
            "When true, first col in .info file must be chr:pos (additional pieces allowed)"
        )

        parser.add_argument("--pheno",
                            type=argparse.FileType('r'),
                            help="File containing phenotypes")
        parser.add_argument("--sample-pheno",
                            type=argparse.FileType('r'),
                            help="(Mach) Sample file containing phenotypes")
        parser.add_argument(
            "--mphenos",
            type=str,
            default="",
            help=
            "Column number(s) for phenotype to be analyzed if number of columns > 1"
        )
        parser.add_argument(
            "--pheno-names",
            type=str,
            default="",
            help=
            "Name for phenotype(s) to be analyzed (must be in --pheno file)")
        parser.add_argument("--all-pheno",
                            action="store_true",
                            help="Analyze all columns from the phenotype file")
        #parser.add_argument("--all-pheno", action='store_true', help="Analyze each phenotype")

        parser.add_argument("--covar",
                            type=argparse.FileType('r'),
                            help="File containing covariates")
        parser.add_argument("--sample-covar",
                            type=argparse.FileType('r'),
                            help="(Mach) Sample file containing covariates")
        parser.add_argument("--covar-numbers",
                            type=str,
                            default="",
                            help="Comma-separated list of covariate indices")
        parser.add_argument("--covar-names",
                            type=str,
                            default="",
                            help="Comma-separated list of covariate names")
        parser.add_argument(
            "--sex",
            action='store_true',
            help="Use sex from the pedigree file as a covariate")
        parser.add_argument("--missing-phenotype",
                            type=float,
                            default=-9.0,
                            help="Encoding for missing phenotypes")

        parser.add_argument("--maf",
                            type=float,
                            default=0.0,
                            help="Minimum MAF allowed for analysis")
        parser.add_argument("--max-maf",
                            type=float,
                            default=1.0,
                            help="MAX MAF allowed for analysis")
        parser.add_argument("--geno",
                            type=float,
                            default=1.0,
                            help="MAX per-SNP missing for analysis")
        parser.add_argument("--mind",
                            type=float,
                            default=1.0,
                            help="MAX per-person missing")

        parser.add_argument("--verbose",
                            action='store_true',
                            help="Output additional data details")

        parser.set_defaults(all_pheno=False, sex=False, mach_chrpos=False)
        args = parser.parse_args(args)

        # Report version, if requested, and exit
        if args.v:
            print("%s: %s" % (os.path.basename(__file__), __version__),
                  file=sys.stderr)
            sys.exit(0)

        if args.vall:
            print("%s: %s" % (os.path.basename(__file__), __version__),
                  file=sys.stderr)
            print("%s: %s" %
                  (os.path.dirname(libgwas.__file__), libgwas.__version__),
                  file=sys.stderr)
            print("%s: %s" %
                  (os.path.dirname(scipy.__file__), scipy.__version__),
                  file=sys.stderr)
            print("%s: %s" %
                  (os.path.dirname(numpy.__file__), numpy.__version__),
                  file=sys.stderr)
            sys.exit(0)

        ###############################################################################################################
        # Here we deal with the various ways we filter SNPs in and out of anlysis
        # We might handle MACH files differently. We'll default the chromosome
        # to be "NA" which is how those can be returned.
        if args.mach is None or args.mach_chrpos:
            BoundaryCheck.chrom = args.chr
        else:
            if args.chr != -1:
                libgwas.Exit(
                    ("Positional based filtering (--chr, --from/--to)" +
                     " only work with mach_chrpos. See manual for details."))
            BoundaryCheck.chrom = "NA"
        snps = args.snps.split(",")
        try:
            b = BoundaryCheck(bp=(args.from_bp, args.to_bp),
                              kb=(args.from_kb, args.to_kb),
                              mb=(args.from_mb, args.to_mb))
        except InvalidBoundarySpec as e:
            print("Invalid boundary spec associated: %s" %
                  (e.malformed_boundary),
                  file=sys.stderr)
            sys.exit(1)
        try:
            s = SnpBoundaryCheck(snps=snps)
        except InvalidBoundarySpec as e:
            print("Invalid SNP boundary defined: %s" % (e.malformed_boundary),
                  file=sys.stderr)
            print(
                "SNPs must be either single or have be a range such as rs123-rs345",
                file=sys.stderr)
            sys.exit(1)

        if b.valid and s.valid:
            print(
                "Only one type of boundary conditions is permitted. Either use --from-bp, etc. or rs123-rs345. ",
                file=sys.stderr)
            sys.exit(1)

        if len(b.bounds) > 0 and not b.valid:
            if BoundaryCheck.chrom == "NA":
                libgwas.Exit(
                    ("Positional based filtering (--chr, --from/--to)" +
                     " only work with mach_chrpos. See manual for details."))

        if s.valid:
            DataParser.boundary = s
        # If b isn't valid, we still want to potentially allow for chr and SNPs, it just won't have
        else:
            b.LoadSNPs(snps)
            # any actual boundary listings
            DataParser.boundary = b
        DataParser.boundary.LoadExclusions(snps=args.exclude.split(","))

        ###############################################################################################################
        # Setup the various Dataset filter criteria
        DataParser.min_maf = args.maf
        DataParser.max_maf = args.max_maf
        DataParser.snp_miss_tol = args.geno
        DataParser.ind_miss_tol = args.mind

        DataParser.ind_exclusions = ParseIndList(args.remove)

        PhenoCovar.sex_as_covariate = args.sex

        if args.compressed:
            DataParser.compressed_pedigree = True

        DataParser.has_sex = not args.no_sex
        DataParser.has_parents = not args.no_parents
        DataParser.has_fid = not args.no_fid
        DataParser.has_pheno = not args.no_pheno
        DataParser.has_liability = args.liability

        pheno_covar = PhenoCovar()
        self.verbose = False
        if args.verbose:
            self.verbose = True

        if args.file != None or args.ped or args.map:
            if args.ped and not args.map or args.map and not args.ped:
                print(
                    "When analyzing pedigree data, both .map and .ped must be specified",
                    file=sys.stderr)
                sys.exit(1)
            if args.ped:
                dataset = pedigree_parser.Parser(args.map.name, args.ped.name)
            else:
                dataset = pedigree_parser.Parser("%s.map" % (args.file),
                                                 "%s.ped" % (args.file))

            dataset.load_mapfile(map3=args.map3)
            dataset.load_genotypes(pheno_covar)
        elif args.tfile != None or args.tped or args.tfam:
            if args.tped and not args.tfam or args.tfam and not args.tped:
                print(
                    "When analyzing transposed pedigree data, both .tfam and .tped must be specified",
                    file=sys.stderr)
                sys.exit(1)
            if args.tped:
                dataset = transposed_pedigree_parser.Parser(
                    args.tfam.name, args.tped.name)
            else:
                dataset = transposed_pedigree_parser.Parser(
                    "%s.tfam" % (args.tfile), "%s.tped" % (args.tfile))
            dataset.load_tfam(pheno_covar)
            dataset.load_genotypes()
        elif args.bfile != None:
            dataset = bed_parser.Parser("%s.fam" % (args.bfile),
                                        "%s.bim" % (args.bfile),
                                        "%s.bed" % (args.bfile))
            dataset.load_bim(map3=args.map3)
            dataset.load_fam(pheno_covar)
            dataset.load_genotypes()
        elif args.bed or args.bim or args.fam:
            if (args.bed and not args.fam or not args.bim) or (
                    args.bim and not args.bed
                    or not args.fam) or (args.fam and not args.bed
                                         or not args.bim):
                print(
                    "When analyzing binary pedigree data, .bed, .bim and .fam files must be provided",
                    file=sys.stderr)
                sys.exit(1)
            dataset = bed_parser.Parser(args.fam, args.bim, args.bed)
            dataset.load_bim(map3=args.map3)
            dataset.load_fam(pheno_covar)
            dataset.load_genotypes()
        elif args.impute:
            DataParser.compressed_pedigree = not args.impute_uncompressed

            if (args.impute_offset > 0 and args.impute_count == -1) or (
                    args.impute_offset == -1 and args.impute_count > 0):
                print(
                    "--impute-count and --impute_offset must both > 0 if one is set other than -1.  ",
                    file=sys.stderr)
                sys.exit(1)
            if DataParser.snp_miss_tol != 1.0:
                print("--geno does not have any impact on imputed data",
                      file=sys.stderr)
                sys.exit(1)
            if DataParser.ind_miss_tol != 1.0:
                print("--mind does not have any impact on imputed data",
                      file=sys.stderr)
                sys.exit(1)
            impute_parser.SetEncoding(args.impute_encoding)
            impute_parser.Parser.info_ext = args.impute_info_ext
            impute_parser.Parser.info_threshold = args.impute_info_thresh
            libgwas.ExitIf(
                "--impute-fam is required for when processing imputed data",
                args.impute_fam == None)
            archives, chroms, infos = self.ParseImputeFile(
                args.impute.name, args.impute_offset, args.impute_count)
            dataset = impute_parser.Parser(args.impute_fam.name, archives,
                                           chroms, infos)
            dataset.load_family_details(pheno_covar)
            dataset.load_genotypes()
        elif args.mach:

            DataParser.compressed_pedigree = not args.mach_uncompressed
            if (args.mach_offset > 0
                    and args.mach_count == -1) or (args.mach_offset == -1
                                                   and args.impute_count > 0):
                print(
                    "--mach-count and --mach_offset must both be > 0 if one is set other than -1. ",
                    file=sys.stderr)
                sys.exit(1)
            if DataParser.snp_miss_tol != 1.0:
                print("--geno does not have any impact on imputed data",
                      file=sys.stderr)
                sys.exit(1)
            if DataParser.ind_miss_tol != 1.0:
                print("--mind does not have any impact on imputed data",
                      file=sys.stderr)
                sys.exit(1)
            if BoundaryCheck.chrom != "NA" and not args.mach_chrpos:
                libgwas.Exit(
                    ("Positional based filtering (--chr, --from/--to)" +
                     " only work with mach_chrpos. See manual for details."))
            mach_parser.Parser.chrpos_encoding = args.mach_chrpos
            mach_parser.Parser.info_ext = args.mach_info_ext
            mach_parser.Parser.dosage_ext = args.mach_dose_ext
            mach_parser.Parser.chunk_stride = args.mach_chunk_size
            mach_parser.Parser.min_rsquared = args.mach_min_rsquared
            archives, infos = self.ParseMachFile(args.mach.name,
                                                 args.mach_offset,
                                                 args.mach_count)
            dataset = mach_parser.Parser(archives, infos)
            dataset.load_family_details(pheno_covar)
            dataset.load_genotypes()

        else:
            parser.print_usage(sys.stderr)
            print(
                "\nNo data has been specified. Users must specify either pedigree or transposed pedigree to continue",
                file=sys.stderr)
            sys.exit(1)

        if args.pheno or args.sample_pheno:
            mphenos = []
            if args.mphenos != "":
                mphenos = args.mphenos.split(",")

            nphenos = []
            if args.pheno_names != "":
                nphenos = args.pheno_names.split(",")

            if len(mphenos) + len(nphenos) == 0 and not args.all_pheno:
                libgwas.Exit("You must select one or more phenotypes when ")
            sample_file = False
            pheno_filename = args.pheno
            if args.sample_pheno:
                pheno_filename = args.sample_pheno
                sample_file = True
            pheno_covar.load_phenofile(pheno_filename, mphenos, nphenos,
                                       sample_file)

        if args.covar:
            pheno_covar.load_covarfile(args.covar,
                                       args.covar_numbers.split(","),
                                       args.covar_names.split(","))
        pheno_covar.do_standardize_variables = True
        return dataset, pheno_covar