Example #1
0
def load_samples(args):
    """Return the sample identifiers selected by the command-line arguments.

    The first populated source wins, checked in this order:

    * ``args.text_sample_ids`` -- either a single path to a headerless
      table read as columns ``FID``/``IID``, or a path followed by the
      literal ``"UKB"``, in which case the space-separated file is read,
      rows whose ``sex`` column equals ``"D"`` are dropped, and the
      ``ID_1``/``ID_2`` columns are renamed to ``FID``/``IID``.
    * ``args.vcf_genotypes`` -- samples reported by ``CYVCF2Genotype`` for
      the first listed file.
    * ``args.bgen_genotypes`` -- samples reported by ``BGENGenotype`` for
      the first listed file.
    * ``args.generate_sample_ids`` -- that many synthetic ids
      ``ID_0 .. ID_{n-1}``, used for both ``FID`` and ``IID``.

    Raises:
        Exceptions.InvalidArguments: if no source produced a sample set
            (including a multi-entry ``text_sample_ids`` whose second
            entry is not ``"UKB"``).
    """
    samples = None
    id_columns = ["FID", "IID"]

    if args.text_sample_ids:
        id_spec = args.text_sample_ids
        if len(id_spec) == 1:
            samples = pandas.read_table(id_spec[0], header=None, names=id_columns)
        elif id_spec[1] == "UKB":
            table = pandas.read_table(id_spec[0], sep=" ")
            keep = table.sex != "D"
            table = table[keep].reset_index(drop=True)
            renaming = {"ID_1": "FID", "ID_2": "IID"}
            samples = table[["ID_1", "ID_2"]].rename(columns=renaming)
    elif args.vcf_genotypes:
        # Imported lazily so the VCF backend is only needed when used.
        from metax.genotype import CYVCF2Genotype
        samples = CYVCF2Genotype.get_samples(args.vcf_genotypes[0])
    elif args.bgen_genotypes:
        # Imported lazily so the BGEN backend is only needed when used.
        from metax.genotype import BGENGenotype
        samples = BGENGenotype.get_samples(args.bgen_genotypes[0])
    elif args.generate_sample_ids:
        labels = ["ID_{}".format(i) for i in range(args.generate_sample_ids)]
        pairs = list(zip(labels, labels))
        samples = pandas.DataFrame(data=pairs, columns=id_columns)

    # Single exit point: any branch that left nothing behind is an error.
    if samples is None:
        raise Exceptions.InvalidArguments("Unsupported samples argument")
    return samples
Example #2
0
def dosage_generator(args, variant_mapping=None, weights=None):
    """Return the genotype dosage source selected by ``args``.

    Exactly one genotype backend is used, chosen by the first populated
    attribute among ``args.text_genotypes``, ``args.bgen_genotypes`` and
    ``args.vcf_genotypes``; the result is whatever that backend's
    ``*_geno_lines`` function returns.

    Args:
        args: Parsed arguments. Attributes read here: ``liftover``,
            ``zero_based_positions``, ``text_genotypes``,
            ``bgen_genotypes``, ``vcf_genotypes``, ``skip_palindromic``,
            ``force_colon``, ``bgen_use_rsid``, ``vcf_mode`` and
            ``force_mapped_metadata``.
        variant_mapping: Optional variant mapping passed through to the
            backend. When it is a non-empty ``dict`` its keys become the
            variant whitelist.
        weights: Optional object exposing an ``rsid`` iterable, used for
            the whitelist when ``variant_mapping`` is not a non-empty
            dict.

    Returns:
        The backend's dosage source, wrapped by
        ``Genotype.force_mapped_metadata`` when
        ``args.force_mapped_metadata`` is set.

    Raises:
        Exceptions.InvalidArguments: if no genotype input is specified.
    """
    liftover_conversion = None
    if args.liftover:
        logging.info("Acquiring liftover conversion")
        liftover_chain = pyliftover.LiftOver(args.liftover)

        # Parameter names are kept from the original lambda so any
        # keyword-style caller keeps working.
        def liftover_conversion(chr, pos):
            return Genomics.lift(liftover_chain, chr, pos,
                                 args.zero_based_positions)

    whitelist = None
    # isinstance() accepts dict subclasses too; it is checked first so
    # non-dict mappings are never evaluated for truthiness.
    if isinstance(variant_mapping, dict) and variant_mapping:
        logging.info("Setting whitelist from mapping keys")
        whitelist = set(variant_mapping)
    elif weights is not None:
        logging.info("Setting whitelist from available models")
        whitelist = set(weights.rsid)
    else:
        # Previously this path crashed with AttributeError on
        # ``None.rsid``. The whitelist is now left as None instead.
        # NOTE(review): assumes the backends treat ``whitelist=None`` as
        # "no filtering" -- confirm against metax.genotype.
        logging.info("No variant mapping or models given; whitelist not set")

    d = None
    if args.text_genotypes:
        from metax.genotype import DosageGenotype
        d = DosageGenotype.dosage_files_geno_lines(
            args.text_genotypes,
            variant_mapping=variant_mapping,
            whitelist=whitelist,
            skip_palindromic=args.skip_palindromic,
            liftover_conversion=liftover_conversion)
    elif args.bgen_genotypes:
        from metax.genotype import BGENGenotype
        # NOTE(review): unlike the text and VCF branches, no
        # liftover_conversion is passed here, so ``args.liftover`` has no
        # effect on BGEN input -- confirm this is intended.
        d = BGENGenotype.bgen_files_geno_lines(
            args.bgen_genotypes,
            variant_mapping=variant_mapping,
            force_colon=args.force_colon,
            use_rsid=args.bgen_use_rsid,
            whitelist=whitelist,
            skip_palindromic=args.skip_palindromic)
    elif args.vcf_genotypes:
        from metax.genotype import CYVCF2Genotype
        d = CYVCF2Genotype.vcf_files_geno_lines(
            args.vcf_genotypes,
            mode=args.vcf_mode,
            variant_mapping=variant_mapping,
            whitelist=whitelist,
            skip_palindromic=args.skip_palindromic,
            liftover_conversion=liftover_conversion)

    if d is None:
        raise Exceptions.InvalidArguments("unsupported genotype input")
    if args.force_mapped_metadata:
        d = Genotype.force_mapped_metadata(d, args.force_mapped_metadata)
    return d