Esempio n. 1
0
def ParseIndList(ids):
    """Return a list of individual IDs from a file or a comma-separated string.

    When ``ids`` names an existing file, every line is split on whitespace
    and its first two columns are joined with ":" to form one ID. For each
    line ``ExitIf`` is called with an error message and a condition that is
    true when the line does not contain exactly two columns. When ``ids`` is
    not an existing file, the string itself is split on commas.

    :param ids: path to a two-column ID file, or a comma-separated ID string
    :return list of ID strings
    """
    if not os.path.isfile(ids):
        return ids.split(",")

    id_list = []
    # The context manager closes the handle on every exit path, including an
    # exception raised while the lines are being validated.
    with open(ids) as id_file:
        for line in id_file:
            words = line.strip().split()
            ExitIf(
                "%s:%s Individual file lists must contain " %
                (line.strip(), len(words)) +
                "(exactly 2 columns (first two from ped columns)",
                len(words) != 2)

            id_list.append(":".join(words[0:2]))

    return id_list
Esempio n. 2
0
def split_mach_jobs(args, filename):
    """Parse the MACH file and generate the list of jobs.

    One job is generated for every ``mach_count`` files listed in
    ``filename``. Each job is named after its 1-based sequence number and
    the first file it covers, and maps to the ``--mach-count`` /
    ``--mach-offset`` arguments selecting that group of files.

    :param args: Arguments from parseargs (``snps_per_job`` and
        ``mach_count`` are read)
    :param filename: name of file containing list of mach dosage files
    :return dict mapping each job name to its command line arguments
    """
    # A missing or zero mach_count means one dosage file per job.
    mach_count = args.mach_count or 1

    ExitIf(
        "mvmany doesn't support splitting mach jobs into pieces at this time",
        args.snps_per_job > 1)

    # Record exactly one name per line: the last whitespace-separated token,
    # reduced to its basename with the final extension removed. Recording a
    # second entry per line would double the file count and produce offsets
    # beyond the end of the real file list.
    with open(filename) as mach_file:
        dosage_files = [
            ".".join(line.strip().split()[-1].split("/")[-1].split(".")[0:-1])
            for line in mach_file
        ]

    job_count = int(math.ceil(float(len(dosage_files)) / mach_count))

    job_list = {}
    for job_num in range(job_count):
        # Offsets are 1-based; the list lookup below is 0-based.
        job_idx = job_num * mach_count + 1
        job_string = "--mach-count %d --mach-offset %d" % (mach_count, job_idx)
        job_name = "job%04d-%s" % (job_num + 1, dosage_files[job_idx - 1])
        job_list[job_name] = job_string

    return job_list
Esempio n. 3
0
def split_impute_jobs(args, filename):
    """Parse the IMPUTE file and generate the list of jobs.

    One job is generated for every ``impute_count`` gen files listed in
    ``filename``. Each job is named after its 1-based sequence number and
    the first gen file it covers, and maps to the ``--impute-offset`` /
    ``--impute-count`` arguments selecting that group of files. Every job's
    argument string is also printed to standard output.

    Splitting a single gen file into several jobs by SNP count is not
    implemented; ``ExitIf`` is passed ``snps_per_job > 1`` as its condition.

    :param args: parsearg object containing command line arguments
        (``snps_per_job`` and ``impute_count`` are read)
    :param filename: file containing the IMPUTE gen files and chromosome
        numbers
    :return dict mapping each job name to its command line arguments
    """
    # A missing or zero impute_count means one gen file per job.
    impute_count = args.impute_count or 1

    ExitIf(
        "mvmany doesn't support splitting IMPUTE jobs into pieces at this time",
        args.snps_per_job > 1)

    # For each line keep the last whitespace-separated token, reduced to its
    # basename with the final extension removed.
    with open(filename) as impute_file:
        gen_files = [
            ".".join(line.strip().split()[-1].split("/")[-1].split(".")[0:-1])
            for line in impute_file
        ]

    job_count = int(math.ceil(float(len(gen_files)) / impute_count))

    job_list = {}
    for job_num in range(job_count):
        # Offsets are 1-based; the list lookup below is 0-based.
        job_idx = job_num * impute_count + 1
        job_string = "--impute-offset %d --impute-count %d" % (job_idx,
                                                               impute_count)
        job_name = "job%04d-%s" % (job_num + 1, gen_files[job_idx - 1])
        job_list[job_name] = job_string
        # Parenthesised single-argument form prints the same text whether
        # print is a statement (Python 2) or a function.
        print(job_string)

    return job_list
Esempio n. 4
0
from libgwas.pheno_covar import PhenoCovar
from libgwas.exceptions import ReportableException
import libgwas.pedigree_parser as pedigree_parser
import libgwas.transposed_pedigree_parser as transposed_pedigree_parser
import libgwas.bed_parser as bed_parser
import meanvar.mv_esteq as mv_esteq
from libgwas import BuildReportLine
from libgwas import impute_parser
from libgwas import mach_parser
import libgwas.standardizer
import meanvar.mvstandardizer
from libgwas import ExitIf

# Expose the meanvar package's version string as this module's version.
__version__ = meanvar.__version__

# Interpreter version guard: ExitIf receives the error message and a condition
# that is true when the running interpreter is older than 2.7.
# NOTE(review): presumably ExitIf reports the message and terminates when the
# condition is true -- confirm against libgwas.
ExitIf("mvtest.py requires python 2.7.x  to run", sys.version_info < (2, 7))

# Hand meanvar's Standardizer class to libgwas.standardizer at import time.
# NOTE(review): presumably this selects the standardizer libgwas applies to
# phenotype/covariate data -- confirm against libgwas.standardizer.
libgwas.standardizer.set_standardizer(meanvar.mvstandardizer.Standardizer)
"""usage: mvtest.py [-h] [-v] [--vall] [--chr N] [--snps SNPS] [--from-bp START]
                 [--to-bp END] [--from-kb START] [--to-kb END]
                 [--from-mb START] [--to-mb END] [--exclude EXCLUDE]
                 [--keep KEEP] [--remove REMOVE] [--file FILE] [--ped PED]
                 [--map MAP] [--map3] [--no-sex] [--no-parents] [--no-fid]
                 [--no-pheno] [--liability] [--bfile BFILE] [--bed BED]
                 [--bim BIM] [--fam FAM] [--tfile TFILE] [--tped TPED]
                 [--tfam TFAM] [--compressed] [--impute IMPUTE]
                 [--impute-fam IMPUTE_FAM] [--impute-offset IMPUTE_OFFSET]
                 [--impute-count IMPUTE_COUNT] [--impute-uncompressed]
                 [--impute-encoding {additive,dominant,recessive,genotype}]
                 [--impute-info-ext IMPUTE_INFO_EXT]
                 [--impute-gen-ext IMPUTE_GEN_EXT]