def ParseIndList(ids):
    """Build a list of individual IDs from a file or a comma-separated string.

    If ``ids`` names an existing file, every line of that file is split on
    whitespace and is expected to hold exactly two columns; the two columns
    are joined with ":" to form one entry. Lines with any other column count
    are reported through ``ExitIf``. If ``ids`` is not an existing file it is
    treated as a literal list and split on commas.

    :param ids: path to an ID file, or a comma-separated string of IDs
    :return: list of ID strings
    """
    if not os.path.isfile(ids):
        return ids.split(",")

    id_list = []
    # The context manager guarantees the handle is released, including when
    # ExitIf raises or terminates part-way through the file.
    with open(ids) as id_file:
        for line in id_file:
            words = line.strip().split()
            ExitIf(
                "%s:%s Individual file lists must contain "
                % (line.strip(), len(words)) +
                "(exactly 2 columns (first two from ped columns)",
                len(words) != 2)
            id_list.append(":".join(words[0:2]))
    return id_list
def split_mach_jobs(args, filename):
    """Parse the MACH file and generate the list of jobs.

    One job is produced for every ``mach_count`` lines of ``filename``. Each
    job is named after the first file it covers and carries the
    ``--mach-count``/``--mach-offset`` options (offset is 1-based) selecting
    its slice of the list.

    :param args: Arguments from parseargs; ``snps_per_job`` and
        ``mach_count`` are read
    :param filename: name of file containing list of mach dosage files
    :return: dict mapping job name to the option string for that job
    """
    # A missing or zero count means one list entry per job.
    mach_count = args.mach_count if args.mach_count else 1

    ExitIf(
        "mvmany doesn't support splitting mach jobs into pieces at this time",
        args.snps_per_job > 1)

    # Exactly one entry per line of the list file. The previous version
    # appended two names for every line, which doubled the job count and
    # produced offsets pointing past the end of the list.
    dosage_files = []
    with open(filename) as mach_list:
        for line in mach_list:
            # Last whitespace-separated column, directory part removed,
            # final extension dropped (same rule as split_impute_jobs).
            basename = line.strip().split()[-1].split("/")[-1]
            dosage_files.append(".".join(basename.split(".")[0:-1]))

    file_count = len(dosage_files)
    job_count = int(math.ceil(float(file_count) / mach_count))

    job_list = {}
    for job_num in range(job_count):
        job_idx = job_num * mach_count + 1
        job_name = "job%04d-%s" % (job_num + 1, dosage_files[job_idx - 1])
        job_list[job_name] = "--mach-count %d --mach-offset %d" % (
            mach_count, job_idx)
    return job_list
def split_impute_jobs(args, filename):
    """Parse the IMPUTE file and generate the list of jobs.

    One job is produced for every ``impute_count`` lines of ``filename``.
    Each job is named after the first gen file it covers and carries the
    ``--impute-offset``/``--impute-count`` options (offset is 1-based)
    selecting its slice of the list. Every option string is also printed
    to stdout as it is generated.

    :param args: parsearg object containing command line arguments;
        ``snps_per_job`` and ``impute_count`` are read
    :param filename: file containing the IMPUTE gen files and chromosome
        numbers
    :return: dict mapping job name to the option string for that job
    """
    # A missing or zero count means one gen file per job.
    impute_count = args.impute_count if args.impute_count else 1

    # Splitting a single chromosome/gen file into several jobs is not
    # implemented; the unreachable draft of that logic that used to follow
    # the return statement has been removed.
    ExitIf(
        "mvmany doesn't support splitting IMPUTE jobs into pieces at this time",
        args.snps_per_job > 1)

    gen_files = []
    with open(filename) as impute_list:
        for line in impute_list:
            # Last whitespace-separated column, directory part removed,
            # final extension dropped.
            basename = line.strip().split()[-1].split("/")[-1]
            gen_files.append(".".join(basename.split(".")[0:-1]))

    file_count = len(gen_files)
    job_count = int(math.ceil(float(file_count) / impute_count))

    job_list = {}
    for job_num in range(job_count):
        job_idx = job_num * impute_count + 1
        job_string = "--impute-offset %d --impute-count %d" % (
            job_idx, impute_count)
        job_name = "job%04d-%s" % (job_num + 1, gen_files[job_idx - 1])
        job_list[job_name] = job_string
        # Parenthesised single-argument form prints identically under
        # Python 2.7 and also parses under Python 3.
        print(job_string)
    return job_list
from libgwas.pheno_covar import PhenoCovar from libgwas.exceptions import ReportableException import libgwas.pedigree_parser as pedigree_parser import libgwas.transposed_pedigree_parser as transposed_pedigree_parser import libgwas.bed_parser as bed_parser import meanvar.mv_esteq as mv_esteq from libgwas import BuildReportLine from libgwas import impute_parser from libgwas import mach_parser import libgwas.standardizer import meanvar.mvstandardizer from libgwas import ExitIf __version__ = meanvar.__version__ ExitIf("mvtest.py requires python 2.7.x to run", sys.version_info < (2, 7)) libgwas.standardizer.set_standardizer(meanvar.mvstandardizer.Standardizer) """usage: mvtest.py [-h] [-v] [--vall] [--chr N] [--snps SNPS] [--from-bp START] [--to-bp END] [--from-kb START] [--to-kb END] [--from-mb START] [--to-mb END] [--exclude EXCLUDE] [--keep KEEP] [--remove REMOVE] [--file FILE] [--ped PED] [--map MAP] [--map3] [--no-sex] [--no-parents] [--no-fid] [--no-pheno] [--liability] [--bfile BFILE] [--bed BED] [--bim BIM] [--fam FAM] [--tfile TFILE] [--tped TPED] [--tfam TFAM] [--compressed] [--impute IMPUTE] [--impute-fam IMPUTE_FAM] [--impute-offset IMPUTE_OFFSET] [--impute-count IMPUTE_COUNT] [--impute-uncompressed] [--impute-encoding {additive,dominant,recessive,genotype}] [--impute-info-ext IMPUTE_INFO_EXT] [--impute-gen-ext IMPUTE_GEN_EXT]