def main(args):
    """Print .ind rows (name, gender, status) for the individuals in a .hud file.

    Names are taken, in file order, from the first whitespace-delimited
    token of each line of the file at ``args.hud``.  Case/control status is
    read from the file at ``args.matpheno``, whose lines are a name followed
    by a classification: '1' marks a case, '2' a control, and '12' or 'null'
    are skipped.  Names that end up neither case nor control are written
    with the status 'Ignore'.  Gender is always written as 'U'.

    Both paths go through ``os.path.expanduser``.  Output is one
    tab-separated row per name on stdout.

    Raises:
        Exception: if a MAT_pheno classification is not one of the
            values listed above.
        ValueError: if a MAT_pheno line has no classification column
            (tuple unpacking of the split fails).
    """
    # get the names from the .hud file
    names = []
    with open(os.path.expanduser(args.hud)) as fin_hud:
        for line in util.gen_nonempty_stripped_lines(fin_hud):
            # Only the leading token is needed.  Indexing instead of
            # unpacking into (name, rest) means a line that holds a name
            # and nothing else no longer raises ValueError.
            # NOTE(review): relies on the helper never yielding an empty
            # line, as its name suggests -- confirm in util.
            names.append(line.split(None, 1)[0])

    # get case and control status from the matpheno file
    cases = set()
    controls = set()
    with open(os.path.expanduser(args.matpheno)) as fin_matpheno:
        for line in util.gen_nonempty_stripped_lines(fin_matpheno):
            # maxsplit=1: everything after the name is the classification
            name, classification = line.split(None, 1)
            if classification == '1':
                cases.add(name)
            elif classification == '2':
                controls.add(name)
            elif classification in ('12', 'null'):
                # skip individuals classified like this
                pass
            else:
                msg = 'invalid MAT_pheno classification: ' + classification
                raise Exception(msg)

    # write the .ind file contents
    for name in names:
        gender = 'U'
        # cases are checked first, so a name in both sets is a 'Case'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        else:
            classification = 'Ignore'
        # single-argument print(...) emits the same text on Python 2 and 3
        print('\t'.join([name, gender, classification]))
def main(args):
    """Print .ind rows (name, gender, status) using an environmental variable.

    Names are taken, in file order, from the first whitespace-delimited
    token of each line of the file at ``args.hud``.  The csv file at
    ``args.csv`` is read in full; its first row is treated as a header and
    the remaining rows are handed to one of the ``get_*_info`` functions,
    chosen by ``args.environment``:

    * 'precipitation' -> ``get_precipitation_info(rows, args.precipitation_threshold)``
    * 'temperature'   -> ``get_temperature_info(rows, args.temperature_threshold)``
    * 'location'      -> ``get_location_info(rows, args.control_location)``

    Each is unpacked as a ``(cases, controls)`` pair that is then tested
    for name membership.  Names in neither are written with the status
    'Ignore'.  Gender is always written as 'U'.  Output is one
    tab-separated row per name on stdout.

    Raises:
        Exception: if ``args.environment`` is not one of the three values
            listed above.
        IndexError: if the csv file contains no rows at all.
    """
    # get the names from the .hud file
    names = []
    with open(os.path.expanduser(args.hud)) as fin_hud:
        for line in util.gen_nonempty_stripped_lines(fin_hud):
            # Only the leading token is needed.  Indexing instead of
            # unpacking into (name, rest) means a line that holds a name
            # and nothing else no longer raises ValueError.
            # NOTE(review): relies on the helper never yielding an empty
            # line, as its name suggests -- confirm in util.
            names.append(line.split(None, 1)[0])

    # read the whole csv file, then release the handle
    with open(os.path.expanduser(args.csv)) as fin_csv:
        rows = list(csv.reader(fin_csv))

    # The header row is not used, but indexing rows[0] is kept on purpose:
    # it makes an empty csv file fail loudly instead of silently producing
    # an all-'Ignore' table.
    _header, data_rows = rows[0], rows[1:]

    # get case and control OTU sets
    # (an if/elif chain, so only the selected option's attribute is read)
    if args.environment == 'precipitation':
        cases, controls = get_precipitation_info(
                data_rows, args.precipitation_threshold)
    elif args.environment == 'temperature':
        cases, controls = get_temperature_info(
                data_rows, args.temperature_threshold)
    elif args.environment == 'location':
        cases, controls = get_location_info(
                data_rows, args.control_location)
    else:
        msg = 'unrecognized environmental variable: ' + args.environment
        raise Exception(msg)

    # write the .ind file contents
    for name in names:
        gender = 'U'
        # cases are checked first, so a name in both is a 'Case'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        else:
            classification = 'Ignore'
        # single-argument print(...) emits the same text on Python 2 and 3
        print('\t'.join([name, gender, classification]))
def main(args):
    """Transpose per-individual genotype rows from stdin into per-SNP rows.

    Every non-empty input line is a name followed by whitespace-separated
    genotype tokens.  For each SNP position one line is printed to stdout,
    formed by concatenating (with no separator) that position's token from
    every individual, in input order.

    Because the transpose is done with ``zip``, positions past the end of
    the shortest individual's genotype list are dropped.  ``args`` is not
    used.

    Raises:
        ValueError: if a line holds a name but no genotypes.
    """
    # rows: one list of genotype tokens per individual
    genotype_rows = []
    for line in util.gen_nonempty_stripped_lines(sys.stdin):
        # the name is discarded; only the genotype text matters here
        _name, genotype_text = line.split(None, 1)
        genotype_rows.append(genotype_text.split())

    # columns: one tuple of genotype tokens per SNP position
    for snp_column in zip(*genotype_rows):
        print(''.join(snp_column))
def main(args):
    """Print one tab-separated SNP description row per genotype column.

    Only the item returned by ``util.get_first`` for the non-empty stdin
    lines is examined (presumably the first such line -- confirm in util).
    That line is a name followed by whitespace-separated genotype tokens;
    the tokens are only counted.  For the i-th token (0-based) the row
    written is::

        SNP_<i>    1    0.0    <i+1>

    i.e. SNP name, chromosome '1', '0.0' morgans, and a 1-based position.
    ``args`` is not used.

    Raises:
        ValueError: if the line holds a name but no genotypes.
    """
    first_line = util.get_first(util.gen_nonempty_stripped_lines(sys.stdin))
    # the OTU name is discarded; the genotypes just determine the row count
    _otu_name, genotype_text = first_line.split(None, 1)
    snp_count = len(genotype_text.split())

    for index in range(snp_count):
        fields = (
            'SNP_' + str(index),    # name
            '1',                    # chromosome
            '0.0',                  # morgans
            str(index + 1),         # bases (1-based position)
        )
        print('\t'.join(fields))
def main():
    """Encode the status column of .ind rows from stdin as one digit string.

    Every non-empty input line must split into exactly three whitespace-
    separated fields: name, gender, status.  Each status becomes one
    character -- 'Control' -> '0', 'Case' -> '1', 'Ignore' -> '9' -- and the
    characters for all lines are printed, in input order, as a single
    line on stdout.

    Raises:
        Exception: if a status is not one of the three recognised values.
        ValueError: if a line does not have exactly three fields.
    """
    code_by_status = {'Control': '0', 'Case': '1', 'Ignore': '9'}

    digits = []
    for line in util.gen_nonempty_stripped_lines(sys.stdin):
        # name and gender are required to be present but are not used
        _name, _gender, status = line.split()
        if status not in code_by_status:
            raise Exception('Invalid status: ' + status)
        digits.append(code_by_status[status])

    print(''.join(digits))