Ejemplo n.º 1
0
def main(args):
    """Print .ind rows (name, gender, status) for the names in a .hud file.

    Names are taken from the first whitespace-delimited field of each line
    that util.gen_nonempty_stripped_lines yields for ``args.hud``.  Case and
    control membership is read from ``args.matpheno``, whose lines hold a
    name followed by a classification: '1' marks a case, '2' marks a
    control, and '12' or 'null' are skipped.  One tab-separated row is
    printed per .hud name, in file order.  The gender column is always 'U'
    and names that are neither cases nor controls are labelled 'Ignore'.

    Both paths are passed through os.path.expanduser before being opened.

    Raises:
        ValueError: if a matpheno line has no classification field.
        Exception: if a matpheno classification is not recognized.
    """
    # get the names from the .hud file
    names = []
    with open(os.path.expanduser(args.hud)) as fin_hud:
        for line in util.gen_nonempty_stripped_lines(fin_hud):
            # Only the leading field is used, so do not require that
            # anything follows it on the line.
            # NOTE(review): assumes the helper never yields a blank line,
            # as its name suggests -- confirm.
            names.append(line.split(None, 1)[0])
    # get case and control status from the matpheno file
    cases = set()
    controls = set()
    with open(os.path.expanduser(args.matpheno)) as fin_matpheno:
        for line in util.gen_nonempty_stripped_lines(fin_matpheno):
            name, classification = line.split(None, 1)
            if classification == '1':
                cases.add(name)
            elif classification == '2':
                controls.add(name)
            elif classification in ('12', 'null'):
                # skip individuals classified like this
                pass
            else:
                msg = 'invalid MAT_pheno classification: ' + classification
                raise Exception(msg)
    # write the .ind file contents
    for name in names:
        # a name listed as both case and control is reported as a case
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        else:
            classification = 'Ignore'
        # The single parenthesized argument keeps this valid both as a
        # Python 2 print statement and as a Python 3 function call.
        print('\t'.join([name, 'U', classification]))
Ejemplo n.º 2
0
def main(args):
    """Print .ind rows (name, gender, status) for the names in a .hud file.

    Names are taken from the first whitespace-delimited field of each line
    that util.gen_nonempty_stripped_lines yields for ``args.hud``.  The case
    and control collections are computed from the data rows of the csv file
    ``args.csv`` (every row after the first) by the helper selected with
    ``args.environment``:

    * 'precipitation' -> get_precipitation_info(rows,
      args.precipitation_threshold)
    * 'temperature' -> get_temperature_info(rows,
      args.temperature_threshold)
    * 'location' -> get_location_info(rows, args.control_location)

    One tab-separated row is printed per .hud name, in file order.  The
    gender column is always 'U' and names that are neither cases nor
    controls are labelled 'Ignore'.

    Raises:
        IndexError: if the csv file contains no rows at all.
        Exception: if ``args.environment`` is not one of the values above.
    """
    # get the names from the .hud file
    names = []
    with open(os.path.expanduser(args.hud)) as fin_hud:
        for line in util.gen_nonempty_stripped_lines(fin_hud):
            # Only the leading field is used, so do not require that
            # anything follows it on the line.
            # NOTE(review): assumes the helper never yields a blank line,
            # as its name suggests -- confirm.
            names.append(line.split(None, 1)[0])
    # open the csv file
    with open(os.path.expanduser(args.csv)) as fin_csv:
        # read the whole csv file; the first row is the header
        rows = list(csv.reader(fin_csv))
        # The header itself is not used, but indexing it keeps the
        # existing failure (IndexError) for a csv file with no rows.
        header, data_rows = rows[0], rows[1:]
        # get case and control OTU sets
        if args.environment == 'precipitation':
            cases, controls = get_precipitation_info(
                    data_rows, args.precipitation_threshold)
        elif args.environment == 'temperature':
            cases, controls = get_temperature_info(
                    data_rows, args.temperature_threshold)
        elif args.environment == 'location':
            cases, controls = get_location_info(
                    data_rows, args.control_location)
        else:
            msg = 'unrecognized environmental variable: ' + args.environment
            raise Exception(msg)
    # write the .ind file contents
    for name in names:
        # a name found in both collections is reported as a case
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        else:
            classification = 'Ignore'
        # The single parenthesized argument keeps this valid both as a
        # Python 2 print statement and as a Python 3 function call.
        print('\t'.join([name, 'U', classification]))
Ejemplo n.º 3
0
def main(args):
    """Transpose per-individual genotype rows from stdin into per-SNP lines.

    Each line yielded by util.gen_nonempty_stripped_lines(sys.stdin) is split
    into a leading name and the remaining text; the remaining text is split
    on whitespace into that individual's genotype tokens.  For every
    position, the tokens of all individuals are concatenated (in input
    order) and printed on one line.  Because the transpose uses zip, the
    output stops at the shortest individual's token count.

    ``args`` is accepted but not used.

    Raises:
        ValueError: if a line has nothing after the leading name.
    """
    genotype_rows = []
    for record in util.gen_nonempty_stripped_lines(sys.stdin):
        # the leading name is required on the line but is not used
        _label, genotype_text = record.split(None, 1)
        genotype_rows.append(genotype_text.split())
    # zip(*rows) turns one row per individual into one tuple per position
    for column in zip(*genotype_rows):
        print(''.join(column))
Ejemplo n.º 4
0
def main(args):
    """Print one tab-separated SNP row per genotype token of the first line.

    The line is obtained with util.get_first from the lines that
    util.gen_nonempty_stripped_lines yields for stdin (presumably the first
    such line -- confirm against util).  It is split into a leading name and
    the remaining text, and the remaining text is split on whitespace.  For
    the token at zero-based index i a row is printed with four columns:
    'SNP_<i>', chromosome '1', morgans '0.0', and the base position i + 1.
    Only the number of tokens matters; their values are not used.

    ``args`` is accepted but not used.

    Raises:
        ValueError: if the line has nothing after the leading name.
    """
    first_line = util.get_first(util.gen_nonempty_stripped_lines(sys.stdin))
    # the leading name is required on the line but is not used
    _otu_name, genotype_text = first_line.split(None, 1)
    snp_count = len(genotype_text.split())
    for index in range(snp_count):
        fields = ['SNP_' + str(index), '1', '0.0', str(index + 1)]
        print('\t'.join(fields))
Ejemplo n.º 5
0
def main():
    """Print the status column of stdin as a single string of digit codes.

    Each line yielded by util.gen_nonempty_stripped_lines(sys.stdin) must
    consist of exactly three whitespace-delimited fields: name, gender and
    status.  The status is encoded as 'Control' -> '0', 'Case' -> '1' and
    'Ignore' -> '9'; the codes are concatenated in input order and printed
    on one line after all input has been read.

    Raises:
        ValueError: if a line does not have exactly three fields.
        Exception: if a status is not one of the three recognized values.
    """
    code_by_status = {'Control': '0', 'Case': '1', 'Ignore': '9'}
    codes = []
    for record in util.gen_nonempty_stripped_lines(sys.stdin):
        # name and gender must be present but only the status is encoded
        _name, _gender, status = record.split()
        if status not in code_by_status:
            raise Exception('Invalid status: ' + status)
        codes.append(code_by_status[status])
    print(''.join(codes))