if metafile: logger.info('Reading metafile ...') metadata = dict( TsvReader(metafile, cnames=True, row=lambda r: tuple( (r.IID, r))).dump()) else: metadata = None logger.info('Reading genotype matrix ...') # snp1 gt1s1 gt1s2 ... inreader = TsvReader(infile, cnames=True) samples = inreader.meta[1:] logger.info('Writing tfam file ...') tfamWriter = TsvWriter(tfamfile) tfamWriter.meta = ['FID', 'IID', 'PID', 'MID', 'Sex', 'Pheno'] #tfamWriter.writeHead(callback = lambda meta: '#' + '\t'.join(meta)) if not metadata: for s in samples: tfamWriter.write([s, s, '0', '0', 'other', '-9']) else: for s in samples: tfamWriter.write([ metadata[s].FID if s in metadata and 'FID' in metadata[s] else s, s, (metadata[s].PID or '0') if s in metadata and 'PID' in metadata[s] else '0', (metadata[s].MID or '0') if s in metadata and 'MID' in metadata[s] else '0', (metadata[s].Sex or 'other') if s in metadata and 'Sex' in metadata[s] else 'other', (metadata[s].Pheno or '-9')
# Keep only the columns that both input files have in common, and write each
# file's shared columns (preceded by its row-name column, when it has one)
# to its own output file.
indata1 = TsvReader(infile1, **inopts1)
indata2 = TsvReader(infile2, **inopts2)

# When a file carries row names, the first header field labels the row-name
# column and is not treated as a data column.
cnames1 = indata1.meta if not rnames1 else indata1.meta[1:]
cnames2 = indata2.meta if not rnames2 else indata2.meta[1:]

# Collect the shared column names in the order they appear in the first file.
# A bare set intersection has no defined order (and for strings it can change
# between interpreter runs), which made the output column layout
# non-reproducible. Each shared name is emitted once, as the set
# intersection did.
remaining = set(cnames1) & set(cnames2)
paired = []
for cname in cnames1:
    if cname in remaining:
        paired.append(cname)
        remaining.discard(cname)

# Both outputs get the same data columns in the same order; only the leading
# row-name column (if present) differs per file.
cnames1 = cnames2 = paired
if rnames1:
    cnames1 = [indata1.meta[0]] + cnames1
if rnames2:
    cnames2 = [indata2.meta[0]] + cnames2

# Positions of the selected columns within each input's header.
cindex1 = [indata1.meta.index(c) for c in cnames1]
cindex2 = [indata2.meta.index(c) for c in cnames2]

outdata1 = TsvWriter(outfile1)
outdata2 = TsvWriter(outfile2)
outdata1.meta = cnames1
outdata2.meta = cnames2
outdata1.writeHead()
outdata2.writeHead()

# Stream each input row by row, keeping only the selected columns.
for r1 in indata1:
    outdata1.write(r1[i] for i in cindex1)
outdata1.close()

for r2 in indata2:
    outdata2.write(r2[i] for i in cindex2)
outdata2.close()