def __init__(self, sfile):
    """Load a sample information table from ``sfile`` via ``TsvReader``.

    The first header name is used as the sample column; when it is
    ``'ROWNAMES'`` it is renamed to ``'Sample'`` (both on this object and
    in the reader's header) before the rows are dumped.

    Attributes set:
        samcol:   name of the first (sample) column.
        data:     rows returned by ``reader.dump()``.
        nrow:     number of rows.
        ncol:     number of header names.
        colnames: the reader's header names.
        rownames: value of the sample column for every row.

    Raises:
        SampleInfoException: if the header shares no name with
            ``Sample``, ``Patient``, ``Group`` and ``Batch``.
    """
    reader = TsvReader(sfile)

    # Normalise the name of the leading column before reading any rows,
    # so that rows are looked up with the final column name.
    sample_column = reader.cnames[0]
    if sample_column == 'ROWNAMES':
        sample_column = 'Sample'
        reader.cnames[0] = sample_column
    self.samcol = sample_column

    records = reader.dump()
    self.data = records
    self.nrow = len(records)
    self.colnames = reader.cnames
    self.ncol = len(self.colnames)
    self.rownames = [record[sample_column] for record in records]

    # Only complain when *none* of the recognised columns is present.
    recognised = {'Sample', 'Patient', 'Group', 'Batch'}
    if recognised.isdisjoint(self.colnames):
        raise SampleInfoException(
            'Unexpected column names: %s.' % str(self.colnames))
def _read(self, sifile):
    """Read the sample information file ``sifile`` into this object.

    The reader's header names are stored in ``self.cnames`` and the
    dumped rows in ``self.mat``. An empty header name is accepted and
    replaced by ``"Sample"`` (first occurrence only).

    Raises:
        SampleInfoException: if the file has no header, or if any header
            name is not one of ``""``, ``Sample``, ``Patient``, ``Group``
            or ``Batch``.
    """
    allowed_headers = ["", "Sample", "Patient", "Group", "Batch"]

    reader = TsvReader(sifile)
    self.cnames = reader.cnames

    if not self.cnames:
        raise SampleInfoException(
            'Headers for sample information file is required.')

    # Reject the file on the first header name outside the allowed set.
    for header in self.cnames:
        if header not in allowed_headers:
            raise SampleInfoException(
                'Headers should be a subset of {!r}'.format(
                    ', '.join(allowed_headers)))

    # An unnamed column is taken to be the sample column.
    try:
        unnamed_at = self.cnames.index("")
    except ValueError:
        pass
    else:
        self.cnames[unnamed_at] = "Sample"

    self.mat = reader.dump()
# snp gene # SNP1 Gene10 # sorted by gene infile = {{i.infile | quote}} snpfile = {{o.snpfile | quote}} genefile = {{o.genefile | quote}} snppergene = {{args.snppergene | repr}} nchr = {{args.nchr | repr}} seed = {{args.seed | repr}} # distances between genes dist = {{args.dist | repr}} random.seed(seed) reader = TsvReader(infile, cnames=False) allsnps = set(reader.dump(0)) reader.rewind() allgenes = set(reader.dump(1)) reader.close() # assign a probability to each snp nsnps = len(allsnps) ngenes = len(allgenes) snp_probs = dict(zip(allsnps, random.choices(range(ngenes * snppergene), k=nsnps))) genebed = TsvWriter(genefile) snpbed = TsvWriter(snpfile) geneperchr = math.ceil(float(ngenes) / float(nchr)) for i, gene in enumerate(allgenes):