Example #1
0
    def __init__(self, sfile):
        """Read a sample information file and cache its contents.

        After construction the instance exposes ``samcol`` (name of the
        first column), ``data`` (the result of ``reader.dump()``), ``nrow``,
        ``ncol``, ``colnames`` and ``rownames`` (the ``samcol`` value of
        every row).

        Args:
            sfile: Handed to ``TsvReader`` unchanged.

        Raises:
            SampleInfoException: If none of 'Sample', 'Patient', 'Group' or
                'Batch' occurs among the column names.
        """
        reader = TsvReader(sfile)

        # A first column called 'ROWNAMES' is renamed to 'Sample', both on
        # the instance and in the reader's own header list (presumably so
        # the dumped rows are keyed by 'Sample' as well -- TODO confirm).
        first_col = reader.cnames[0]
        if first_col == 'ROWNAMES':
            first_col = 'Sample'
            reader.cnames[0] = first_col
        self.samcol = first_col

        self.data = reader.dump()
        self.nrow = len(self.data)
        self.colnames = reader.cnames
        self.ncol = len(self.colnames)

        sample_key = self.samcol
        self.rownames = [record[sample_key] for record in self.data]

        # Only a header sharing nothing at all with the known columns is
        # rejected; additional, unknown columns are tolerated.
        known_columns = ('Sample', 'Patient', 'Group', 'Batch')
        if set(known_columns).isdisjoint(self.colnames):
            raise SampleInfoException('Unexpected column names: %s.' %
                                      str(self.colnames))
Example #2
0
    def _read(self, sifile):
        """Load the sample information file into ``self.cnames``/``self.mat``.

        The header must be non-empty and consist only of the names "",
        "Sample", "Patient", "Group" and "Batch". A blank header name is
        replaced by "Sample" before the rows are dumped.

        Args:
            sifile: Handed to ``TsvReader`` unchanged.

        Raises:
            SampleInfoException: If there is no header, or if a header name
                is not one of the accepted names.
        """
        reader = TsvReader(sifile)
        allowed = ["", "Sample", "Patient", "Group", "Batch"]

        self.cnames = reader.cnames
        if not self.cnames:
            raise SampleInfoException(
                'Headers for sample information file is required.')

        for cname in self.cnames:
            if cname not in allowed:
                raise SampleInfoException(
                    'Headers should be a subset of {!r}'.format(
                        ', '.join(allowed)))

        # Rename the first blank header (if any) in place, before dumping.
        try:
            blank_at = self.cnames.index("")
        except ValueError:
            pass
        else:
            self.cnames[blank_at] = "Sample"

        self.mat = reader.dump()
Example #3
0
# Input file columns: snp gene
# e.g.:               SNP1 Gene10
# (rows sorted by gene)
infile = {{i.infile | quote}}
snpfile = {{o.snpfile | quote}}
genefile = {{o.genefile | quote}}
snppergene = {{args.snppergene | repr}}
nchr = {{args.nchr | repr}}
seed = {{args.seed | repr}}
# distances between genes
dist = {{args.dist | repr}}

random.seed(seed)

# Collect the distinct values of column 0 (SNPs) and column 1 (genes).
# The file is dumped twice, rewinding the reader in between.
reader = TsvReader(infile, cnames=False)
allsnps = set(reader.dump(0))
reader.rewind()
allgenes = set(reader.dump(1))
reader.close()

# Draw one integer from range(ngenes * snppergene) for each SNP (with
# replacement).
# The draws are paired with the SNPs in sorted order: iteration order of a
# set of strings changes between interpreter runs (hash randomization), so
# zipping against the bare set would give a different SNP -> value mapping
# on every run even with the same seed. key=str keeps the sort well defined
# should the reader yield non-string values.
# NOTE(review): allgenes is still an unordered set, so any loop over it is
# likewise run-dependent -- confirm whether that matters downstream.
nsnps = len(allsnps)
ngenes = len(allgenes)
snp_probs = dict(zip(sorted(allsnps, key=str),
                     random.choices(range(ngenes * snppergene), k=nsnps)))

# Output writers for the gene file and the SNP file.
genebed = TsvWriter(genefile)
snpbed = TsvWriter(snpfile)

# Number of genes per chromosome, rounded up.
geneperchr = math.ceil(float(ngenes) / float(nchr))
for i, gene in enumerate(allgenes):