def get_biotypes_from_attrs(attrs_tsv):
    """
    Collect the distinct biotypes found in an attributes file.

    The biotype is taken from the 3rd column (the GeneType column) of every
    row. The first line is skipped because it is the header.

    attrs_tsv is handed straight to iterRows. The result is a tuple of the
    unique 3rd-column values; because it is built from a set, its order is
    not meaningful.
    """
    biotypes = {row[2] for row in iterRows(attrs_tsv, skipLines=1)}
    return tuple(biotypes)
def transcript_iterator(gp_file):
    """
    Iterate over a standard genePred file.

    This is a generator: for each row of the file at path gp_file it builds
    a GenePredTranscript and yields a (transcript name, transcript) pair.
    The file stays open while the generator is being consumed.
    """
    with open(gp_file) as gp_handle:
        # generator expression keeps the parsing lazy, one row at a time
        transcripts = (GenePredTranscript(fields) for fields in iterRows(gp_handle))
        for transcript in transcripts:
            yield transcript.name, transcript
def psl_iterator(psl_file):
    """
    Iterate over a PSL file.

    This is a generator: for each row of the file at path psl_file it builds
    a PslRow and yields a (query name, PslRow) pair, where the query name is
    the row's q_name attribute. The file stays open while the generator is
    being consumed.
    """
    with open(psl_file) as psl_handle:
        # generator expression keeps the parsing lazy, one row at a time
        records = (PslRow(fields) for fields in iterRows(psl_handle))
        for record in records:
            yield record.q_name, record
def gp_chrom_filter(gp, filter_chrom=re.compile("(Y)|(chrY)")):
    """
    Find the transcripts in a genePred whose chromosome matches filter_chrom.

    gp is the path to the genePred file. filter_chrom is a compiled regular
    expression applied with .match() to the 2nd column of each row, so it is
    anchored at the start of the value only: the default pattern accepts any
    value that begins with "Y" or "chrY".

    Returns the set of 1st-column values (transcript names) of the matching
    rows.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(gp) as gp_handle:
        return {row[0] for row in iterRows(gp_handle) if filter_chrom.match(row[1])}
def build_intervals_from_bed(bed, strand=None):
    """
    Build a sorted list of ChromosomeInterval objects from a BED.

    bed is handed straight to iterRows. Every row must have 3, 4, 6 or 12
    columns; anything else trips the assertion below.

    The strand of each interval is chosen as follows:
      * the strand argument, when one is given;
      * otherwise '.', when the row has fewer than 6 columns;
      * otherwise the 6th column of the row.

    Intervals are accumulated in a set before sorting, so rows producing
    equal ChromosomeInterval objects are collapsed (this relies on how
    ChromosomeInterval defines equality and hashing, which is not visible
    here). The returned list is ordered by (chromosome, start).
    """
    valid_column_counts = [3, 4, 6, 12]
    intervals = set()
    for fields in iterRows(bed):
        num_fields = len(fields)
        assert num_fields in valid_column_counts, 'Wrong BED format: {}'.format(num_fields)
        if strand is None:
            # fall back to the row's own strand column when it has one
            interval_strand = fields[5] if num_fields >= 6 else '.'
        else:
            interval_strand = strand
        intervals.add(ChromosomeInterval(fields[0], int(fields[1]), int(fields[2]), interval_strand))
    return sorted(intervals, key=lambda interval: (interval.chromosome, interval.start))
def find_num_rows(self):
    """
    Count the rows iterRows produces for the file at self.input_file.

    Returns the number of rows as an int.
    """
    # Close the file deterministically, and count while streaming rather
    # than materializing every row in a list just to take its length.
    with open(self.input_file) as input_handle:
        return sum(1 for _ in iterRows(input_handle))
def addTupleFile(self, fname, type=int, valCol=0, cntCol=1):
    """
    Add (value, count) tuples read from a tab-separated file.

    For every row that iterRows yields for fname, column valCol is converted
    with the callable `type`, column cntCol is converted with int, and the
    resulting pair is appended to self.data. Only valid when
    self.isTupleData is true.
    """
    assert self.isTupleData
    for fields in iterRows(fname):
        value = type(fields[valCol])
        count = int(fields[cntCol])
        self.data.append((value, count))
def addFile(self, fname, type=int, valCol=0):
    """
    Add values read from a tab-separated file.

    For every row that iterRows yields for fname, column valCol is converted
    with the callable `type` and appended to self.data. Only valid when
    self.isTupleData is false.
    """
    assert not self.isTupleData
    for fields in iterRows(fname):
        value = type(fields[valCol])
        self.data.append(value)
def get_gp_ids(gp):
    """
    Get all unique IDs from a genePred.

    gp is the path to the genePred file. Returns the set of 1st-column
    values across all rows.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(gp) as gp_handle:
        return {row[0] for row in iterRows(gp_handle)}
def addTupleFile(self, fname, type=int, valCol=0, cntCol=1):
    """
    Add (value, count) tuples read from a tab-separated file.

    Each row that iterRows yields for fname contributes one tuple to
    self.data: column valCol converted with the callable `type`, paired with
    column cntCol converted with int. Only valid when self.isTupleData is
    true.
    """
    assert self.isTupleData
    for columns in iterRows(fname):
        converted_value = type(columns[valCol])
        occurrences = int(columns[cntCol])
        self.data.append((converted_value, occurrences))
def addFile(self, fname, type=int, valCol=0):
    """
    Add values read from a tab-separated file.

    Each row that iterRows yields for fname contributes one entry to
    self.data: column valCol converted with the callable `type`. Only valid
    when self.isTupleData is false.
    """
    assert not self.isTupleData
    for columns in iterRows(fname):
        converted_value = type(columns[valCol])
        self.data.append(converted_value)
def get_common_name_map(attrs):
    """
    Build a dict from the rows of an attributes file, skipping its first
    (header) line.

    Each row maps its 2nd column (key) to its 1st column (value). When a key
    occurs more than once, the last row wins.
    """
    return {row[1]: row[0] for row in iterRows(attrs, skipLines=1)}
def get_tx_map(attrs):
    """
    Build a dict from the rows of an attributes file, skipping its first
    (header) line.

    Each row maps its 4th column (key) to its 1st column (value). When a key
    occurs more than once, the last row wins.
    """
    return {row[3]: row[0] for row in iterRows(attrs, skipLines=1)}