def get_biotypes_from_attrs(attrs_tsv):
    """
    Collect the distinct biotypes found in an attributes file.

    The biotype is taken from the third column (index 2, the GeneType column)
    of every row after the header. attrs_tsv is passed straight to iterRows.
    The unique values are returned as a tuple, in no particular order.
    """
    # the first line is the header, so ask iterRows to skip it
    biotypes = {row[2] for row in iterRows(attrs_tsv, skipLines=1)}
    return tuple(biotypes)
Example #2
0
def transcript_iterator(gp_file):
    """
    Generator over the transcripts of a standard genePred file.

    Every row read from the file at path gp_file is wrapped in a
    GenePredTranscript, and a (transcript name, GenePredTranscript) pair is
    yielded for it. The file stays open while the generator is being consumed.
    """
    with open(gp_file) as handle:
        transcripts = (GenePredTranscript(row) for row in iterRows(handle))
        for tx in transcripts:
            yield tx.name, tx
Example #3
0
def psl_iterator(psl_file):
    """
    Generator over the alignments of a PSL file.

    Each row read from the file at path psl_file becomes a PslRow, and a
    (query name, PslRow) pair is yielded for it. The file stays open while
    the generator is being consumed.
    """
    with open(psl_file) as handle:
        for row in iterRows(handle):
            record = PslRow(row)
            pair = (record.q_name, record)
            yield pair
Example #4
0
def gp_chrom_filter(gp, filter_chrom=re.compile("(Y)|(chrY)")):
    """
    Collect the names of genePred entries whose chromosome matches filter_chrom.

    gp is the path to the genePred file. filter_chrom is a compiled regular
    expression applied with match() (so it is anchored at the start of the
    string) to the second column of each row; the default accepts values
    beginning with Y or chrY. Returns the set of first-column values of the
    rows that matched.
    """
    # Use a context manager so the handle is always closed, including when a
    # row is malformed and indexing raises; previously the file was left open.
    with open(gp) as f_h:
        return {x[0] for x in iterRows(f_h) if filter_chrom.match(x[1])}
Example #5
0
def build_intervals_from_bed(bed, strand=None):
    """
    Build a sorted list of unique ChromosomeInterval objects from a BED.

    bed is passed straight to iterRows. Each row must have 3, 4, 6 or 12
    columns. The strand of an interval is the strand argument when one is
    given; otherwise it is the sixth column when present, and '.' when the
    row has fewer than six columns. Duplicate intervals are collapsed, and
    the result is ordered by (chromosome, start).
    """
    intervals = set()
    for fields in iterRows(bed):
        assert len(fields) in [3, 4, 6, 12], 'Wrong BED format: {}'.format(len(fields))
        if strand is not None:
            # an explicit strand overrides whatever the BED row says
            row_strand = strand
        else:
            row_strand = fields[5] if len(fields) >= 6 else '.'
        interval = ChromosomeInterval(fields[0], int(fields[1]), int(fields[2]), row_strand)
        intervals.add(interval)
    return sorted(intervals, key=lambda ival: (ival.chromosome, ival.start))
 def find_num_rows(self):
     """
     Return the number of rows iterRows yields for the file at self.input_file.
     """
     # Close the file deterministically and count rows as they stream by
     # instead of building a throwaway list of every row.
     with open(self.input_file) as handle:
         return sum(1 for _ in iterRows(handle))
Example #7
0
 def addTupleFile(self, fname, type=int, valCol=0, cntCol=1):
     """
     Add (value, count) tuples read from a tab separated file.

     For every row of fname, column valCol is converted with type and column
     cntCol with int, and the resulting pair is appended to self.data.
     Only valid when this object holds tuple data.
     """
     assert self.isTupleData
     for columns in iterRows(fname):
         value = type(columns[valCol])
         count = int(columns[cntCol])
         self.data.append((value, count))
Example #8
0
 def addFile(self, fname, type=int, valCol=0):
     """
     Add values read from a tab separated file.

     For every row of fname, column valCol is converted with type and
     appended to self.data. Only valid when this object does not hold
     tuple data.
     """
     assert not self.isTupleData
     for columns in iterRows(fname):
         value = type(columns[valCol])
         self.data.append(value)
Example #9
0
def get_gp_ids(gp):
    """
    Get all unique IDs from a genePred.

    gp is the path to the genePred file. Returns the set of first-column
    values over all of its rows.
    """
    # A context manager guarantees the handle is closed once the rows have
    # been consumed; the bare open() call used before was never closed.
    with open(gp) as handle:
        return {x[0] for x in iterRows(handle)}
 def addTupleFile(self, fname, type=int, valCol=0, cntCol=1):
     """
     Add (value, count) tuples read from a tab separated file.

     For every row of fname, column valCol is converted with type and column
     cntCol with int, and the resulting pair is appended to self.data.
     Only valid when this object holds tuple data.
     """
     assert self.isTupleData
     for columns in iterRows(fname):
         value = type(columns[valCol])
         count = int(columns[cntCol])
         self.data.append((value, count))
 def addFile(self, fname, type=int, valCol=0):
     """
     Add values read from a tab separated file.

     For every row of fname, column valCol is converted with type and
     appended to self.data. Only valid when this object does not hold
     tuple data.
     """
     assert not self.isTupleData
     for columns in iterRows(fname):
         value = type(columns[valCol])
         self.data.append(value)
def get_common_name_map(attrs):
    """
    Build a dict keyed by the second column of the attributes file, with the
    first column as the value.

    attrs is passed straight to iterRows and its header line is skipped.
    When a key occurs on several rows, the last row wins.
    """
    return {row[1]: row[0] for row in iterRows(attrs, skipLines=1)}
def get_tx_map(attrs):
    """
    Build a dict keyed by the fourth column of the attributes file, with the
    first column as the value.

    attrs is passed straight to iterRows and its header line is skipped.
    When a key occurs on several rows, the last row wins.
    """
    return {row[3]: row[0] for row in iterRows(attrs, skipLines=1)}