def read_tigr_roles(filename):
    """
    Read the TIGRFAM mainrole/subrole hierarchy from a tab-delimited file.

    Each non-blank line is split on tabs. Field 0 is ignored, field 1 is the
    integer TIGR role id, field 2 is the role type ('mainrole:' or
    'sub1role:', the trailing colon is dropped) and field 3 is the role name.
    A role id is expected to appear once as a mainrole line and once as a
    sub1role line; that shared id is what ties a subrole to its main role.

    Returns a tuple (mainroles, roles_by_id):
      mainroles   -- list of main roles, one per distinct name, in order of
                     first appearance. Each has a `children` list holding
                     its subroles.
      roles_by_id -- dictionary from TIGR role id to subrole, which can be
                     used with the file TIGRFAMS_ROLE_LINK to attach
                     TIGRFams to roles.

    Raises ValueError if a line has an unknown role type or a non-integer
    role id, IndexError if a line has fewer than four fields, and KeyError
    if a subrole's id never appeared on a mainrole line.
    """
    mainroles = []
    mainroles_by_name = {}
    mainroles_by_id = {}
    roles_by_id = {}
    with open(filename, 'r') as f:
        for line in f:
            # blank lines carry no role; skipping them avoids an IndexError
            if not line.strip():
                continue
            fields = line.rstrip("\r\n").split("\t")
            tigr_role_id = int(fields[1])
            role_type = fields[2].rstrip(':')
            name = fields[3]

            # validate before building the role so that bad input fails
            # with a real exception (a bare string cannot be raised)
            if role_type not in ('mainrole', 'sub1role'):
                raise ValueError("Unknown role type: " + role_type)

            role = OpenStruct()
            role.tigr_role_id = tigr_role_id
            role.type = role_type
            role.name = name

            if role_type == 'mainrole':
                # the same main role name repeats under many ids; keep only
                # the first object and point every id at it
                if name not in mainroles_by_name:
                    role.children = []
                    mainroles.append(role)
                    mainroles_by_name[name] = role
                mainroles_by_id[tigr_role_id] = mainroles_by_name[name]
            else:
                roles_by_id[tigr_role_id] = role

    # add subroles to main roles
    for role_id in roles_by_id:
        mainroles_by_id[role_id].children.append(roles_by_id[role_id])
    return (mainroles, roles_by_id,)
def read_tigr_roles(filename):
    """
    Read the TIGRFAM mainrole/subrole hierarchy from a tab-delimited file.

    Each non-blank line is split on tabs. Field 0 is ignored, field 1 is the
    integer TIGR role id, field 2 is the role type ('mainrole:' or
    'sub1role:', the trailing colon is dropped) and field 3 is the role name.
    A role id is expected to appear once as a mainrole line and once as a
    sub1role line; that shared id is what ties a subrole to its main role.

    Returns a tuple (mainroles, roles_by_id):
      mainroles   -- list of main roles, one per distinct name, in order of
                     first appearance. Each has a `children` list holding
                     its subroles.
      roles_by_id -- dictionary from TIGR role id to subrole, which can be
                     used with the file TIGRFAMS_ROLE_LINK to attach
                     TIGRFams to roles.

    Raises ValueError if a line has an unknown role type or a non-integer
    role id, IndexError if a line has fewer than four fields, and KeyError
    if a subrole's id never appeared on a mainrole line.
    """
    mainroles = []
    mainroles_by_name = {}
    mainroles_by_id = {}
    roles_by_id = {}
    with open(filename, 'r') as f:
        for line in f:
            # blank lines carry no role; skipping them avoids an IndexError
            if not line.strip():
                continue
            fields = line.rstrip("\r\n").split("\t")
            tigr_role_id = int(fields[1])
            role_type = fields[2].rstrip(':')
            name = fields[3]

            # validate before building the role so that bad input fails
            # with a real exception (a bare string cannot be raised)
            if role_type not in ('mainrole', 'sub1role'):
                raise ValueError("Unknown role type: " + role_type)

            role = OpenStruct()
            role.tigr_role_id = tigr_role_id
            role.type = role_type
            role.name = name

            if role_type == 'mainrole':
                # the same main role name repeats under many ids; keep only
                # the first object and point every id at it
                if name not in mainroles_by_name:
                    role.children = []
                    mainroles.append(role)
                    mainroles_by_name[name] = role
                mainroles_by_id[tigr_role_id] = mainroles_by_name[name]
            else:
                roles_by_id[tigr_role_id] = role

    # add subroles to main roles
    for role_id in roles_by_id:
        mainroles_by_id[role_id].children.append(roles_by_id[role_id])
    return (mainroles, roles_by_id,)
Exemple #3
0
def read_genes(filename, chromosome=None, chromosome_map=None, rna=False):
    """
    Read genes from a tab-delimited table with a title line and a header line.

    The first line of the file is a title and the second line holds the
    tab-separated column names. Every following non-blank line becomes one
    gene object. Columns read by name: 'Locus_tag', 'Locus', 'GeneID',
    'Strand', 'Start', 'End', 'Product Name' and, when present, 'Gi'.

    filename       -- path of the file to read.
    chromosome     -- value stored on every gene as `chromosome`. If None,
                      it is looked up through chromosome_map.
    chromosome_map -- optional dictionary from a substring of the title line
                      to a chromosome value; the first key found in the
                      title wins. Only consulted when chromosome is None.
    rna            -- if true, gene.type comes from guess_rna_gene_type()
                      applied to the description; otherwise it is 'CDS'.

    Returns the list of genes. Returns None (after printing the error) if
    the title or header cannot be read or the chromosome cannot be
    determined. If a data line fails to parse, the error is printed, reading
    stops, and the genes parsed so far are returned.
    """
    genes = []
    with open(filename, 'r') as f:
        try:
            # first two lines hold title and column headers:
            title = next(f)

            # figure out chromosome from title
            if chromosome is None:
                # a missing map simply means nothing can match
                for key in (chromosome_map or {}):
                    if key in title:
                        chromosome = chromosome_map[key]
                        break

            if chromosome is None:
                raise Exception(
                    "Can't figure out chromosome for: %s\ntitle=%s"
                    % (filename, title))

            # parse out column headers: column name -> field index
            columns = {}
            for i, column in enumerate(next(f).strip().split("\t")):
                columns[column] = i
        except Exception as e:
            print("Error reading file: " + filename)
            print(str(type(e)) + ": " + str(e))
            return None

        # defined up front so the error handler below can always print it
        line = ""
        try:
            # read line into objects
            for line in f:
                # strip leading and trailing whitespace
                line = line.strip()

                # skip blank lines
                if not line:
                    continue

                fields = line.split("\t")

                gene = OpenStruct()
                gene.name = fields[columns['Locus_tag']]  # locus tag
                # '-' marks an empty value; leave the attribute unset
                if fields[columns['Locus']] != '-':
                    gene.common_name = fields[columns['Locus']]  # locus
                if 'Gi' in columns:
                    gene.gi = int(fields[columns['Gi']])
                gene.geneid = int(fields[columns['GeneID']])
                gene.strand = fields[columns['Strand']]  # '+' or '-'
                gene.start = int(fields[columns['Start']])
                gene.end = int(fields[columns['End']])
                if fields[columns['Product Name']] != '-':
                    gene.description = fields[columns['Product Name']]
                gene.chromosome = chromosome

                if rna:
                    # NOTE(review): description is unset when Product Name
                    # is '-'; this relies on how OpenStruct handles a
                    # missing attribute -- confirm.
                    gene.type = guess_rna_gene_type(gene.description)
                else:
                    gene.type = 'CDS'

                genes.append(gene)
        except Exception as e:
            # best effort: report the offending line and keep what we have
            print("Error reading line: " + line)
            print(str(type(e)) + ": " + str(e))
    return genes