def read_cogs(filename):
    """
    Parse a COG listing file into a list of OpenStruct records.

    Only lines that start with ``[LETTERS]  COGnnnn  text`` are used; any
    line that does not match that layout is ignored. Each record has:
      id        -- the ``COG`` + digits accession
      name      -- the remainder of the line after the accession
      parents   -- the word characters found between the square brackets
      namespace -- always 'cog'
    """
    pattern = re.compile(r'\[(\w+)\]\s+(COG\d+)\s+(.*)')
    records = []
    with open(filename, 'r') as handle:
        for raw in handle:
            match = pattern.match(raw)
            if not match:
                # not a COG definition line
                continue
            bracketed, accession, label = match.groups()
            record = OpenStruct()
            record.id = accession
            record.name = label
            record.parents = bracketed
            record.namespace = 'cog'
            records.append(record)
    return records
Example #2
0
def read_cogs(filename):
    """
    Parse a COG listing file into a list of OpenStruct records.

    Only lines that start with ``[LETTERS]  COGnnnn  text`` are used; any
    line that does not match that layout is ignored. Each record has:
      id        -- the ``COG`` + digits accession
      name      -- the remainder of the line after the accession
      parents   -- the word characters found between the square brackets
      namespace -- always 'cog'
    """
    pattern = re.compile(r'\[(\w+)\]\s+(COG\d+)\s+(.*)')
    records = []
    with open(filename, 'r') as handle:
        for raw in handle:
            match = pattern.match(raw)
            if not match:
                # not a COG definition line
                continue
            bracketed, accession, label = match.groups()
            record = OpenStruct()
            record.id = accession
            record.name = label
            record.parents = bracketed
            record.namespace = 'cog'
            records.append(record)
    return records
Example #3
0
def read_tigrfams_by_role(filename):
    """
    This is no longer used!
    Parse the TIGRFams-by-role listing, which groups TIGRFams under roles
    and sub-roles using leading-space indentation.

    Indentation decides what a non-blank line is:
      fewer than 3 leading spaces -- a top-level role
      3 to 5 leading spaces       -- a sub-role of the current role
      6 or more leading spaces    -- a tab-separated TIGRFam row
                                     (accession, name, description)
    Rows whose first column is 'Accession' are column headers and are skipped.

    Returns a list of ``{'name': ..., 'roles': [...]}`` dicts, where each
    entry of 'roles' is a ``{'name': ..., 'tigrfams': [...]}`` dict holding
    OpenStruct objects with id, name and description.

    Source of the file:
    http://cmr.jcvi.org/tigr-scripts/CMR/shared/EvidenceList.cgi?ev_type=TIGRFAM&order_type=role
    Note the TIGRFams flat file is more complete than the TIGRFams by role file.
    """
    roles_tree = []
    current_role = None
    current_subrole = None

    with open(filename, 'r') as handle:
        for raw in handle:
            text = raw.strip()

            # nothing but whitespace on this line
            if not text:
                continue

            # no (or too little) indentation: start a new top-level role
            if not raw.startswith("   "):
                current_role = {'name': text, 'roles': []}
                roles_tree.append(current_role)
                continue

            # indented, but less than a tigrfam row: start a new sub-role
            if not raw.startswith("      "):
                current_subrole = {'name': text, 'tigrfams': []}
                current_role['roles'].append(current_subrole)
                continue

            # deepest indentation: a tab-separated tigrfam row
            columns = raw.lstrip(' ').rstrip("\n").split("\t")
            if columns[0] == 'Accession':
                # column header row
                continue
            entry = OpenStruct()
            entry.id = columns[0]
            entry.name = columns[1]
            entry.description = columns[2]
            current_subrole['tigrfams'].append(entry)

    return roles_tree
def read_tigrfams_by_role(filename):
    """
    This is no longer used!
    Parse the TIGRFams-by-role listing, which groups TIGRFams under roles
    and sub-roles using leading-space indentation.

    Indentation decides what a non-blank line is:
      fewer than 3 leading spaces -- a top-level role
      3 to 5 leading spaces       -- a sub-role of the current role
      6 or more leading spaces    -- a tab-separated TIGRFam row
                                     (accession, name, description)
    Rows whose first column is 'Accession' are column headers and are skipped.

    Returns a list of ``{'name': ..., 'roles': [...]}`` dicts, where each
    entry of 'roles' is a ``{'name': ..., 'tigrfams': [...]}`` dict holding
    OpenStruct objects with id, name and description.

    Source of the file:
    http://cmr.jcvi.org/tigr-scripts/CMR/shared/EvidenceList.cgi?ev_type=TIGRFAM&order_type=role
    Note the TIGRFams flat file is more complete than the TIGRFams by role file.
    """
    roles_tree = []
    current_role = None
    current_subrole = None

    with open(filename, 'r') as handle:
        for raw in handle:
            text = raw.strip()

            # nothing but whitespace on this line
            if not text:
                continue

            # no (or too little) indentation: start a new top-level role
            if not raw.startswith("   "):
                current_role = {'name': text, 'roles': []}
                roles_tree.append(current_role)
                continue

            # indented, but less than a tigrfam row: start a new sub-role
            if not raw.startswith("      "):
                current_subrole = {'name': text, 'tigrfams': []}
                current_role['roles'].append(current_subrole)
                continue

            # deepest indentation: a tab-separated tigrfam row
            columns = raw.lstrip(' ').rstrip("\n").split("\t")
            if columns[0] == 'Accession':
                # column header row
                continue
            entry = OpenStruct()
            entry.id = columns[0]
            entry.name = columns[1]
            entry.description = columns[2]
            current_subrole['tigrfams'].append(entry)

    return roles_tree
Example #5
0
def read_cog_categories(filename):
    """
    Parse the COG functional category listing
    (see http://www.ncbi.nlm.nih.gov/COG/grace/fiew.cgi).

    Every line of the file yields one OpenStruct, in file order:
      * a line starting with a single capital letter followed by a tab is a
        subcategory: id is the first tab-separated field, name is the fourth
        field, parents is a one-element tuple holding the name of the most
        recent category line (None if there has been none), and namespace
        is "cog subcategory";
      * any other line (blank lines are not filtered out) is a category:
        name is the line without its trailing newline and namespace is
        "cog category".
    """
    subcategory_line = re.compile("[A-Z]\t.*")
    entries = []
    current_category = None
    with open(filename, 'r') as handle:
        for raw in handle:
            text = raw.rstrip("\n")
            entry = OpenStruct()
            if subcategory_line.match(raw):
                columns = text.split("\t")
                entry.id = columns[0]
                entry.name = columns[3]
                entry.parents = (current_category,)
                entry.namespace = "cog subcategory"
            else:
                entry.name = text
                entry.namespace = "cog category"
                # subcategories that follow belong to this category
                current_category = entry.name
            entries.append(entry)
    return entries
Example #6
0
def read_tigrfams(filename):
    """
    Read the flat listing of TIGRFams.
    Note the TIGRFams flat file is more complete than the TIGRFams by role file.
    The flat file is a superset of the by-role file.

    The file is tab-separated with one header line, which is discarded.
    Each following non-blank line becomes an OpenStruct with:
      id          -- first column
      name        -- second column
      description -- third column

    Returns the list of those objects in file order (empty for an empty or
    header-only file). A non-blank line with fewer than three columns raises
    IndexError.
    """
    tigrfams = []
    with open(filename, 'r') as f:

        # Skip the header. The builtin next() works on both Python 2 and
        # Python 3 file objects (the f.next() method exists only on Python 2),
        # and the default value keeps an empty file from raising StopIteration.
        next(f, None)

        for line in f:
            # Ignore blank lines (e.g. trailing newlines at the end of the
            # file); they carry no columns and would fail the lookups below.
            if not line.strip():
                continue
            fields = line.rstrip("\n").split("\t")
            tigrfam = OpenStruct()
            tigrfam.id = fields[0]
            tigrfam.name = fields[1]
            tigrfam.description = fields[2]
            tigrfams.append(tigrfam)

    return tigrfams
def read_cog_categories(filename):
    """
    Parse the COG functional category listing
    (see http://www.ncbi.nlm.nih.gov/COG/grace/fiew.cgi).

    Every line of the file yields one OpenStruct, in file order:
      * a line starting with a single capital letter followed by a tab is a
        subcategory: id is the first tab-separated field, name is the fourth
        field, parents is a one-element tuple holding the name of the most
        recent category line (None if there has been none), and namespace
        is "cog subcategory";
      * any other line (blank lines are not filtered out) is a category:
        name is the line without its trailing newline and namespace is
        "cog category".
    """
    subcategory_line = re.compile("[A-Z]\t.*")
    entries = []
    current_category = None
    with open(filename, 'r') as handle:
        for raw in handle:
            text = raw.rstrip("\n")
            entry = OpenStruct()
            if subcategory_line.match(raw):
                columns = text.split("\t")
                entry.id = columns[0]
                entry.name = columns[3]
                entry.parents = (current_category,)
                entry.namespace = "cog subcategory"
            else:
                entry.name = text
                entry.namespace = "cog category"
                # subcategories that follow belong to this category
                current_category = entry.name
            entries.append(entry)
    return entries
def read_tigrfams(filename):
    """
    Read the flat listing of TIGRFams.
    Note the TIGRFams flat file is more complete than the TIGRFams by role file.
    The flat file is a superset of the by-role file.

    The file is tab-separated with one header line, which is discarded.
    Each following non-blank line becomes an OpenStruct with:
      id          -- first column
      name        -- second column
      description -- third column

    Returns the list of those objects in file order (empty for an empty or
    header-only file). A non-blank line with fewer than three columns raises
    IndexError.
    """
    tigrfams = []
    with open(filename, 'r') as f:

        # Skip the header. The builtin next() works on both Python 2 and
        # Python 3 file objects (the f.next() method exists only on Python 2),
        # and the default value keeps an empty file from raising StopIteration.
        next(f, None)

        for line in f:
            # Ignore blank lines (e.g. trailing newlines at the end of the
            # file); they carry no columns and would fail the lookups below.
            if not line.strip():
                continue
            fields = line.rstrip("\n").split("\t")
            tigrfam = OpenStruct()
            tigrfam.id = fields[0]
            tigrfam.name = fields[1]
            tigrfam.description = fields[2]
            tigrfams.append(tigrfam)

    return tigrfams