Ejemplo n.º 1
0
def getGenomeNeighborhoodsAndRoles(genomes, config):
    ''' Given a list of genomes, return the contig locations of their features ordered along
        each contig, and a dictionary from feature ID to the roles linked to that feature.

        @param genomes Genomes to look up (handed unchanged to genomesToPegs)
        @param config Dictionary of configuration variables; "cdmi_url" is read from it
        @return List of (contig ID, feature ID, begin, dir) tuples sorted by contig ID and then
            by begin (converted to int), dictionary keyed by feature ID of list of roles
    '''
    cdmi_entity = CDMI_EntityAPI(config["cdmi_url"])

    pegs = genomesToPegs(genomes)

    # Get contigs: one IsLocatedIn link per feature/contig pair.
    fidlocdict = cdmi_entity.get_relationship_IsLocatedIn(pegs, [], ["begin", "dir"], ["id"])
    located_fids = getFieldFromRelationship(fidlocdict, "from_link", "rel")
    begins = getFieldFromRelationship(fidlocdict, "begin", "rel")
    dirs = getFieldFromRelationship(fidlocdict, "dir", "rel")
    cids = getFieldFromRelationship(fidlocdict, "id", "to")

    # Sort by contig first, then by start location. begin is converted to int
    # so the ordering within a contig is numeric.
    # NOTE(review): assumes the four field lists are parallel (same length) -- confirm
    # against getFieldFromRelationship.
    tuplist = sorted(
        ((cid, fid, int(begin), direction)
         for cid, fid, begin, direction in zip(cids, located_fids, begins, dirs)),
        key=operator.itemgetter(0, 2))

    # Now get the roles for all of the located feature IDs.
    # Note that a single protein can have multiple roles, hence the list values.
    roledict = cdmi_entity.get_relationship_HasFunctional(located_fids, [], [], ["id"])
    role_fids = getFieldFromRelationship(roledict, "from_link", "rel")
    roles = getFieldFromRelationship(roledict, "id", "to")
    fidToRoles = {}
    for fid, role in zip(role_fids, roles):
        fidToRoles.setdefault(fid, []).append(role)
    return tuplist, fidToRoles
Ejemplo n.º 2
0
def fidsToRoles(fidlist, config):
    ''' Given a list of feature IDs return a dictionary from FID to the list of roles the encoding gene
        performs and a dictionary from roles to the FIDs performing them.

        The feature IDs are queried in sub-lists (initially 1000 at a time). Whenever a query
        raises HTTPError the sub-list size is halved (down to a minimum of 1) and the same
        position is retried.

        @param fidlist List of feature IDs
        @param config Dictionary of configuration variables; "cdmi_url" is read from it
        @return Dictionary keyed by feature ID of list of roles encoding gene performs, dictionary
            keyed by role of list of feature IDs performing the role
    '''

    cdmi_entity = CDMI_EntityAPI(config["cdmi_url"])

    # Break the complete list into smaller sub-lists to avoid timeouts
    increment = 1000
    start = 0
    total = len(fidlist)
    fid_roles = {}
    role_fids = {}
    while start < total:
        try:
            # Slicing past the end of the list is harmless, so no clamping is needed.
            roledict = cdmi_entity.get_relationship_HasFunctional(fidlist[start:start + increment], [], [], ["id"])
        except HTTPError as e:
            if increment > 1:
                # Floor division keeps increment an int so it stays usable as a slice bound.
                increment = increment // 2
            sys.stderr.write("caught '%s' error, increment is now %d\n" %(e.reason, increment))
            # NOTE(review): once increment has reached 1 a persistent HTTPError is retried
            # indefinitely -- confirm whether a retry limit is wanted.
            continue
        flist = getFieldFromRelationship(roledict, "from_link", "rel")
        rolelist = getFieldFromRelationship(roledict, "id", "to")
        for fid, role in zip(flist, rolelist):
            # We have to use sets here because a bug(?) in get_relationship_HasFunctional allows multiple identical
            # links between fids and roles.
            # See for example what happens when you call it on g.9647.peg.2332
            fid_roles.setdefault(fid, set()).add(role)
            role_fids.setdefault(role, set()).add(fid)

        # Move to next sub-list
        start += increment

    # Convert back to lists to not break other functions.
    for f in fid_roles:
        fid_roles[f] = list(fid_roles[f])
    for r in role_fids:
        role_fids[r] = list(role_fids[r])
    return fid_roles, role_fids