iplMatrix = None
    global verbose, globalList, globalPathway
    for o, a in opts:
        if o == "-q":
            verbose = False
        elif o == "-i":
            iplMatrix = a

    ## build sourceList
    typeFile = "/".join(re.split("/", sifFile)[:-2] + ["TYPE.NA"])
    sourceList = re.split("/", sifFile)[-2] + ".list_t"

    h = mData.rList(featureFile)
    (n, i) = mPathway.rSIF(sifFile, typef=typeFile)
    (gn, gi) = mPathway.rPathway(globalPathway)
    p = mPathway.Pathway(n, i)
    s = mPathway.sortConnected(p)
    f = open(sourceList, "w")
    c = 1
    for i in s:
        u = list(set(i) & set(h))
        if len(u) >= 5:
            f.write("component_%s\t%s\n" % (c, "\t".join(u)))
            c += 1
            break
    f.write("component_all\t%s\n" % ("\t".join(list(set(n.keys()) & set(h)))))
    f.write("all\t%s\n" % ("\t".join(list(set(gn.keys()) & set(h)))))
    f.close()

    ## build overlapList
Exemple #2
0
    try:
        opts, args = getopt.getopt(args, "o:q")
    except getopt.GetoptError, err:
        print str(err)
        usage(2)

    if len(args) != 1:
        log("ERROR: incorrect number of arguments", die=True)

    inf = args[0]

    outf = None
    global verbose
    for o, a in opts:
        if o == "-o":
            outf = a
        elif o == "-q":
            verbose = False

    ## execute
    (n, i) = mPathway.rPathway(inf)
    p = mPathway.Pathway(n, i)
    p.selfTest()

    if outf != None:
        mPathway.wPathway(outf, p.nodes, p.interactions)


if __name__ == "__main__":
    main(sys.argv[1:])
Exemple #3
0
    parser.add_option(
        "--flattened",
        type="string",
        dest="flattened",
        action="store",
        help=
        "Join genes with all pathway links for complexes and families they belong to. Print that network with just proteins"
    )
    (options, args) = parser.parse_args()

    # nodes:
    #	name -> type
    # interactions:
    #	name -> interacting nodes
    nodes, Interactions, Proteins = mPathway.rPathway(options.pathway_file,
                                                      reverse=False,
                                                      retProteins=True)

    rev_nodes, revInteractions = mPathway.rPathway(options.pathway_file,
                                                   reverse=True,
                                                   retProteins=False)
    # maps complex strings to the components in each
    componentMap = mPathway.getComponentMap(rev_nodes, revInteractions)

    complexRE = re.compile(".*\((complex|family)\).*")
    abstractRE = re.compile(".*\(abstract\).*")

    # print out a 2-column interactions file of simple PPIs
    # protein -> protein
    ppi_edges = {}
    tf_edges = {}
    ## parse arguments
    try:
        opts, args = getopt.getopt(args, "o:q")
    except getopt.GetoptError, err:
        print str(err)
        usage(2)
    
    if len(args) != 1:
        log("ERROR: incorrect number of arguments", die = True)
    
    inf = args[0]
    
    outf = None
    global verbose
    for o, a in opts:
        if o == "-o":
            outf = a
        elif o == "-q":
            verbose = False
    
    ## execute
    (n, i) = mPathway.rPathway(inf)
    p = mPathway.Pathway(n, i)
    p.selfTest()
    
    if outf != None:
        mPathway.wPathway(outf, p.nodes, p.interactions)

if __name__ == "__main__":
    main(sys.argv[1:])
Exemple #5
0
# Date: 1-29-12
# Takes a sets file from expand.pl and follows interactions on non-leaf nodes
# to get an expanded set. A regex key identifying non-leaf nodes may be given
# on input (default is "(abstract)").
#
# The current use for this is to find the transitive neighbors of abstract
# concepts in the superpathway.

import re, sys, mPathway
from optparse import OptionParser
# Command-line interface: -r supplies the regex that marks non-leaf nodes,
# -p the superpathway file handed to mPathway.rPathway.
parser = OptionParser()
parser.add_option("-r", "--non_leaf", type="string", dest="non_leaf_node", action="store", help="Non leaf regex string key", default=None)
parser.add_option("-p", "--pathway_file", type="string", dest="pathway_file", action="store", help="superpathway file")
(options, args) = parser.parse_args()

# Stop with a usage message instead of handing None to the pathway reader.
if options.pathway_file is None:
    parser.error("a superpathway file is required (-p/--pathway_file)")

# Read the pathway with reverse=True; the result feeds getComponentMap below.
rev_nodes, revInteractions = mPathway.rPathway(options.pathway_file, reverse = True, retProteins = False)
# maps complex strings to the components in each
componentMap = mPathway.getComponentMap(rev_nodes, revInteractions)

# Single-character patterns for converting between spaces and underscores.
space2under = re.compile(' ')
under2space = re.compile('_')

# Raw strings keep the escaped parentheses literal without relying on
# Python passing unknown escape sequences through unchanged.
abstractRE = re.compile(r".*\(abstract\).*")
complexRE = re.compile(r".*\((complex|family)\).*")

# Non-leaf nodes default to the "(abstract)" pattern unless -r overrides it.
if options.non_leaf_node is None:
    nonLeafRE = abstractRE
else:
    nonLeafRE = re.compile(options.non_leaf_node)

# get the constituents of a complex
    prefix = args[0]
    inputArguments = args[1:]
    
    global verbose
    for o, a in opts:
        if o == "-q":
            verbose = False
    
    ## execute
    inputPathways = []
    for element in inputArguments:
        if os.path.isdir(element):
            for file in os.listdir(element):
                if file.endswith("pathway.tab"):
                    inputPathways.append(file)
        elif element.endswith("pathway.tab"):
            inputPathways.append(element)
    
    ## append pathways
    outPathway = mPathway.Pathway({}, {})
    for file in inputPathways:
        (nodes, interactions) = mPathway.rPathway(file)
        appendPathway = mPathway.Pathway(nodes, interactions)
        outPathway = mPathway.combinePathways(outPathway, appendPathway)
        
    ## write pathways
    mPathway.wPathway(prefix, outPathway.nodes, outPathway.interactions)

if __name__ == "__main__":
    main(sys.argv[1:])
def _thresholdLevel(score, stats, bounds):
    """Grade score against the two thresholds stats[0] + bound * stats[1].

    Returns 2 when score exceeds the threshold built from bounds[1], 1 when
    it only exceeds the one built from bounds[0], and 0 otherwise.
    """
    if score > stats[0] + bounds[1] * stats[1]:
        return 2
    if score > stats[0] + bounds[0] * stats[1]:
        return 1
    return 0


def filterNet(files, phenotypes = None, statLine = None, outDir = None):
    """Write, per phenotype, the part of the global pathway that passes the score filter.

    files      -- score files read with mData.rCRSData; files[0] is the main
                  set (it drives phenotype iteration, SCORE.NA and the
                  "MAIN" rule).
    phenotypes -- optional collection of phenotype names; when non-empty,
                  phenotypes not in it are skipped.
    statLine   -- optional "v1;v2,v1;v2,..." string giving one pair of
                  statistics per score file; when None the pairs come from
                  mCalculate.mean_std (presumably mean and standard
                  deviation -- confirm against mCalculate).
    outDir     -- directory to write into; defaults to one directory per
                  phenotype, named after the phenotype.

    Reads the module globals globalPathway, outputAttributes, includeType,
    topDisconnected, outputPARADIGM and featureReq, and reads and may modify
    filterBounds. In outputPARADIGM mode, when no connected component holds
    enough proteins, the bounds are lowered by 0.1, the whole run is retried
    and the process then exits.
    """
    global filterBounds
    if phenotypes is None:
        phenotypes = []
    filterString = "%s_%s" % (filterBounds[0], filterBounds[1])

    ## read global pathway
    (gNodes, gInteractions) = mPathway.rPathway(globalPathway)

    ## read drugs
    #drugData = mData.rSet(drugBank)

    ## write LABEL.NA, TYPE.NA
    if outputAttributes:
        with open("TYPE.NA", "w") as typef:
            with open("LABEL.NA", "w") as labelf:
                typef.write("TYPE (class=java.lang.String)\n")
                labelf.write("LABEL (class=java.lang.String)\n")
                for node in gNodes.keys():
                    typef.write("%s = %s\n" % (node, gNodes[node]))
                    # only proteins get a visible label
                    if gNodes[node] == "protein":
                        labelf.write("%s = %s\n" % (node, node))
                    else:
                        labelf.write("%s = %s\n" % (node, ""))
                #drugs here

    ## read scores
    # uData keeps the raw values, sData the absolute values ("NA" when the
    # raw value is not numeric); both are keyed by the index of the file.
    uData = dict()
    sData = dict()
    for idx, scoreFile in enumerate(files):
        uData[idx] = mData.rCRSData(scoreFile)
        sData[idx] = dict()
        for j in uData[idx].keys():
            sData[idx][j] = dict()
            for k in uData[idx][j].keys():
                try:
                    sData[idx][j][k] = abs(float(uData[idx][j][k]))
                except ValueError:
                    sData[idx][j][k] = "NA"
    nSets = len(sData)

    ## iterate phenotypes
    for p in sData[0].keys():
        if phenotypes and p not in phenotypes:
            continue
        pNodes = dict()
        pInteractions = dict()
        mainScores = sData[0][p]

        ## write SCORE.NA
        if outputAttributes:
            with open(p+"_SCORE.NA", "w") as scoref:
                scoref.write("SCORE (class=java.lang.Float)\n")
                for node in gNodes.keys():
                    # missing and "NA" scores are both written as 0
                    rawScore = uData[0][p].get(node, "NA")
                    if rawScore == "NA":
                        rawScore = "0"
                    scoref.write("%s = %s\n" % (node, rawScore))

        ## compute thresholds
        pStats = []
        if statLine is None:
            for k in range(nSets):
                pStats.append(mCalculate.mean_std(sData[k][p].values()))
        else:
            for pair in re.split(",", statLine):
                (v1, v2) = re.split(";", pair)
                pStats.append((float(v1), float(v2)))
        log("%s\t%s;%s" % (p, pStats[0][0], pStats[0][1]))
        for k in range(1, len(pStats)):
            log(",%s;%s" % (pStats[k][0], pStats[k][1]))
        log("\n")

        ## iterate links
        for a in gInteractions.keys():
            if a not in mainScores or mainScores[a] == "NA":
                continue
            for b in gInteractions[a].keys():
                if b not in mainScores or mainScores[b] == "NA":
                    continue
                ## score nodes by threshold
                aScore = [_thresholdLevel(sData[k][p][a], pStats[k], filterBounds) for k in range(nSets)]
                bScore = [_thresholdLevel(sData[k][p][b], pStats[k], filterBounds) for k in range(nSets)]

                ## selection rule
                # A link qualifies when its two levels sum to at least 3.
                if includeType == "OR":
                    keep = max(aScore)+max(bScore) >= 3
                elif includeType == "AND":
                    # every score set must support the link (the original
                    # vote counter was incremented by 0 and never matched)
                    keep = all(x+y >= 3 for (x, y) in zip(aScore, bScore))
                elif includeType == "MAIN":
                    keep = aScore[0]+bScore[0] >= 3
                else:
                    keep = False
                if keep:
                    (pNodes, pInteractions) = addLink(a, b, pNodes, pInteractions, gNodes, gInteractions)

        ## connect top scoring disconnected nodes
        # Drop "NA" scores before sorting: this avoids comparing strings with
        # floats and copes with an empty candidate list.
        sortedTop = [node for node in mainScores.keys()
                     if node in gNodes
                     and gNodes[node] in ["protein"]
                     and mainScores[node] != "NA"]
        sortedTop.sort(key = lambda node: mainScores[node], reverse = True)
        lowThreshold = pStats[0][0]+filterBounds[0]*pStats[0][1]
        for rank in range(min(topDisconnected, len(sortedTop))):
            node = sortedTop[rank]
            # scores are descending, so nothing after this one can qualify
            if mainScores[node] < lowThreshold:
                break
            if node not in pNodes:
                pNodes[node] = gNodes[node]
                pInteractions[node] = dict()
                pInteractions[node]["__DISCONNECTED__"] = "-disconnected-"

        ## output
        wrtDir = p if outDir is None else outDir
        if not os.path.exists(wrtDir):
            # no shell involved, and intermediate directories are created too
            os.makedirs(wrtDir)

        ## output for pathway-predictor
        if outputPARADIGM:
            protSet = set(node for node in gNodes if gNodes[node] == "protein")
            netNodes = mPathway.sortConnected(pNodes, pInteractions, mPathway.revInteractions(pInteractions))
            trainNodes = []
            for component in netNodes:
                if len(protSet & set(component)) > featureReq:
                    trainNodes += component
            if len(trainNodes) == 0:
                log("ERROR: no nets contained enough data\n...trying again\n")
                # Lower the upper bound first; once the two bounds are within
                # 0.1 of each other, lower both.
                if filterBounds[0]+0.1 <= filterBounds[1]:
                    filterBounds[1] -= 0.1
                else:
                    filterBounds[0] -= 0.1
                    filterBounds[1] -= 0.1
                # The retry redoes every phenotype, so exit once it returns.
                filterNet(files, phenotypes = phenotypes, statLine = statLine, outDir = outDir)
                sys.exit(0)
            (lNodes, lInteractions) = mPathway.constructInteractions(trainNodes, pNodes, pInteractions)
            if outputAttributes:
                mPathway.wSIF("%s/%s_%s_pp.sif" % (wrtDir, p, filterString), lInteractions)
            ## connect class node
            classNode = "class"
            lInteractions[classNode] = dict()
            for node in lNodes.keys():
                if node not in protSet:
                    continue
                lInteractions[classNode][node] = "-cl>"
            lNodes[classNode] = "active"
            mPathway.wPathway("%s/%s_%s_pp.tab" % (wrtDir, p, filterString), lNodes, lInteractions)
        ## output nodrug pathway
        else:
            mPathway.wSIF("%s/%s_%s_nodrug.sif" % (wrtDir, p, filterString), pInteractions)
            (cpNodes, cpInteractions) = mPathway.filterComplexesByGeneSupport(pNodes, pInteractions,
                                        mPathway.revInteractions(pInteractions), gNodes,
                                        mPathway.getComponentMap(gNodes, mPathway.revInteractions(gInteractions)))
            mPathway.wSIF("%s/%s_%s_nodrug_cleaned.sif" % (wrtDir, p, filterString), cpInteractions)
Exemple #8
0
    iplMatrix = None
    global verbose, globalList, globalPathway
    for o, a in opts:
        if o == "-q":
            verbose = False
        elif o == "-i":
            iplMatrix = a

    ## build sourceList
    typeFile = "/".join(re.split("/", sifFile)[:-2] + ["TYPE.NA"])
    sourceList = re.split("/", sifFile)[-2] + ".list_t"

    h = mData.rList(featureFile)
    (n, i) = mPathway.rSIF(sifFile, typef=typeFile)
    (gn, gi) = mPathway.rPathway(globalPathway)
    p = mPathway.Pathway(n, i)
    s = mPathway.sortConnected(p)
    f = open(sourceList, "w")
    c = 1
    for i in s:
        u = list(set(i) & set(h))
        if len(u) >= 5:
            f.write("component_%s\t%s\n" % (c, "\t".join(u)))
            c += 1
            break
    f.write("component_all\t%s\n" % ("\t".join(list(set(n.keys()) & set(h)))))
    f.write("all\t%s\n" % ("\t".join(list(set(gn.keys()) & set(h)))))
    f.close()

    ## build overlapList
    inputArguments = args[1:]

    global verbose
    for o, a in opts:
        if o == "-q":
            verbose = False

    ## execute
    inputPathways = []
    for element in inputArguments:
        if os.path.isdir(element):
            for file in os.listdir(element):
                if file.endswith("pathway.tab"):
                    inputPathways.append(file)
        elif element.endswith("pathway.tab"):
            inputPathways.append(element)

    ## append pathways
    outPathway = mPathway.Pathway({}, {})
    for file in inputPathways:
        (nodes, interactions) = mPathway.rPathway(file)
        appendPathway = mPathway.Pathway(nodes, interactions)
        outPathway = mPathway.combinePathways(outPathway, appendPathway)

    ## write pathways
    mPathway.wPathway(prefix, outPathway.nodes, outPathway.interactions)


if __name__ == "__main__":
    main(sys.argv[1:])
Exemple #10
0
 
 if len(args) != 3:
     log("ERROR: incorrect number of arguments", die = True)
 
 featureFile = args[0]
 pathwayFile = args[1]
 scoreFile = args[2]
 
 global verbose
 for o, a in opts:
     if o == "-q":
         verbose = False
 
 ## execute
 featureList = mData.rList(featureFile)
 (gNodes, gInteractions) = mPathway.rPathway(pathwayFile)
 scoreMap = {}
 scoreMap[sessionName] = mData.r2Col(scoreFile)
 
 ## find connected
 connectList = set()
 for source in featureList:
     if source not in gNodes:
         continue
     for target in featureList:
         if target not in gNodes:
             continue
         if source == target:
             continue
         paths = mPathway.shortestPath(source, target, gInteractions, maxDistance = maxDistance)
         if len(paths) == 0: