def correlationPairwise(outf, inf, group1, group2, method=pcorrelation):
    """Computes all pairwise correlations between group1 and group2

    Loads the matrix in inf with mData.rCRSData and writes one line
    "name1<TAB>name2<TAB>value" to outf for every pair (i, j) with i taken
    from group1 and j taken from group2.  Names that are not keys of the
    loaded data are skipped silently.

    outf   -- path of the output file (opened with mode "w")
    inf    -- path of the input matrix
    group1 -- iterable of names (outer loop)
    group2 -- collection of names, iterated once per group1 member
    method -- callable taking two value lists (both in the row order of the
              first loaded column) and returning the value to write.  It
              must not modify its arguments: the value lists are cached and
              handed to several calls.
    """
    inData = mData.rCRSData(inf)
    # list(...) keeps the [0] lookup valid where keys() returns a view
    inRows = list(inData[list(inData.keys())[0]].keys())
    columns = {}

    def column(name):
        # build the value list of a column only once, in inRows order
        if name not in columns:
            columns[name] = [inData[name][k] for k in inRows]
        return columns[name]

    # "with" closes the file even when method() or a lookup raises
    with open(outf, "w") as f:
        for i in group1:
            if i not in inData:
                continue
            list1 = column(i)
            for j in group2:
                if j not in inData:
                    continue
                value = method(list1, column(j))
                f.write("%s\t%s\t%s\n" % (i, j, value))
def correlationPairwise(outf, inf, group1, group2, method=pcorrelation):
    """Computes all pairwise correlations between group1 and group2

    Loads the matrix in inf with mData.rCRSData and writes one line
    "name1<TAB>name2<TAB>value" to outf for every pair (i, j) with i taken
    from group1 and j taken from group2.  Names that are not keys of the
    loaded data are skipped silently.

    outf   -- path of the output file (opened with mode "w")
    inf    -- path of the input matrix
    group1 -- iterable of names (outer loop)
    group2 -- collection of names, iterated once per group1 member
    method -- callable taking two value lists (both in the row order of the
              first loaded column) and returning the value to write.  It
              must not modify its arguments: the value lists are cached and
              handed to several calls.
    """
    inData = mData.rCRSData(inf)
    # list(...) keeps the [0] lookup valid where keys() returns a view
    inRows = list(inData[list(inData.keys())[0]].keys())
    columns = {}

    def column(name):
        # build the value list of a column only once, in inRows order
        if name not in columns:
            columns[name] = [inData[name][k] for k in inRows]
        return columns[name]

    # "with" closes the file even when method() or a lookup raises
    with open(outf, "w") as f:
        for i in group1:
            if i not in inData:
                continue
            list1 = column(i)
            for j in group2:
                if j not in inData:
                    continue
                value = method(list1, column(j))
                f.write("%s\t%s\t%s\n" % (i, j, value))
def correlationMatrix(outf, inf, method=pcorrelation):
    """Takes a tab file and cross-correlates the columns

    Loads inf with mData.rCRSData, applies method to every unordered pair of
    columns and writes the resulting symmetric matrix with mData.wCRSData.
    The diagonal is set to 1.0 without calling method.

    outf   -- path handed to mData.wCRSData
    inf    -- path handed to mData.rCRSData
    method -- callable taking two value lists (both in the row order of the
              first column) and returning their correlation.  It must not
              modify its arguments: each column's value list is built once
              and reused for every pair that column takes part in.
    """
    inData = mData.rCRSData(inf)
    # list(...) keeps positional indexing valid where keys() returns a view
    inCols = list(inData.keys())
    inRows = list(inData[inCols[0]].keys())

    # one value list per column, built once instead of once per pair
    values = [[inData[col][k] for k in inRows] for col in inCols]

    outData = dict()
    for col in inCols:
        outData[col] = {col: 1.0}
    for i in range(len(inCols) - 1):
        for j in range(i + 1, len(inCols)):
            value = method(values[i], values[j])
            # the matrix is symmetric, so fill both triangles
            outData[inCols[i]][inCols[j]] = value
            outData[inCols[j]][inCols[i]] = value
    mData.wCRSData(outf, outData)
def correlationMatrix(outf, inf, method=pcorrelation):
    """Takes a tab file and cross-correlates the columns

    Loads inf with mData.rCRSData, applies method to every unordered pair of
    columns and writes the resulting symmetric matrix with mData.wCRSData.
    The diagonal is set to 1.0 without calling method.

    outf   -- path handed to mData.wCRSData
    inf    -- path handed to mData.rCRSData
    method -- callable taking two value lists (both in the row order of the
              first column) and returning their correlation.  It must not
              modify its arguments: each column's value list is built once
              and reused for every pair that column takes part in.
    """
    inData = mData.rCRSData(inf)
    # list(...) keeps positional indexing valid where keys() returns a view
    inCols = list(inData.keys())
    inRows = list(inData[inCols[0]].keys())

    # one value list per column, built once instead of once per pair
    values = [[inData[col][k] for k in inRows] for col in inCols]

    outData = dict()
    for col in inCols:
        outData[col] = {col: 1.0}
    for i in range(len(inCols) - 1):
        for j in range(i + 1, len(inCols)):
            value = method(values[i], values[j])
            # the matrix is symmetric, so fill both triangles
            outData[inCols[i]][inCols[j]] = value
            outData[inCols[j]][inCols[i]] = value
    mData.wCRSData(outf, outData)
"%s\t%s\t%s\t%s\t%s\n" % ( feature, statsMap[feature][1], ",".join(membersMap[feature]), proportionMap[feature], statsMap[feature][0], ) ) f.close() ## summarize pathways if iplMatrix is not None: pathwayVals = {} pathwayScores = {} (iplData, iplSamples, iplFeatures) = mData.rCRSData(iplMatrix, retFeatures=True) f = open(re.split("/", sifFile)[-2] + ".members.output", "r") for line in f: line = line.rstrip("\n\r") pline = re.split("\t", line) if pline[1] in statsMap: if pline[1] not in pathwayVals: pathwayVals[pline[1]] = {} for sample in iplSamples: pathwayVals[pline[1]][sample] = [] if pline[0] in iplFeatures: for sample in iplSamples: pathwayVals[pline[1]][sample].append(abs(float(iplData[sample][pline[0]]))) f.close() for pathwayName in pathwayVals.keys(): pathwayScores[pathwayName] = {}
elif o == "-q": verbose = False ## execute samples = [] features = [] if sampleFile != None: samples = mData.rList(sampleFile) if featureFile != None: features = mData.rList(featureFile) ## read circleFiles circleData = [] circleColors = [] for i in range(len(circleFiles)): (data, cols, rows) = mData.rCRSData(circleFiles[i], retFeatures = True) circleData.append(data) minCol = rgb(0, 0, 255) zerCol = rgb(255, 255, 255) maxCol = rgb(255, 0, 0) if circleFiles[i].endswith("meth"): maxCol = rgb(0, 0, 255) minCol = rgb(255, 0, 0) log("Color: meth\n") elif circleFiles[i].startswith("mut."): maxCol = rgb(0, 0, 0) minCol = rgb(255, 255, 255) log("Color: mut\n") circleColors.append( (minCol, zerCol, maxCol) ) if sampleFile == None: samples = list(set(cols) | set(samples))
def cli_routine(outputDir, circleFiles, orderFiles, sampleFile, featureFile,
                orderFeature, centerFile, colorscaleFile, printLabel, verbose,
                cohortMinMax=False, purpleHack=True):
    """Routine for program execution via command-line.

    Reads the input files, optionally sorts the samples, and requests one
    image per feature via drawCircleImageForFeature.

    outputDir      -- directory used to build the image paths
                      ("<outputDir>/Cohort.png", "<outputDir>/<feature>.png")
    circleFiles    -- list of matrix paths, each read with
                      mData.rCRSData(..., retFeatures=True)
    orderFiles     -- list of matrix paths read with mData.rCRSData; when
                      non-empty they drive the sample sort and the cohort
                      image, otherwise the circle data drives the sort
    sampleFile     -- list file read with mData.rList, or None to use the
                      union of the columns of all circleFiles
    featureFile    -- list file read with mData.rList, or None to use the
                      union of the rows of all circleFiles
    orderFeature   -- feature handed to scmp for sorting; None disables the
                      sort and the cohort image
    centerFile     -- file read with mData.r2Col(header=True), or None
    colorscaleFile -- colour-scale file (format described below), or None
    printLabel     -- when true the feature name is used as image label
    verbose        -- not used inside this routine
    cohortMinMax   -- when true min/max lists come from
                      getCohortMinMaxValues
    purpleHack     -- forwarded as purple0Hack to getColor and
                      drawCircleImageForFeature

    Errors in the colour-scale file are reported through log(..., die=True),
    which is presumably fatal -- confirm against log's definition.
    """
    # I've tried not to touch this method as much as possible.
    # I don't want to break the way it was working for Sam Ng.
    # chrisw

    ## execute
    samples = []
    features = []
    if sampleFile is not None:
        samples = mData.rList(sampleFile)
    if featureFile is not None:
        features = mData.rList(featureFile)
    # end section for getting lists of samples and features

    ## read circleFiles
    # circleData is a list of dict[col][row]=score from each circleFile
    circleData = []
    # circleColorsPalette is a list of (minColor),(zeroColor),(maxColor)
    circleColorsPalette = []

    ## read colorscaleFile
    # the format is as follows - header compulsory:
    # min/max   color coding   color1        color2        color 3
    # -2,2      rgb            155,155,155   255,255,255   0,0,0,
    # -         rgb            155,0,155     255,0,255     0,0,0,
    # the "color format" is intended to support more color format, as I have
    # seen the html-colors in the code.
    # Michael (contact address is obfuscated in this copy of the source)
    colorscaleData = None
    if colorscaleFile is not None:
        if cohortMinMax:
            log("WARNING: The -k option overrides -m")
        colorscaleData = mData.retRows(colorscaleFile, aslist=True)
        # the first row is reported as line 2 in error messages
        for line, cs in enumerate(colorscaleData, 2):
            if len(cs) != 5:
                log("ERROR: color scale needs five fields: datapoints, colorcoding(rgb) and three colors\n", die=True)
            try:
                cs[0] = [float(x) for x in cs[0].split(",")]
                parsed = True
            except ValueError:
                # cs[0] keeps its raw text, which is only valid if it is "-"
                parsed = False
            # Checking len() on the raw text would let any unparsable
            # two-character field (e.g. "ab" or "1,") slip through.
            if not ((parsed and len(cs[0]) == 2) or cs[0] == "-"):
                print(cs[0])
                log("ERROR: Two data points or dash needed for color scale\n", die=True)
            if cs[1].lower() == "rgb":
                try:
                    for field in (2, 3, 4):
                        cs[field] = rgb(*[float(x) for x in cs[field].split(",")])
                except TypeError:
                    log("ERROR: RGB needs three values on line " + str(line) + "\n", die=True)
                except ValueError:
                    log("ERROR: RGB color not correctly defined on line " + str(line) + "\n", die=True)
            else:
                log("ERROR: Unknown color coding on line " + str(line) + ": " + str(cs[1]) + "\n", die=True)

    for i, circleFile in enumerate(circleFiles):
        # get data, samples, and features from each circleFile
        # data is a dict[col][row]=score
        # cols is a list of sample names
        # rows is a list of feature names
        (data, cols, rows) = mData.rCRSData(circleFile, retFeatures=True)
        circleData.append(data)

        minCol = lightBlueRGB
        zerCol = whiteRGB
        maxCol = redRGB
        if colorscaleFile is not None and i < len(colorscaleData):
            # get colors from specified colorscaleFile
            minCol = colorscaleData[i][2]
            zerCol = colorscaleData[i][3]
            maxCol = colorscaleData[i][4]
        # (the former filename-based special cases for "meth" and "mut"
        # files are disabled)
        circleColorsPalette.append((minCol, zerCol, maxCol))

        # if no sampleFile/featureFile, default to using samples/features from circleFiles
        if sampleFile is None:
            samples = list(set(cols) | set(samples))
        if featureFile is None:
            features = list(set(rows) | set(features))
    # end section for reading circleFiles

    ## read centerFile
    centerData = None
    if centerFile is not None:
        centerData = mData.r2Col(centerFile, header=True)

    ## sort
    if orderFeature is not None:
        if len(orderFiles) > 0:
            orderData = []
            orderColors = []
            for orderFile in orderFiles:
                orderData.append(mData.rCRSData(orderFile))
                # (minColor, zeroColor, maxColor)
                orderColors.append((whiteRGB, whiteRGB, blackRGB))
        else:
            orderData = circleData

        # sort samples based on sample score in orderData
        # priority of sorting determined by orderFiles parameter
        samples.sort(lambda x, y: scmp(x, y, orderFeature, orderData))

        ## cohort png
        # cgi will probably not use orderFiles
        if len(orderFiles) > 0:
            imgFile = "%s/Cohort.png" % (outputDir)
            label = "Cohort"
            centerCol = whiteRGB.tohex()
            cohortCircleCols = []
            for ringData, ringPalette in zip(orderData, orderColors):
                # first pass: collect the values to derive the ring's range
                ringVals = []
                for sample in samples:
                    if sample in ringData:
                        if orderFeature in ringData[sample]:
                            ringVals.append(ringData[sample][orderFeature])
                        elif "*" in ringData[sample]:
                            ringVals.append(ringData[sample]["*"])
                ringFloats = mData.floatList(ringVals)
                # the range always straddles zero
                minVal = min([-0.01] + ringFloats)
                maxVal = max([0.01] + ringFloats)

                # second pass: one colour per sample, grey when no value
                ringCols = []
                for sample in samples:
                    if sample in ringData:
                        if orderFeature in ringData[sample]:
                            ringCols.append(getColor(ringData[sample][orderFeature], minVal, maxVal, minColor=ringPalette[0], zeroColor=ringPalette[1], maxColor=ringPalette[2]))
                        elif "*" in ringData[sample]:
                            ringCols.append(getColor(ringData[sample]["*"], minVal, maxVal, minColor=ringPalette[0], zeroColor=ringPalette[1], maxColor=ringPalette[2]))
                        else:
                            ringCols.append(greyRGB.tohex())
                    else:
                        ringCols.append(greyRGB.tohex())
                cohortCircleCols.append(ringCols)
            plotCircle(imgFile, label=label, centerCol=centerCol, circleCols=cohortCircleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=5)
    # end section for sample ordering

    ## plot images
    if centerData is not None:
        centerDataFloatList = mData.floatList(centerData.values())
        centerDataMinVal = min([-0.01] + centerDataFloatList)
        centerDataMaxVal = max([0.01] + centerDataFloatList)

    # get min/max values for datasets
    if cohortMinMax:
        (minValList, maxValList) = getCohortMinMaxValues(features, samples, circleData)
    else:
        (minValList, maxValList) = (None, None)
    if colorscaleData is not None:
        (minValList, maxValList) = getColorScaleMinMaxValues(minValList, maxValList, len(circleData), colorscaleData)

    for feature in features:
        log("Drawing %s\n" % (feature))
        centerColHex = None
        if centerData is not None and feature in centerData:
            centerColHex = getColor(centerData[feature], centerDataMinVal, centerDataMaxVal, minColor=lightBlueRGB, zeroColor=whiteRGB, purple0Hack=purpleHack)
        # "/" and ":" in a feature name are replaced for the file name
        imgFile = "%s/%s.png" % (outputDir, re.sub("[/:]", "_", feature))
        label = ""
        if printLabel:
            label = feature
        image_width = 5.0
        drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColorsPalette, width=image_width, centerColHex=centerColHex, minValList=minValList, maxValList=maxValList, purple0Hack=purpleHack)

    for sample in samples:
        log("ordered samples: %s\n" % (sample))
if i.endswith(".html"): continue elif i.endswith(".py"): continue elif i.endswith("js"): continue elif i.endswith("swf"): continue else: log("Working %s ...\n" % (i)) d.write(htmlIndexItem % ("%s.html" % (i), i)) f = open("%s.html" % (i), "w") f.write(htmlHead) f.write(htmlCategory % i) if os.path.exists("%s/stats.tab" % (i)): (samData, samCols, samItems) = mData.rCRSData("%s/stats.tab" % (i), retFeatures = True) samCols = ["Link"] + samCols else: samData = {} samCols = ["Link", "Note"] for j in samCols: samData[j] = {} samItems = [] for j in os.listdir(i): if j.startswith("img"): continue samItems.append(re.sub(".html", "", j)) for j in samItems: samData["Link"][j] = "%s.html" % (j) samData["Note"][j] = "" f.write(htmlTableHead % ("</th>\n <th>".join(samCols)))
def filterNet(files, phenotypes = [], statLine = None, outDir = None):
    """Filter the global pathway per phenotype and write the resulting nets.

    files      -- list of score matrices read with mData.rCRSData; files[0]
                  is the main data set (it defines the phenotypes and is the
                  one consulted for node eligibility and top-node selection)
    phenotypes -- if non-empty, only these phenotypes are processed
                  (the list is only read, never modified)
    statLine   -- optional thresholds "mean;std,mean;std,..." (one pair per
                  file); when None they come from mCalculate.mean_std
    outDir     -- output directory; when None a directory named after each
                  phenotype is used

    Reads the module-level settings filterBounds, globalPathway,
    outputAttributes, includeType, topDisconnected, outputPARADIGM and
    featureReq.  When no net holds enough data in the pathway-predictor
    mode, filterBounds is lowered in place, the function calls itself again
    and then exits the process with status 0.
    """
    global filterBounds
    filterString = "%s_%s" % (filterBounds[0], filterBounds[1])

    ## read global pathway
    (gNodes, gInteractions) = mPathway.rPathway(globalPathway)

    ## read drugs
    #drugData = mData.rSet(drugBank)

    ## write LABEL.NA, TYPE.NA
    if outputAttributes:
        with open("TYPE.NA", "w") as typef:
            with open("LABEL.NA", "w") as labelf:
                typef.write("TYPE (class=java.lang.String)\n")
                labelf.write("LABEL (class=java.lang.String)\n")
                for node in gNodes:
                    typef.write("%s = %s\n" % (node, gNodes[node]))
                    # only proteins get their own name as label
                    if gNodes[node] == "protein":
                        labelf.write("%s = %s\n" % (node, node))
                    else:
                        labelf.write("%s = %s\n" % (node, ""))
                #drugs here

    ## read scores
    # uData holds the raw values, sData the absolute values ("NA" when a
    # value is not numeric)
    uData = dict()
    sData = dict()
    for i in range(len(files)):
        uData[i] = mData.rCRSData(files[i])
        sData[i] = dict()
        for j in uData[i].keys():
            sData[i][j] = dict()
            for k in uData[i][j].keys():
                try:
                    sData[i][j][k] = abs(float(uData[i][j][k]))
                except ValueError:
                    sData[i][j][k] = "NA"
    nSets = len(sData)

    ## iterate phenotypes
    for p in sData[0].keys():
        if len(phenotypes) > 0:
            if p not in phenotypes:
                continue
        pNodes = dict()
        pInteractions = dict()

        ## write SCORE.NA
        if outputAttributes:
            with open(p+"_SCORE.NA", "w") as scoref:
                scoref.write("SCORE (class=java.lang.Float)\n")
                for node in gNodes:
                    if node in uData[0][p] and uData[0][p][node] != "NA":
                        scoref.write("%s = %s\n" % (node, uData[0][p][node]))
                    else:
                        # missing and "NA" scores are written as 0
                        scoref.write("%s = %s\n" % (node, "0"))

        ## compute thresholds
        # pStats[i] is a (mean, std) pair for data set i
        pStats = []
        if statLine is None:
            for i in range(nSets):
                pStats.append(mCalculate.mean_std(sData[i][p].values()))
        else:
            for pair in re.split(",", statLine):
                (v1, v2) = re.split(";", pair)
                pStats.append((float(v1), float(v2)))
        log("%s\t%s;%s" % (p, pStats[0][0], pStats[0][1]))
        for i in range(1, len(pStats)):
            log(",%s;%s" % (pStats[i][0], pStats[i][1]))
        log("\n")

        ## iterate links
        for a in gInteractions.keys():
            if a not in sData[0][p]:
                continue
            elif sData[0][p][a] == "NA":
                continue
            for b in gInteractions[a].keys():
                if b not in sData[0][p]:
                    continue
                elif sData[0][p][b] == "NA":
                    continue

                ## score nodes by threshold
                # 2: above mean + upper bound * std, 1: above mean + lower
                # bound * std, 0: otherwise
                aScore = []
                bScore = []
                for i in range(nSets):
                    upper = pStats[i][0]+filterBounds[1]*pStats[i][1]
                    lower = pStats[i][0]+filterBounds[0]*pStats[i][1]
                    for (score, scoreList) in ((sData[i][p][a], aScore), (sData[i][p][b], bScore)):
                        if score > upper:
                            scoreList.append(2)
                        elif score > lower:
                            scoreList.append(1)
                        else:
                            scoreList.append(0)

                ## selection rule
                if includeType == "OR":
                    include = max(aScore)+max(bScore) >= 3
                elif includeType == "AND":
                    # every data set has to vote for the link; the vote
                    # counter used to be incremented by 0, so this rule
                    # never selected anything
                    include = all(aScore[i]+bScore[i] >= 3 for i in range(nSets))
                elif includeType == "MAIN":
                    include = aScore[0]+bScore[0] >= 3
                else:
                    include = False
                if include:
                    (pNodes, pInteractions) = addLink(a, b, pNodes, pInteractions, gNodes, gInteractions)

        ## connect top scoring disconnected nodes
        # scored proteins, best first; "NA" entries are dropped up front so
        # an empty or all-"NA" phenotype no longer raises IndexError
        sortedTop = []
        for node in sData[0][p].keys():
            if node not in gNodes:
                continue
            if gNodes[node] == "protein" and sData[0][p][node] != "NA":
                sortedTop.append(node)
        sortedTop.sort(key=lambda node: sData[0][p][node], reverse=True)
        for i in range(min(topDisconnected, len(sortedTop))):
            if sData[0][p][sortedTop[i]] < pStats[0][0]+filterBounds[0]*pStats[0][1]:
                break
            if sortedTop[i] not in pNodes:
                pNodes[sortedTop[i]] = gNodes[sortedTop[i]]
                pInteractions[sortedTop[i]] = dict()
                pInteractions[sortedTop[i]]["__DISCONNECTED__"] = "-disconnected-"

        ## output
        if outDir is None:
            wrtDir = p
        else:
            wrtDir = outDir
        if not os.path.exists(wrtDir):
            # no shell involved, so names with spaces or metacharacters work
            os.makedirs(wrtDir)

        ## output for pathway-predictor
        if outputPARADIGM:
            protSet = set(node for node in gNodes if gNodes[node] == "protein")
            netNodes = mPathway.sortConnected(pNodes, pInteractions, mPathway.revInteractions(pInteractions))
            trainNodes = []
            for net in netNodes:
                if len(protSet & set(net)) > featureReq:
                    trainNodes += net
            if len(trainNodes) == 0:
                log("ERROR: no nets contained enough data\n...trying again\n")
                # relax the bounds (the upper one first) and start over
                if filterBounds[0]+0.1 <= filterBounds[1]:
                    filterBounds[1] -= 0.1
                else:
                    filterBounds[0] -= 0.1
                    filterBounds[1] -= 0.1
                filterNet(files, phenotypes = phenotypes, statLine = statLine, outDir = outDir)
                sys.exit(0)
            (lNodes, lInteractions) = mPathway.constructInteractions(trainNodes, pNodes, pInteractions)
            if outputAttributes:
                mPathway.wSIF("%s/%s_%s_pp.sif" % (wrtDir, p, filterString), lInteractions)
            ## connect class node
            classNode = "class"
            lInteractions[classNode] = dict()
            for node in lNodes.keys():
                if node not in protSet:
                    continue
                lInteractions[classNode][node] = "-cl>"
            lNodes[classNode] = "active"
            mPathway.wPathway("%s/%s_%s_pp.tab" % (wrtDir, p, filterString), lNodes, lInteractions)
        ## output nodrug pathway
        else:
            mPathway.wSIF("%s/%s_%s_nodrug.sif" % (wrtDir, p, filterString), pInteractions)
            (cpNodes, cpInteractions) = mPathway.filterComplexesByGeneSupport(pNodes, pInteractions, mPathway.revInteractions(pInteractions), gNodes, mPathway.getComponentMap(gNodes, mPathway.revInteractions(gInteractions)))
            mPathway.wSIF("%s/%s_%s_nodrug_cleaned.sif" % (wrtDir, p, filterString), cpInteractions)
## output summary f = open(re.split("/", sifFile)[-2] + ".summary.tab", "w") features = statsMap.keys() features.sort(lambda x, y: cmp(statsMap[x][1], statsMap[y][1])) for feature in features: f.write("%s\t%s\t%s\t%s\t%s\n" % (feature, statsMap[feature][1], ",".join(membersMap[feature]), proportionMap[feature], statsMap[feature][0])) f.close() ## summarize pathways if iplMatrix is not None: pathwayVals = {} pathwayScores = {} (iplData, iplSamples, iplFeatures) = mData.rCRSData(iplMatrix, retFeatures=True) f = open(re.split("/", sifFile)[-2] + ".members.output", "r") for line in f: line = line.rstrip("\n\r") pline = re.split("\t", line) if pline[1] in statsMap: if pline[1] not in pathwayVals: pathwayVals[pline[1]] = {} for sample in iplSamples: pathwayVals[pline[1]][sample] = [] if pline[0] in iplFeatures: for sample in iplSamples: pathwayVals[pline[1]][sample].append( abs(float(iplData[sample][pline[0]]))) f.close() for pathwayName in pathwayVals.keys():
continue elif i.endswith(".py"): continue elif i.endswith("js"): continue elif i.endswith("swf"): continue else: log("Working %s ...\n" % (i)) d.write(htmlIndexItem % ("%s.html" % (i), i)) f = open("%s.html" % (i), "w") f.write(htmlHead) f.write(htmlCategory % i) if os.path.exists("%s/stats.tab" % (i)): (samData, samCols, samItems) = mData.rCRSData("%s/stats.tab" % (i), retFeatures=True) samCols = ["Link"] + samCols else: samData = {} samCols = ["Link", "Note"] for j in samCols: samData[j] = {} samItems = [] for j in os.listdir(i): if j.startswith("img"): continue samItems.append(re.sub(".html", "", j)) for j in samItems: samData["Link"][j] = "%s.html" % (j) samData["Note"][j] = "" f.write(htmlTableHead % ("</th>\n <th>".join(samCols)))
def cli_routine(outputDir, circleFiles, orderFiles, sampleFile, featureFile,
                orderFeature, centerFile, colorscaleFile, printLabel, verbose,
                cohortMinMax=False, purpleHack=True):
    """Routine for program execution via command-line.

    Reads the input files, optionally sorts the samples, and requests one
    image per feature via drawCircleImageForFeature.

    outputDir      -- directory used to build the image paths
                      ("<outputDir>/Cohort.png", "<outputDir>/<feature>.png")
    circleFiles    -- list of matrix paths, each read with
                      mData.rCRSData(..., retFeatures=True)
    orderFiles     -- list of matrix paths read with mData.rCRSData; when
                      non-empty they drive the sample sort and the cohort
                      image, otherwise the circle data drives the sort
    sampleFile     -- list file read with mData.rList, or None to use the
                      union of the columns of all circleFiles
    featureFile    -- list file read with mData.rList, or None to use the
                      union of the rows of all circleFiles
    orderFeature   -- feature handed to scmp for sorting; None disables the
                      sort and the cohort image
    centerFile     -- file read with mData.r2Col(header=True), or None
    colorscaleFile -- colour-scale file (format described below), or None
    printLabel     -- when true the feature name is used as image label
    verbose        -- not used inside this routine
    cohortMinMax   -- when true min/max lists come from
                      getCohortMinMaxValues
    purpleHack     -- forwarded as purple0Hack to getColor and
                      drawCircleImageForFeature

    Errors in the colour-scale file are reported through log(..., die=True),
    which is presumably fatal -- confirm against log's definition.
    """
    # I've tried not to touch this method as much as possible.
    # I don't want to break the way it was working for Sam Ng.
    # chrisw

    ## execute
    samples = []
    features = []
    if sampleFile is not None:
        samples = mData.rList(sampleFile)
    if featureFile is not None:
        features = mData.rList(featureFile)
    # end section for getting lists of samples and features

    ## read circleFiles
    # circleData is a list of dict[col][row]=score from each circleFile
    circleData = []
    # circleColorsPalette is a list of (minColor),(zeroColor),(maxColor)
    circleColorsPalette = []

    ## read colorscaleFile
    # the format is as follows - header compulsory:
    # min/max   color coding   color1        color2        color 3
    # -2,2      rgb            155,155,155   255,255,255   0,0,0,
    # -         rgb            155,0,155     255,0,255     0,0,0,
    # the "color format" is intended to support more color format, as I have
    # seen the html-colors in the code.
    # Michael (contact address is obfuscated in this copy of the source)
    colorscaleData = None
    if colorscaleFile is not None:
        if cohortMinMax:
            log("WARNING: The -k option overrides -m")
        colorscaleData = mData.retRows(colorscaleFile, aslist=True)
        # the first row is reported as line 2 in error messages
        for line, cs in enumerate(colorscaleData, 2):
            if len(cs) != 5:
                log("ERROR: color scale needs five fields: datapoints, colorcoding(rgb) and three colors\n", die=True)
            try:
                cs[0] = [float(x) for x in cs[0].split(",")]
                parsed = True
            except ValueError:
                # cs[0] keeps its raw text, which is only valid if it is "-"
                parsed = False
            # Checking len() on the raw text would let any unparsable
            # two-character field (e.g. "ab" or "1,") slip through.
            if not ((parsed and len(cs[0]) == 2) or cs[0] == "-"):
                print(cs[0])
                log("ERROR: Two data points or dash needed for color scale\n", die=True)
            if cs[1].lower() == "rgb":
                try:
                    for field in (2, 3, 4):
                        cs[field] = rgb(*[float(x) for x in cs[field].split(",")])
                except TypeError:
                    log("ERROR: RGB needs three values on line " + str(line) + "\n", die=True)
                except ValueError:
                    log("ERROR: RGB color not correctly defined on line " + str(line) + "\n", die=True)
            else:
                log("ERROR: Unknown color coding on line " + str(line) + ": " + str(cs[1]) + "\n", die=True)

    for i, circleFile in enumerate(circleFiles):
        # get data, samples, and features from each circleFile
        # data is a dict[col][row]=score
        # cols is a list of sample names
        # rows is a list of feature names
        (data, cols, rows) = mData.rCRSData(circleFile, retFeatures=True)
        circleData.append(data)

        minCol = lightBlueRGB
        zerCol = whiteRGB
        maxCol = redRGB
        if colorscaleFile is not None and i < len(colorscaleData):
            # get colors from specified colorscaleFile
            minCol = colorscaleData[i][2]
            zerCol = colorscaleData[i][3]
            maxCol = colorscaleData[i][4]
        # (the former filename-based special cases for "meth" and "mut"
        # files are disabled)
        circleColorsPalette.append((minCol, zerCol, maxCol))

        # if no sampleFile/featureFile, default to using samples/features from circleFiles
        if sampleFile is None:
            samples = list(set(cols) | set(samples))
        if featureFile is None:
            features = list(set(rows) | set(features))
    # end section for reading circleFiles

    ## read centerFile
    centerData = None
    if centerFile is not None:
        centerData = mData.r2Col(centerFile, header=True)

    ## sort
    if orderFeature is not None:
        if len(orderFiles) > 0:
            orderData = []
            orderColors = []
            for orderFile in orderFiles:
                orderData.append(mData.rCRSData(orderFile))
                # (minColor, zeroColor, maxColor)
                orderColors.append((whiteRGB, whiteRGB, blackRGB))
        else:
            orderData = circleData

        # sort samples based on sample score in orderData
        # priority of sorting determined by orderFiles parameter
        samples.sort(lambda x, y: scmp(x, y, orderFeature, orderData))

        ## cohort png
        # cgi will probably not use orderFiles
        if len(orderFiles) > 0:
            imgFile = "%s/Cohort.png" % (outputDir)
            label = "Cohort"
            centerCol = whiteRGB.tohex()
            cohortCircleCols = []
            for ringData, ringPalette in zip(orderData, orderColors):
                # first pass: collect the values to derive the ring's range
                ringVals = []
                for sample in samples:
                    if sample in ringData:
                        if orderFeature in ringData[sample]:
                            ringVals.append(ringData[sample][orderFeature])
                        elif "*" in ringData[sample]:
                            ringVals.append(ringData[sample]["*"])
                ringFloats = mData.floatList(ringVals)
                # the range always straddles zero
                minVal = min([-0.01] + ringFloats)
                maxVal = max([0.01] + ringFloats)

                # second pass: one colour per sample, grey when no value
                ringCols = []
                for sample in samples:
                    if sample in ringData:
                        if orderFeature in ringData[sample]:
                            ringCols.append(getColor(ringData[sample][orderFeature], minVal, maxVal, minColor=ringPalette[0], zeroColor=ringPalette[1], maxColor=ringPalette[2]))
                        elif "*" in ringData[sample]:
                            ringCols.append(getColor(ringData[sample]["*"], minVal, maxVal, minColor=ringPalette[0], zeroColor=ringPalette[1], maxColor=ringPalette[2]))
                        else:
                            ringCols.append(greyRGB.tohex())
                    else:
                        ringCols.append(greyRGB.tohex())
                cohortCircleCols.append(ringCols)
            plotCircle(imgFile, label=label, centerCol=centerCol, circleCols=cohortCircleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=5)
    # end section for sample ordering

    ## plot images
    if centerData is not None:
        centerDataFloatList = mData.floatList(centerData.values())
        centerDataMinVal = min([-0.01] + centerDataFloatList)
        centerDataMaxVal = max([0.01] + centerDataFloatList)

    # get min/max values for datasets
    if cohortMinMax:
        (minValList, maxValList) = getCohortMinMaxValues(features, samples, circleData)
    else:
        (minValList, maxValList) = (None, None)
    if colorscaleData is not None:
        (minValList, maxValList) = getColorScaleMinMaxValues(minValList, maxValList, len(circleData), colorscaleData)

    for feature in features:
        log("Drawing %s\n" % (feature))
        centerColHex = None
        if centerData is not None and feature in centerData:
            centerColHex = getColor(centerData[feature], centerDataMinVal, centerDataMaxVal, minColor=lightBlueRGB, zeroColor=whiteRGB, purple0Hack=purpleHack)
        # "/" and ":" in a feature name are replaced for the file name
        imgFile = "%s/%s.png" % (outputDir, re.sub("[/:]", "_", feature))
        label = ""
        if printLabel:
            label = feature
        image_width = 5.0
        drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColorsPalette, width=image_width, centerColHex=centerColHex, minValList=minValList, maxValList=maxValList, purple0Hack=purpleHack)

    for sample in samples:
        log("ordered samples: %s\n" % (sample))
log("ERROR: incorrect number of arguments", die=True) phenotypeFile = args[0] dataFile = args[1] global verbose for o, a in opts: if o == "-q": verbose = False ## execute phenotypeName = re.split("/", phenotypeFile)[-1].rstrip(".tab") dataName = re.split("/", dataFile)[-1].rstrip(".tab") outputDir = "OCCAM__%s__%s" % (phenotypeName, dataName) syscmd("mkdir %s" % (outputDir)) (phenData, phenColumns, phenRows) = mData.rCRSData(phenotypeFile, retFeatures=True) (matData, matColumns, matRows) = mData.rCRSData(dataFile, retFeatures=True) ## samples posSamples = [] negSamples = [] for sample in phenRows: if sample not in matColumns: continue if phenData[phenColumns[phenIndex]][sample] == "+": posSamples.append(sample) elif phenData[phenColumns[phenIndex]][sample] == "-": negSamples.append(sample) ## output f = open("%s/results.tab" % (outputDir), "w")
log("ERROR: incorrect number of arguments", die = True) phenotypeFile = args[0] dataFile = args[1] global verbose for o, a in opts: if o == "-q": verbose = False ## execute phenotypeName = re.split("/", phenotypeFile)[-1].rstrip(".tab") dataName = re.split("/", dataFile)[-1].rstrip(".tab") outputDir = "OCCAM__%s__%s" % (phenotypeName, dataName) syscmd("mkdir %s" % (outputDir)) (phenData, phenColumns, phenRows) = mData.rCRSData(phenotypeFile, retFeatures = True) (matData, matColumns, matRows) = mData.rCRSData(dataFile, retFeatures = True) ## samples posSamples = [] negSamples = [] for sample in phenRows: if sample not in matColumns: continue if phenData[phenColumns[phenIndex]][sample] == "+": posSamples.append(sample) elif phenData[phenColumns[phenIndex]][sample] == "-": negSamples.append(sample) ## output f = open("%s/results.tab" % (outputDir), "w")
usage(1) if (len(args) == 1): tabFile = sys.stdin htmlFile = args[0] else: tabFile = args[0] htmlFile = args[1] global verbose for o, a in opts: if o == "-q": verbose = False ## read tabFile (tabData, tabCols, tabRows) = mData.rCRSData(tabFile, retFeatures = True) ## write htmlFile f = open("%s" % (htmlFile), "w") f.write(htmlHead) tabCols = ["id"] + tabCols f.write(htmlTableHead % ("</th>\n <th>".join(tabCols))) for i in tabRows: tabList = [] for j in tabCols[1:]: tabList.append(str(tabData[j][i])) f.write(htmlTableItem % (i, "</td>\n <td>".join(tabList))) f.write(htmlTableTail) f.write(htmlTail) f.close()
if sampleFile != None: samples = mData.rList(sampleFile) if featureFile != None: features = mData.rList(featureFile) ## read circleFiles circleData = [] circleColors = [] ## ## record file types for each, effects the color scheme ## use the input index for each color_scheme_map = {} for i in range(len(circleFiles)): circleFile, colorScheme = circleFiles[i].split(':') color_scheme_map[i] = parseColorScheme(colorScheme) (data, cols, rows) = mData.rCRSData(circleFile, retFeatures = True) circleData.append(data) #circleColors.append( (minCol, zerCol, maxCol) ) #if sampleFile == None: # samples = list(set(cols) | set(samples)) #if featureFile == None: # features = list(set(rows) | set(features)) mutationData = None if mutationsFile != None: mutationData, cols, rows = mData.rCRSData(mutationsFile, retFeatures = True) ## read centerFile centerData = None if centerFile != None: centerData = mData.r2Col(centerFile, header = True)