Python rCRSData Examples, mData.rCRSData Python Examples

Example #1

0

Show file

File: mCalculate.py Project: smsinks/stuartlab-circleplotter-py

def correlationPairwise(outf, inf, group1, group2, method=pcorrelation):
    """Compute all pairwise correlations between group1 and group2.

    The table is loaded with ``mData.rCRSData(inf)`` and is used here as a
    mapping ``column -> {row -> value}``.  For every ``i`` in ``group1`` and
    ``j`` in ``group2`` that are both keys of that mapping, one line
    ``"<i>\\t<j>\\t<value>\\n"`` is written to ``outf``, where ``value`` is
    ``method(values_of_i, values_of_j)``.

    Args:
        outf: path of the tab-separated output file (opened for writing).
        inf: path passed to ``mData.rCRSData``.
        group1: iterable of column keys; keys absent from the data are skipped.
        group2: iterable of column keys; keys absent from the data are skipped.
        method: callable taking two equally ordered value lists.

    Raises:
        IndexError: if the loaded table has no columns.
    """
    inData = mData.rCRSData(inf)
    # The row order is taken from the first column and reused for every
    # column so that the two value lists passed to `method` are aligned.
    # list() keeps the indexing valid when keys() returns a view.
    firstCol = list(inData.keys())[0]
    inRows = list(inData[firstCol].keys())
    # The context manager closes the file even if `method` or a lookup raises.
    with open(outf, "w") as f:
        for i in group1:
            if i not in inData:
                continue
            list1 = [inData[i][k] for k in inRows]
            for j in group2:
                if j not in inData:
                    continue
                list2 = [inData[j][k] for k in inRows]
                value = method(list1, list2)
                f.write("%s\t%s\t%s\n" % (i, j, value))

Example #2

0

Show file

File: mCalculate.py Project: Al3n70rn/stuartlab-circleplotter-py

def correlationPairwise(outf, inf, group1, group2, method = pcorrelation):
    """Compute all pairwise correlations between group1 and group2.

    The table is loaded with ``mData.rCRSData(inf)`` and is used here as a
    mapping ``column -> {row -> value}``.  For every ``i`` in ``group1`` and
    ``j`` in ``group2`` that are both keys of that mapping, one line
    ``"<i>\\t<j>\\t<value>\\n"`` is written to ``outf``, where ``value`` is
    ``method(values_of_i, values_of_j)``.

    Args:
        outf: path of the tab-separated output file (opened for writing).
        inf: path passed to ``mData.rCRSData``.
        group1: iterable of column keys; keys absent from the data are skipped.
        group2: iterable of column keys; keys absent from the data are skipped.
        method: callable taking two equally ordered value lists.

    Raises:
        IndexError: if the loaded table has no columns.
    """
    inData = mData.rCRSData(inf)
    # The row order is taken from the first column and reused for every
    # column so that the two value lists passed to `method` are aligned.
    # list() keeps the indexing valid when keys() returns a view.
    firstCol = list(inData.keys())[0]
    inRows = list(inData[firstCol].keys())
    # The context manager closes the file even if `method` or a lookup raises.
    with open(outf, "w") as f:
        for i in group1:
            if i not in inData:
                continue
            list1 = [inData[i][k] for k in inRows]
            for j in group2:
                if j not in inData:
                    continue
                list2 = [inData[j][k] for k in inRows]
                value = method(list1, list2)
                f.write("%s\t%s\t%s\n" % (i, j, value))

Example #3

0

Show file

File: mCalculate.py Project: Al3n70rn/stuartlab-circleplotter-py

def correlationMatrix(outf, inf, method = pcorrelation):
    """Cross-correlate the columns of a tab file and write the square matrix.

    The table is loaded with ``mData.rCRSData(inf)`` and used as a mapping
    ``column -> {row -> value}``.  Every unordered pair of columns is passed
    to ``method`` once and the result is stored symmetrically; the diagonal
    is fixed at 1.0 without calling ``method``.  The resulting nested dict
    is written with ``mData.wCRSData(outf, ...)``.

    Args:
        outf: output path passed to ``mData.wCRSData``.
        inf: input path passed to ``mData.rCRSData``.
        method: callable taking two equally ordered value lists.

    Raises:
        IndexError: if the loaded table has no columns.
    """
    inData = mData.rCRSData(inf)
    # Materialise the keys once: the columns are indexed by position below,
    # which a dict view does not support.
    inCols = list(inData.keys())
    # Row order comes from the first column and is shared by all columns so
    # the value lists handed to `method` are aligned.
    inRows = list(inData[inCols[0]].keys())

    outData = dict()
    for col in inCols:
        outData[col] = {col: 1.0}

    for i in range(len(inCols) - 1):
        colA = inCols[i]
        list1 = [inData[colA][k] for k in inRows]
        for j in range(i + 1, len(inCols)):
            colB = inCols[j]
            list2 = [inData[colB][k] for k in inRows]
            value = method(list1, list2)
            outData[colA][colB] = value
            outData[colB][colA] = value
    mData.wCRSData(outf, outData)

Example #4

0

Show file

File: mCalculate.py Project: smsinks/stuartlab-circleplotter-py

def correlationMatrix(outf, inf, method=pcorrelation):
    """Cross-correlate the columns of a tab file and write the square matrix.

    The table is loaded with ``mData.rCRSData(inf)`` and used as a mapping
    ``column -> {row -> value}``.  Every unordered pair of columns is passed
    to ``method`` once and the result is stored symmetrically; the diagonal
    is fixed at 1.0 without calling ``method``.  The resulting nested dict
    is written with ``mData.wCRSData(outf, ...)``.

    Args:
        outf: output path passed to ``mData.wCRSData``.
        inf: input path passed to ``mData.rCRSData``.
        method: callable taking two equally ordered value lists.

    Raises:
        IndexError: if the loaded table has no columns.
    """
    inData = mData.rCRSData(inf)
    # Materialise the keys once: the columns are indexed by position below,
    # which a dict view does not support.
    inCols = list(inData.keys())
    # Row order comes from the first column and is shared by all columns so
    # the value lists handed to `method` are aligned.
    inRows = list(inData[inCols[0]].keys())

    outData = dict()
    for col in inCols:
        outData[col] = {col: 1.0}

    for i in range(len(inCols) - 1):
        colA = inCols[i]
        list1 = [inData[colA][k] for k in inRows]
        for j in range(i + 1, len(inCols)):
            colB = inCols[j]
            list2 = [inData[colB][k] for k in inRows]
            value = method(list1, list2)
            outData[colA][colB] = value
            outData[colB][colA] = value
    mData.wCRSData(outf, outData)

Example #5

0

Show file

File: overlap-pathmark.py Project: sng87/pathmark-scripts

            "%s\t%s\t%s\t%s\t%s\n"
            % (
                feature,
                statsMap[feature][1],
                ",".join(membersMap[feature]),
                proportionMap[feature],
                statsMap[feature][0],
            )
        )
    f.close()

    ## summarize pathways
    if iplMatrix is not None:
        pathwayVals = {}
        pathwayScores = {}
        (iplData, iplSamples, iplFeatures) = mData.rCRSData(iplMatrix, retFeatures=True)
        f = open(re.split("/", sifFile)[-2] + ".members.output", "r")
        for line in f:
            line = line.rstrip("\n\r")
            pline = re.split("\t", line)
            if pline[1] in statsMap:
                if pline[1] not in pathwayVals:
                    pathwayVals[pline[1]] = {}
                    for sample in iplSamples:
                        pathwayVals[pline[1]][sample] = []
                if pline[0] in iplFeatures:
                    for sample in iplSamples:
                        pathwayVals[pline[1]][sample].append(abs(float(iplData[sample][pline[0]])))
        f.close()
        for pathwayName in pathwayVals.keys():
            pathwayScores[pathwayName] = {}

Example #6

0

Show file

File: circlePlot.py Project: smsinks/paradigm-scripts

     elif o == "-q":
         verbose = False
 
 ## execute
 samples = []
 features = []
 if sampleFile != None:
     samples = mData.rList(sampleFile)
 if featureFile != None:
     features = mData.rList(featureFile)
 
 ## read circleFiles
 circleData = []
 circleColors = []
 for i in range(len(circleFiles)):
     (data, cols, rows) = mData.rCRSData(circleFiles[i], retFeatures = True)
     circleData.append(data)
     minCol = rgb(0, 0, 255)
     zerCol = rgb(255, 255, 255)
     maxCol = rgb(255, 0, 0)
     if circleFiles[i].endswith("meth"):
         maxCol = rgb(0, 0, 255)
         minCol = rgb(255, 0, 0)
         log("Color: meth\n")
     elif circleFiles[i].startswith("mut."):
         maxCol = rgb(0, 0, 0)
         minCol = rgb(255, 255, 255)
         log("Color: mut\n")
     circleColors.append( (minCol, zerCol, maxCol) )
     if sampleFile == None:
         samples = list(set(cols) | set(samples))

Example #7

0

Show file

File: circlePlot.py Project: Al3n70rn/stuartlab-circleplotter-py

def cli_routine(outputDir, circleFiles, orderFiles, sampleFile, featureFile, orderFeature, centerFile, colorscaleFile, printLabel, verbose, cohortMinMax=False, purpleHack = True):
	"""Routine for program execution via command-line.

	Reads the input files, optionally sorts the samples and draws a
	"Cohort.png" overview, then draws one "<feature>.png" per feature
	into outputDir.

	Parameters (as used by the body):
	outputDir      -- directory prefix for every image path that is built
	circleFiles    -- list of matrix files, each read with mData.rCRSData
	orderFiles     -- list of matrix files used for sorting / the cohort image
	sampleFile     -- list file of samples, or None to take the union of the
	                  columns of all circleFiles
	featureFile    -- list file of features, or None to take the union of the
	                  rows of all circleFiles
	orderFeature   -- feature to sort samples by, or None for no sorting
	centerFile     -- two-column file (with header) of per-feature center
	                  values, or None
	colorscaleFile -- color scale definition (format described below), or None
	printLabel     -- if true, each image is labelled with its feature name
	verbose        -- accepted but not referenced anywhere in this body
	cohortMinMax   -- if true, min/max come from getCohortMinMaxValues
	purpleHack     -- forwarded as purple0Hack to getColor and
	                  drawCircleImageForFeature
	"""
	# I've tried not to touch this method as much as possible.
	# I don't want to break the way it was working for Sam Ng.
	# chrisw
	
	## execute
	samples = []
	features = []
	if sampleFile != None:
		samples = mData.rList(sampleFile)
	if featureFile != None:
		features = mData.rList(featureFile)
	# end section for getting lists of samples and features
	
	## read circleFiles
	# circleData is a list of dict[col][row]=score from each circleFile
	circleData = []
	# circleColorsPalette is a list of (minColor),(zeroColor),(maxColor)
	circleColorsPalette = []

	## read colorscaleFile
	# the format is as follows - header compulsory:
	# min/max	color coding	color1		color2		color 3
	# -2,2		rgb		155,155,155	255,255,255	0,0,0,
	# -		rgb		155,0,155	255,0,255	0,0,0,
	# the "color format" is intended to support more color format, as I have 
	# seen the html-colors in the code.
	# Michael ([email protected])
	colorscaleData = None
	if colorscaleFile != None:

		if cohortMinMax:
			log("WARNING: The -k option overrides -m")

		colorscaleData = mData.retRows(colorscaleFile,aslist=True)
		# `line` is incremented before it is reported, so the first data
		# row is reported as line 2 (line 1 being the compulsory header).
		line=1 
		for cs in colorscaleData:
			line = line + 1
			if len(cs) != 5:
				log("ERROR: color scale needs five fields: datapoints, colorcoding(rgb) and three colors\n", die = True)
			# Field 0 becomes a list of floats when it parses; otherwise it
			# is left as the original string (e.g. "-").
			try:
				cs[0] =  [float(x) for x in cs[0].split(",")]
			except ValueError:
				pass
			# NOTE(review): an unparseable field of exactly two characters
			# also passes this length check - confirm whether intended.
			if len(cs[0]) != 2 and cs[0] != "-":
				print cs[0]
				log("ERROR: Two data points or dash needed for color scale\n", die = True)
			if cs[1].lower() == "rgb":
				# Fields 2-4 are replaced in place by rgb(...) objects built
				# from their comma-separated components.
				try:
					cs[2] =  rgb(*[float(x) for x in cs[2].split(",")])
					cs[3] =  rgb(*[float(x) for x in cs[3].split(",")])
					cs[4] =  rgb(*[float(x) for x in cs[4].split(",")])
				except TypeError:
						log("ERROR: RGB needs three values on line " + str(line) + "\n", die = True)
				except ValueError:
						log("ERROR: RGB color not correctly defined on line " + str(line) + "\n", die=True)
			else:
				log("ERROR: Unknown color coding on line " + str(line) + ": " + str(cs[1]) + "\n", die=True)


	for i in xrange(len(circleFiles)):
		# get data, samples, and features from each circleFile
		# data is a dict[col][row]=score
		# cols is a list of sample names
		# features is a list of feature names
		(data, cols, rows) = mData.rCRSData(circleFiles[i], retFeatures=True)
		circleData.append(data)
		# default palette, used when no color scale row exists for ring i
		minCol = lightBlueRGB
		zerCol = whiteRGB
		maxCol = redRGB
		if colorscaleFile != None and i<len(colorscaleData):
			#get colors from specified colorscaleFile
			minCol = colorscaleData[i][2]
			zerCol = colorscaleData[i][3]
			maxCol = colorscaleData[i][4]

		# special cases for -meth and -mut
#		if circleFiles[i].endswith("meth"):
#			maxCol = blueRGB
#			minCol = redRGB
#			log("Color: meth\n")
#		elif circleFiles[i].endswith("mut"):
#			maxCol = blackRGB
#			minCol = whiteRGB
#			log("Color: mut\n")

		circleColorsPalette.append((minCol, zerCol, maxCol))

		# if no sampleFile/featureFile, default to using samples/features from circleFiles
		if sampleFile == None:
			samples = list(set(cols) | set(samples))
		if featureFile == None:
			features = list(set(rows) | set(features))
	# end section for reading circleFiles

	## read centerFile
	centerData = None
	if centerFile != None:
		centerData = mData.r2Col(centerFile, header=True)

	## sort
	if orderFeature != None:
		if len(orderFiles) > 0:
			orderData = []
			# orderColors is only defined (and only read) when orderFiles
			# is non-empty; every order ring uses white/white/black.
			orderColors = []
			for i in xrange(len(orderFiles)):
				orderData.append(mData.rCRSData(orderFiles[i]))
				minCol = whiteRGB
				zerCol = whiteRGB
				maxCol = blackRGB
				orderColors.append((minCol, zerCol, maxCol))
		else:
			orderData = circleData

		# sort samples based on sample score in orderData
		# priority of sorting determined by orderFiles parameter
		samples.sort(lambda x, y: scmp(x, y, orderFeature, orderData))

		## cohort png
		# cgi will probably not use orderFiles
		if len(orderFiles) > 0:
			imgFile = "%s/Cohort.png" % (outputDir)
			label = "Cohort"
			centerCol = whiteRGB.tohex()
			cohortCircleCols = []
			for i in xrange(len(orderData)):
				ringCols = []
				ringVals = []
				# first pass: collect the values that will be colored, using
				# the "*" row as a fallback when orderFeature is absent
				for sample in samples:
					if sample in orderData[i]:
						if orderFeature in orderData[i][sample]:
							ringVals.append(orderData[i][sample][orderFeature])
						elif "*" in orderData[i][sample]:
							ringVals.append(orderData[i][sample]["*"])
				# the +/-0.01 seeds keep the range non-empty and spanning zero
				minVal = min([-0.01] + mData.floatList(ringVals))
				maxVal = max([0.01] + mData.floatList(ringVals))
				# second pass: one color per sample; grey when the sample or
				# both candidate rows are missing
				for sample in samples:
					if sample in orderData[i]:
						if orderFeature in orderData[i][sample]:
							ringCols.append(getColor(orderData[i][sample][orderFeature], minVal, maxVal, minColor=orderColors[i][0], zeroColor=orderColors[i][1], maxColor=orderColors[i][2]))
						elif "*" in orderData[i][sample]:
							ringCols.append(getColor(orderData[i][sample]["*"], minVal, maxVal, minColor=orderColors[i][0], zeroColor=orderColors[i][1], maxColor=orderColors[i][2]))
						else:
							ringCols.append(greyRGB.tohex())
					else:
						ringCols.append(greyRGB.tohex())
				cohortCircleCols.append(ringCols)
			plotCircle(imgFile, label=label, centerCol=centerCol, circleCols=cohortCircleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=5)
	# end section for sample ordering

	## plot images
	# center min/max are only defined when centerData exists; they are only
	# read below under the same condition
	if centerData != None:
		centerDataFloatList = mData.floatList(centerData.values())
		centerDataMinVal = min([-0.01] + centerDataFloatList)
		centerDataMaxVal = max([0.01] + centerDataFloatList)

	# get min/max values for datasets
	if cohortMinMax:
		(minValList, maxValList) = getCohortMinMaxValues(features, samples, circleData)
	else:
		(minValList, maxValList) = (None, None)

	# a color scale file, when given, gets the final say on min/max
	if colorscaleData != None:
		(minValList, maxValList) = getColorScaleMinMaxValues(minValList, maxValList, len(circleData), colorscaleData)

	for feature in features:
		log("Drawing %s\n" % (feature))
		centerColHex = None
		if centerData != None:
			if feature in centerData:
				centerColHex = getColor(centerData[feature], centerDataMinVal, centerDataMaxVal, minColor=lightBlueRGB, zeroColor=whiteRGB, purple0Hack=purpleHack)
				
		# "/" and ":" in the feature name are replaced so it is a usable file name
		imgFile = "%s/%s.png" % (outputDir, re.sub("[/:]", "_", feature))

		label = ""
		if printLabel:
			label = feature

		image_width = 5.0

		drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColorsPalette, width=image_width, centerColHex=centerColHex, minValList=minValList, maxValList=maxValList, purple0Hack=purpleHack)

	for sample in samples:
		log("ordered samples: %s\n" % (sample))

Example #8

0

Show file

File: update-html.py Project: sng87/pathmark-scripts

 if i.endswith(".html"):
     continue
 elif i.endswith(".py"):
     continue
 elif i.endswith("js"):
     continue
 elif i.endswith("swf"):
     continue
 else:
     log("Working %s ...\n" % (i))
     d.write(htmlIndexItem % ("%s.html" % (i), i))
     f = open("%s.html" % (i), "w")
     f.write(htmlHead)
     f.write(htmlCategory % i)
     if os.path.exists("%s/stats.tab" % (i)):
         (samData, samCols, samItems) = mData.rCRSData("%s/stats.tab" % (i), retFeatures = True)
         samCols = ["Link"] + samCols
     else:
         samData = {}
         samCols = ["Link", "Note"]
         for j in samCols:
             samData[j] = {}
         samItems = []
         for j in os.listdir(i):
             if j.startswith("img"):
                 continue
             samItems.append(re.sub(".html", "", j))
         for j in samItems:
             samData["Link"][j] = "%s.html" % (j)
             samData["Note"][j] = ""
     f.write(htmlTableHead % ("</th>\n            <th>".join(samCols)))

Example #9

0

Show file

File: filter-subnets.py Project: decarlin/stuartlab-scripts

def filterNet(files, phenotypes = [], statLine = None, outDir = None):
    """Extract, per phenotype, the sub-network of high-scoring links.

    Every file in ``files`` is read with ``mData.rCRSData`` and used as
    ``data[phenotype][node] = score``; scores are converted to absolute
    floats, with unparseable values kept as the string "NA".  ``files[0]``
    is the main dataset: it defines the phenotypes that are iterated, the
    SCORE.NA output and the "top disconnected" nodes.

    For each dataset a pair ``(pStats[i][0], pStats[i][1])`` is obtained
    (from ``mCalculate.mean_std`` or from ``statLine``).  A node scores 2 if
    its value exceeds ``pStats[i][0] + filterBounds[1] * pStats[i][1]``, 1 if
    it exceeds the same expression with ``filterBounds[0]``, else 0.  A link
    a-b is kept when the two node scores sum to at least 3, combined across
    datasets according to the module-level ``includeType``:
    "OR" (best score of each node over all datasets), "AND" (every dataset)
    or "MAIN" (dataset 0 only).

    Args:
        files: list of score file paths.
        phenotypes: if non-empty, only these phenotypes are processed.  The
            list is never modified here.
        statLine: optional "v1;v2,v1;v2,..." string giving one pair per
            dataset instead of computing them.
        outDir: output directory; defaults to a directory named after the
            phenotype.

    Module-level names read: globalPathway, outputAttributes,
    outputPARADIGM, includeType, topDisconnected, featureReq and
    filterBounds (which is also modified when a retry is needed).

    Side effects: writes attribute/SIF/pathway files.  When outputPARADIGM
    is set and no connected net has enough proteins, the bounds are relaxed,
    the function calls itself again and then terminates the process with
    ``sys.exit(0)``.
    """
    global filterBounds
    filterString = "%s_%s" % (filterBounds[0], filterBounds[1])
    
    ## read global pathway
    (gNodes, gInteractions) = mPathway.rPathway(globalPathway)
    
    ## read drugs
    #drugData = mData.rSet(drugBank)
    
    ## write LABEL.NA, TYPE.NA
    if outputAttributes:
        typef = open("TYPE.NA", "w")
        labelf = open("LABEL.NA", "w")
        typef.write("TYPE (class=java.lang.String)\n")
        labelf.write("LABEL (class=java.lang.String)\n")
        for i in gNodes.keys():
            typef.write("%s = %s\n" % (i, gNodes[i]))
            # only proteins get a visible label
            if gNodes[i] == "protein":
                labelf.write("%s = %s\n" % (i, i))
            else:
                labelf.write("%s = %s\n" % (i, ""))
        #drugs here
        typef.close()
        labelf.close()
    
    ## read scores
    # uData keeps the raw values, sData the absolute floats (or "NA")
    uData = dict()
    sData = dict()
    for i in range(len(files)):
        uData[i] = mData.rCRSData(files[i])
        sData[i] = dict()
        for j in uData[i].keys():
            sData[i][j] = dict()
            for k in uData[i][j].keys():
                try:
                    sData[i][j][k] = abs(float(uData[i][j][k]))
                except ValueError:
                    sData[i][j][k] = "NA"
    
    ## iterate phenotypes
    for p in sData[0].keys():
        if len(phenotypes) > 0:
            if p not in phenotypes:
                continue
        pNodes = dict()
        pInteractions = dict()
        
        ## write SCORE.NA
        if outputAttributes:
            scoref = open(p+"_SCORE.NA", "w")
            scoref.write("SCORE (class=java.lang.Float)\n")
            for i in gNodes.keys():
                # missing or "NA" scores are written as 0
                if i in uData[0][p]:
                    if uData[0][p][i] == "NA":
                        scoref.write("%s = %s\n" % (i, "0"))
                    else:
                        scoref.write("%s = %s\n" % (i, uData[0][p][i]))
                else:
                    scoref.write("%s = %s\n" % (i, "0"))
            scoref.close()
        
        ## compute thresholds
        pStats = []
        if statLine == None:
            for i in range(len(sData.keys())):
                pStats.append(mCalculate.mean_std(sData[i][p].values()))
        else:
            for i in re.split(",",statLine):
                (v1, v2) = re.split(";",i)
                pStats.append((float(v1), float(v2)))
        log("%s\t%s;%s" % (p, pStats[0][0], pStats[0][1]))
        for i in range(1, len(pStats)):
            log(",%s;%s" % (pStats[i][0], pStats[i][1]))
        log("\n")
        
        ## iterate links
        for a in gInteractions.keys():
            if a not in sData[0][p]:
                continue
            elif sData[0][p][a] == "NA":
                continue
            for b in gInteractions[a].keys():
                if b not in sData[0][p]:
                    continue
                elif sData[0][p][b] == "NA":
                    continue
                ## score nodes by threshold
                aScore = []
                bScore = []
                linkScore = []
                for i in range(len(sData.keys())):
                    linkScore.append([sData[i][p][a], sData[i][p][b]])
                for i in range(len(sData.keys())):
                    if linkScore[i][0] > pStats[i][0]+filterBounds[1]*pStats[i][1]:
                        aScore.append(2)
                    elif linkScore[i][0] > pStats[i][0]+filterBounds[0]*pStats[i][1]:
                        aScore.append(1)
                    else:
                        aScore.append(0)
                    if linkScore[i][1] > pStats[i][0]+filterBounds[1]*pStats[i][1]:
                        bScore.append(2)
                    elif linkScore[i][1] > pStats[i][0]+filterBounds[0]*pStats[i][1]:
                        bScore.append(1)
                    else:
                        bScore.append(0)
                
                ## selection rule
                if includeType == "OR":
                    if max(aScore)+max(bScore) >= 3:
                        (pNodes, pInteractions) = addLink(a, b, pNodes, pInteractions, gNodes, gInteractions)
                elif includeType == "AND":
                    # Count the datasets in which the link passes; the link
                    # is kept only when all of them agree.  (The counter was
                    # previously incremented by 0, so this rule could never
                    # select a link.)
                    votes = 0
                    for i in range(len(sData.keys())):
                        if aScore[i]+bScore[i] >= 3:
                            votes += 1
                    if votes == len(sData.keys()):
                        (pNodes, pInteractions) = addLink(a, b, pNodes, pInteractions, gNodes, gInteractions)
                elif includeType == "MAIN":
                    if aScore[0]+bScore[0] >= 3:
                        (pNodes, pInteractions) = addLink(a, b, pNodes, pInteractions, gNodes, gInteractions)
        
        ## connect top scoring disconnected nodes
        sortedTop = []
        for i in sData[0][p].keys():
            if i not in gNodes:
                continue
            if gNodes[i] in ["protein"]:
                sortedTop.append(i)
        # Descending by score.  Under Python 2 ordering the "NA" strings
        # compare greater than any number, so they end up at the front.
        sortedTop.sort(lambda x, y: cmp(sData[0][p][y],sData[0][p][x]))
        # Drop the leading "NA" entries; the emptiness check comes first so
        # a phenotype without any protein node does not raise IndexError.
        while sortedTop and sData[0][p][sortedTop[0]] == "NA":
            sortedTop.pop(0)
        for i in range(topDisconnected):
            if i > len(sortedTop)-1:
                break
            # the list is sorted, so nothing further down can pass either
            if sData[0][p][sortedTop[i]] < pStats[0][0]+filterBounds[0]*pStats[0][1]:
                break
            if sortedTop[i] not in gNodes:
                continue
            if sortedTop[i] not in pNodes:
                pNodes[sortedTop[i]] = gNodes[sortedTop[i]]
                pInteractions[sortedTop[i]] = dict()
                pInteractions[sortedTop[i]]["__DISCONNECTED__"] = "-disconnected-"
        
        ## output
        if outDir == None:
            wrtDir = p
        else:
            wrtDir = outDir
        if not os.path.exists(wrtDir):
            # os.mkdir instead of a shell "mkdir": wrtDir may be a phenotype
            # name taken from the data file and must not be shell-interpreted.
            os.mkdir(wrtDir)

        ## output for pathway-predictor
        if outputPARADIGM:
            protSet = set()
            for i in gNodes:
                if gNodes[i] == "protein":
                    protSet.update([i])
            netNodes = mPathway.sortConnected(pNodes, pInteractions, mPathway.revInteractions(pInteractions))
            # keep only the nets that contain more than featureReq proteins
            trainNodes = []
            for i in netNodes:
                if len((protSet) & set(i)) > featureReq:
                    trainNodes += i
            if len(trainNodes) == 0:
                log("ERROR: no nets contained enough data\n...trying again\n")
                # relax the upper bound first; once it would drop below the
                # lower bound, shift both bounds down together
                if filterBounds[0]+0.1 <= filterBounds[1]:
                    filterBounds[1] -= 0.1
                else:
                    filterBounds[0] -= 0.1
                    filterBounds[1] -= 0.1
                filterNet(files, phenotypes = phenotypes, statLine = statLine, outDir = outDir)
                sys.exit(0)
            (lNodes, lInteractions) = mPathway.constructInteractions(trainNodes, pNodes, pInteractions)
            if outputAttributes:
                mPathway.wSIF("%s/%s_%s_pp.sif" % (wrtDir, p, filterString), lInteractions)
            ## connect class node
            classNode = "class"
            lInteractions[classNode] = dict()
            for i in lNodes.keys():
                if i not in protSet:
                    continue
                lInteractions[classNode][i] = "-cl>"
            lNodes[classNode] = "active"
            mPathway.wPathway("%s/%s_%s_pp.tab" % (wrtDir, p, filterString), lNodes, lInteractions)        
        ## output nodrug pathway
        else:
            mPathway.wSIF("%s/%s_%s_nodrug.sif" % (wrtDir, p, filterString), pInteractions)
            (cpNodes, cpInteractions) = mPathway.filterComplexesByGeneSupport(pNodes, pInteractions, 
                                        mPathway.revInteractions(pInteractions), gNodes,
                                        mPathway.getComponentMap(gNodes, mPathway.revInteractions(gInteractions)))
            mPathway.wSIF("%s/%s_%s_nodrug_cleaned.sif" % (wrtDir, p, filterString), cpInteractions)

Example #10

0

Show file

    ## output summary
    f = open(re.split("/", sifFile)[-2] + ".summary.tab", "w")
    features = statsMap.keys()
    features.sort(lambda x, y: cmp(statsMap[x][1], statsMap[y][1]))
    for feature in features:
        f.write("%s\t%s\t%s\t%s\t%s\n" %
                (feature, statsMap[feature][1], ",".join(membersMap[feature]),
                 proportionMap[feature], statsMap[feature][0]))
    f.close()

    ## summarize pathways
    if iplMatrix is not None:
        pathwayVals = {}
        pathwayScores = {}
        (iplData, iplSamples, iplFeatures) = mData.rCRSData(iplMatrix,
                                                            retFeatures=True)
        f = open(re.split("/", sifFile)[-2] + ".members.output", "r")
        for line in f:
            line = line.rstrip("\n\r")
            pline = re.split("\t", line)
            if pline[1] in statsMap:
                if pline[1] not in pathwayVals:
                    pathwayVals[pline[1]] = {}
                    for sample in iplSamples:
                        pathwayVals[pline[1]][sample] = []
                if pline[0] in iplFeatures:
                    for sample in iplSamples:
                        pathwayVals[pline[1]][sample].append(
                            abs(float(iplData[sample][pline[0]])))
        f.close()
        for pathwayName in pathwayVals.keys():

Example #11

0

Show file

     continue
 elif i.endswith(".py"):
     continue
 elif i.endswith("js"):
     continue
 elif i.endswith("swf"):
     continue
 else:
     log("Working %s ...\n" % (i))
     d.write(htmlIndexItem % ("%s.html" % (i), i))
     f = open("%s.html" % (i), "w")
     f.write(htmlHead)
     f.write(htmlCategory % i)
     if os.path.exists("%s/stats.tab" % (i)):
         (samData, samCols,
          samItems) = mData.rCRSData("%s/stats.tab" % (i),
                                     retFeatures=True)
         samCols = ["Link"] + samCols
     else:
         samData = {}
         samCols = ["Link", "Note"]
         for j in samCols:
             samData[j] = {}
         samItems = []
         for j in os.listdir(i):
             if j.startswith("img"):
                 continue
             samItems.append(re.sub(".html", "", j))
         for j in samItems:
             samData["Link"][j] = "%s.html" % (j)
             samData["Note"][j] = ""
     f.write(htmlTableHead % ("</th>\n            <th>".join(samCols)))

Example #12

0

Show file

def cli_routine(outputDir,
                circleFiles,
                orderFiles,
                sampleFile,
                featureFile,
                orderFeature,
                centerFile,
                colorscaleFile,
                printLabel,
                verbose,
                cohortMinMax=False,
                purpleHack=True):
    """Routine for program execution via command-line.

    Reads the input files, optionally sorts the samples and draws a
    "Cohort.png" overview, then draws one "<feature>.png" per feature
    into outputDir.

    Parameters (as used by the body):
    outputDir      -- directory prefix for every image path that is built
    circleFiles    -- list of matrix files, each read with mData.rCRSData
    orderFiles     -- list of matrix files used for sorting / the cohort image
    sampleFile     -- list file of samples, or None to take the union of the
                      columns of all circleFiles
    featureFile    -- list file of features, or None to take the union of the
                      rows of all circleFiles
    orderFeature   -- feature to sort samples by, or None for no sorting
    centerFile     -- two-column file (with header) of per-feature center
                      values, or None
    colorscaleFile -- color scale definition (format described below), or None
    printLabel     -- if true, each image is labelled with its feature name
    verbose        -- accepted but not referenced anywhere in this body
    cohortMinMax   -- if true, min/max come from getCohortMinMaxValues
    purpleHack     -- forwarded as purple0Hack to getColor and
                      drawCircleImageForFeature
    """
    # I've tried not to touch this method as much as possible.
    # I don't want to break the way it was working for Sam Ng.
    # chrisw

    ## execute
    samples = []
    features = []
    if sampleFile != None:
        samples = mData.rList(sampleFile)
    if featureFile != None:
        features = mData.rList(featureFile)
    # end section for getting lists of samples and features

    ## read circleFiles
    # circleData is a list of dict[col][row]=score from each circleFile
    circleData = []
    # circleColorsPalette is a list of (minColor),(zeroColor),(maxColor)
    circleColorsPalette = []

    ## read colorscaleFile
    # the format is as follows - header compulsory:
    # min/max	color coding	color1		color2		color 3
    # -2,2		rgb		155,155,155	255,255,255	0,0,0,
    # -		rgb		155,0,155	255,0,255	0,0,0,
    # the "color format" is intended to support more color format, as I have
    # seen the html-colors in the code.
    # Michael ([email protected])
    colorscaleData = None
    if colorscaleFile != None:

        if cohortMinMax:
            log("WARNING: The -k option overrides -m")

        colorscaleData = mData.retRows(colorscaleFile, aslist=True)
        # `line` is incremented before it is reported, so the first data
        # row is reported as line 2 (line 1 being the compulsory header).
        line = 1
        for cs in colorscaleData:
            line = line + 1
            if len(cs) != 5:
                log("ERROR: color scale needs five fields: datapoints, colorcoding(rgb) and three colors\n",
                    die=True)
            # Field 0 becomes a list of floats when it parses; otherwise it
            # is left as the original string (e.g. "-").
            try:
                cs[0] = [float(x) for x in cs[0].split(",")]
            except ValueError:
                pass
            # NOTE(review): an unparseable field of exactly two characters
            # also passes this length check - confirm whether intended.
            if len(cs[0]) != 2 and cs[0] != "-":
                print cs[0]
                log("ERROR: Two data points or dash needed for color scale\n",
                    die=True)
            if cs[1].lower() == "rgb":
                # Fields 2-4 are replaced in place by rgb(...) objects built
                # from their comma-separated components.
                try:
                    cs[2] = rgb(*[float(x) for x in cs[2].split(",")])
                    cs[3] = rgb(*[float(x) for x in cs[3].split(",")])
                    cs[4] = rgb(*[float(x) for x in cs[4].split(",")])
                except TypeError:
                    log("ERROR: RGB needs three values on line " + str(line) +
                        "\n",
                        die=True)
                except ValueError:
                    log("ERROR: RGB color not correctly defined on line " +
                        str(line) + "\n",
                        die=True)
            else:
                log("ERROR: Unknown color coding on line " + str(line) + ": " +
                    str(cs[1]) + "\n",
                    die=True)

    for i in xrange(len(circleFiles)):
        # get data, samples, and features from each circleFile
        # data is a dict[col][row]=score
        # cols is a list of sample names
        # features is a list of feature names
        (data, cols, rows) = mData.rCRSData(circleFiles[i], retFeatures=True)
        circleData.append(data)
        # default palette, used when no color scale row exists for ring i
        minCol = lightBlueRGB
        zerCol = whiteRGB
        maxCol = redRGB
        if colorscaleFile != None and i < len(colorscaleData):
            #get colors from specified colorscaleFile
            minCol = colorscaleData[i][2]
            zerCol = colorscaleData[i][3]
            maxCol = colorscaleData[i][4]

        # special cases for -meth and -mut


#		if circleFiles[i].endswith("meth"):
#			maxCol = blueRGB
#			minCol = redRGB
#			log("Color: meth\n")
#		elif circleFiles[i].endswith("mut"):
#			maxCol = blackRGB
#			minCol = whiteRGB
#			log("Color: mut\n")

        circleColorsPalette.append((minCol, zerCol, maxCol))

        # if no sampleFile/featureFile, default to using samples/features from circleFiles
        if sampleFile == None:
            samples = list(set(cols) | set(samples))
        if featureFile == None:
            features = list(set(rows) | set(features))
    # end section for reading circleFiles

    ## read centerFile
    centerData = None
    if centerFile != None:
        centerData = mData.r2Col(centerFile, header=True)

    ## sort
    if orderFeature != None:
        if len(orderFiles) > 0:
            orderData = []
            # orderColors is only defined (and only read) when orderFiles
            # is non-empty; every order ring uses white/white/black.
            orderColors = []
            for i in xrange(len(orderFiles)):
                orderData.append(mData.rCRSData(orderFiles[i]))
                minCol = whiteRGB
                zerCol = whiteRGB
                maxCol = blackRGB
                orderColors.append((minCol, zerCol, maxCol))
        else:
            orderData = circleData

        # sort samples based on sample score in orderData
        # priority of sorting determined by orderFiles parameter
        samples.sort(lambda x, y: scmp(x, y, orderFeature, orderData))

        ## cohort png
        # cgi will probably not use orderFiles
        if len(orderFiles) > 0:
            imgFile = "%s/Cohort.png" % (outputDir)
            label = "Cohort"
            centerCol = whiteRGB.tohex()
            cohortCircleCols = []
            for i in xrange(len(orderData)):
                ringCols = []
                ringVals = []
                # first pass: collect the values that will be colored, using
                # the "*" row as a fallback when orderFeature is absent
                for sample in samples:
                    if sample in orderData[i]:
                        if orderFeature in orderData[i][sample]:
                            ringVals.append(orderData[i][sample][orderFeature])
                        elif "*" in orderData[i][sample]:
                            ringVals.append(orderData[i][sample]["*"])
                # the +/-0.01 seeds keep the range non-empty and spanning zero
                minVal = min([-0.01] + mData.floatList(ringVals))
                maxVal = max([0.01] + mData.floatList(ringVals))
                # second pass: one color per sample; grey when the sample or
                # both candidate rows are missing
                for sample in samples:
                    if sample in orderData[i]:
                        if orderFeature in orderData[i][sample]:
                            ringCols.append(
                                getColor(orderData[i][sample][orderFeature],
                                         minVal,
                                         maxVal,
                                         minColor=orderColors[i][0],
                                         zeroColor=orderColors[i][1],
                                         maxColor=orderColors[i][2]))
                        elif "*" in orderData[i][sample]:
                            ringCols.append(
                                getColor(orderData[i][sample]["*"],
                                         minVal,
                                         maxVal,
                                         minColor=orderColors[i][0],
                                         zeroColor=orderColors[i][1],
                                         maxColor=orderColors[i][2]))
                        else:
                            ringCols.append(greyRGB.tohex())
                    else:
                        ringCols.append(greyRGB.tohex())
                cohortCircleCols.append(ringCols)
            plotCircle(imgFile,
                       label=label,
                       centerCol=centerCol,
                       circleCols=cohortCircleCols,
                       innerRadTotal=0.2,
                       outerRadTotal=0.5,
                       width=5)
    # end section for sample ordering

    ## plot images
    # center min/max are only defined when centerData exists; they are only
    # read below under the same condition
    if centerData != None:
        centerDataFloatList = mData.floatList(centerData.values())
        centerDataMinVal = min([-0.01] + centerDataFloatList)
        centerDataMaxVal = max([0.01] + centerDataFloatList)

    # get min/max values for datasets
    if cohortMinMax:
        (minValList,
         maxValList) = getCohortMinMaxValues(features, samples, circleData)
    else:
        (minValList, maxValList) = (None, None)

    # a color scale file, when given, gets the final say on min/max
    if colorscaleData != None:
        (minValList,
         maxValList) = getColorScaleMinMaxValues(minValList, maxValList,
                                                 len(circleData),
                                                 colorscaleData)

    for feature in features:
        log("Drawing %s\n" % (feature))
        centerColHex = None
        if centerData != None:
            if feature in centerData:
                centerColHex = getColor(centerData[feature],
                                        centerDataMinVal,
                                        centerDataMaxVal,
                                        minColor=lightBlueRGB,
                                        zeroColor=whiteRGB,
                                        purple0Hack=purpleHack)

        # "/" and ":" in the feature name are replaced so it is a usable file name
        imgFile = "%s/%s.png" % (outputDir, re.sub("[/:]", "_", feature))

        label = ""
        if printLabel:
            label = feature

        image_width = 5.0

        drawCircleImageForFeature(feature,
                                  samples,
                                  label,
                                  imgFile,
                                  circleData,
                                  circleColorsPalette,
                                  width=image_width,
                                  centerColHex=centerColHex,
                                  minValList=minValList,
                                  maxValList=maxValList,
                                  purple0Hack=purpleHack)

    for sample in samples:
        log("ordered samples: %s\n" % (sample))

Example #13

0

Show file

        log("ERROR: incorrect number of arguments", die=True)

    # Positional arguments: phenotype file first, data file second.
    phenotypeFile = args[0]
    dataFile = args[1]

    # -q switches off the module-level verbose flag.
    global verbose
    for o, a in opts:
        if o == "-q":
            verbose = False

    ## execute
    # The output directory is named after the two input basenames without a
    # trailing ".tab".  str.rstrip(".tab") is NOT a suffix removal: it strips
    # any trailing run of the characters '.', 't', 'a', 'b', so "data.tab"
    # used to collapse to "d".  Remove the literal suffix instead.
    phenotypeName = re.split("/", phenotypeFile)[-1]
    if phenotypeName.endswith(".tab"):
        phenotypeName = phenotypeName[:-len(".tab")]
    dataName = re.split("/", dataFile)[-1]
    if dataName.endswith(".tab"):
        dataName = dataName[:-len(".tab")]
    outputDir = "OCCAM__%s__%s" % (phenotypeName, dataName)
    # NOTE(review): outputDir is interpolated into the command unquoted;
    # presumably syscmd runs it through a shell -- confirm and quote if so.
    syscmd("mkdir %s" % (outputDir))
    (phenData, phenColumns, phenRows) = mData.rCRSData(phenotypeFile,
                                                       retFeatures=True)
    (matData, matColumns, matRows) = mData.rCRSData(dataFile, retFeatures=True)

    ## samples
    # Keep only phenotype rows that also appear among the data columns and
    # split them by their "+" / "-" value; any other value is ignored.
    # phenIndex is set outside this excerpt.
    posSamples = []
    negSamples = []
    for sample in phenRows:
        if sample not in matColumns:
            continue
        phenValue = phenData[phenColumns[phenIndex]][sample]
        if phenValue == "+":
            posSamples.append(sample)
        elif phenValue == "-":
            negSamples.append(sample)

    ## output
    # The handle stays open for the code that follows this excerpt.
    f = open("%s/results.tab" % (outputDir), "w")

Example #14

0

Show file

File: tOCCAM.py Project: sng87/pathmark-scripts

     log("ERROR: incorrect number of arguments", die = True)
 
 # Positional arguments: phenotype file first, data file second.
 phenotypeFile = args[0]
 dataFile = args[1]
 
 # -q switches off the module-level verbose flag.
 global verbose
 for o, a in opts:
     if o == "-q":
         verbose = False
 
 ## execute
 # The output directory is named after the two input basenames without a
 # trailing ".tab".  str.rstrip(".tab") is NOT a suffix removal: it strips
 # any trailing run of the characters '.', 't', 'a', 'b', so "data.tab"
 # used to collapse to "d".  Remove the literal suffix instead.
 phenotypeName = re.split("/", phenotypeFile)[-1]
 if phenotypeName.endswith(".tab"):
     phenotypeName = phenotypeName[:-len(".tab")]
 dataName = re.split("/", dataFile)[-1]
 if dataName.endswith(".tab"):
     dataName = dataName[:-len(".tab")]
 outputDir = "OCCAM__%s__%s" % (phenotypeName, dataName)
 # NOTE(review): outputDir is interpolated into the command unquoted;
 # presumably syscmd runs it through a shell -- confirm and quote if so.
 syscmd("mkdir %s" % (outputDir))
 (phenData, phenColumns, phenRows) = mData.rCRSData(phenotypeFile, retFeatures = True)
 (matData, matColumns, matRows) = mData.rCRSData(dataFile, retFeatures = True)
 
 ## samples
 # Keep only phenotype rows that also appear among the data columns and
 # split them by their "+" / "-" value; any other value is ignored.
 # phenIndex is set outside this excerpt.
 posSamples = []
 negSamples = []
 for sample in phenRows:
     if sample not in matColumns:
         continue
     phenValue = phenData[phenColumns[phenIndex]][sample]
     if phenValue == "+":
         posSamples.append(sample)
     elif phenValue == "-":
         negSamples.append(sample)
 
 ## output
 # The handle stays open for the code that follows this excerpt.
 f = open("%s/results.tab" % (outputDir), "w")

Example #15

0

Show file

        usage(1)
    
    # One argument: read the table from stdin and treat the argument as the
    # HTML output path.  Otherwise the first argument is the table file and
    # the second is the HTML output path.
    if (len(args) == 1):
        tabFile = sys.stdin
        htmlFile = args[0]
    else:
        tabFile = args[0]
        htmlFile = args[1]

    # -q switches off the module-level verbose flag.
    global verbose
    for o, a in opts:
        if o == "-q":
            verbose = False

    ## read tabFile
    (tabData, tabCols, tabRows) = mData.rCRSData(tabFile, retFeatures = True)

    ## write htmlFile
    # The with-block closes the file even when a write or a table lookup
    # raises; the previous open()/close() pair leaked the handle on error.
    # NOTE(review): row ids and cell values are written into the markup
    # unescaped -- escape them if the table can contain '<', '>' or '&'.
    with open(htmlFile, "w") as f:
        f.write(htmlHead)
        # The header row gets a leading "id" column for the row names.
        tabCols = ["id"] + tabCols
        f.write(htmlTableHead % ("</th>\n            <th>".join(tabCols)))
        # Data columns are everything after the synthetic "id" header.
        dataCols = tabCols[1:]
        for i in tabRows:
            tabList = [str(tabData[j][i]) for j in dataCols]
            f.write(htmlTableItem % (i, "</td>\n            <td>".join(tabList)))
        f.write(htmlTableTail)
        f.write(htmlTail)

Example #16

0

Show file

File: circlePlot.py Project: mokolodi1/Patient-pCHIP

	if sampleFile != None:
		samples = mData.rList(sampleFile)
	if featureFile != None:
		features = mData.rList(featureFile)

	## read circleFiles
	circleData = []
	circleColors = []
	##
	## record file types for each, effects the color scheme 
	## use the input index for each
	color_scheme_map = {}
	for i in range(len(circleFiles)):
		circleFile, colorScheme = circleFiles[i].split(':')
		color_scheme_map[i] = parseColorScheme(colorScheme)
		(data, cols, rows) = mData.rCRSData(circleFile, retFeatures = True)
		circleData.append(data)
		#circleColors.append( (minCol, zerCol, maxCol) )
		#if sampleFile == None:
		#	samples = list(set(cols) | set(samples))
		#if featureFile == None:
		#	features = list(set(rows) | set(features))
	
	mutationData = None
	if mutationsFile != None:
		mutationData, cols, rows = mData.rCRSData(mutationsFile, retFeatures = True)

	## read centerFile
	centerData = None
	if centerFile != None:
		centerData = mData.r2Col(centerFile, header = True)