def mean(inList, null="NA"):
    """Return the arithmetic mean of the values in inList.

    inList is first converted with mData.floatList. When the converted
    list is empty, the ``null`` placeholder is returned instead of a number.
    """
    values = mData.floatList(inList)
    count = len(values)
    if count == 0:
        # nothing to average: hand back the caller-supplied placeholder
        return null
    return sum(values) / count
def mean(inList, null="NA"):
    """Return the arithmetic mean of the values in inList.

    The values are obtained through mData.floatList(inList); an empty
    result yields the ``null`` placeholder rather than a number.
    """
    values = mData.floatList(inList)
    count = len(values)
    # count is 0 exactly when there is nothing to average
    return sum(values) / count if count != 0 else null
def quartiles(inList):
    """Return the 25/50/75 quartile boundaries of inList as a 3-item list.

    inList is converted with mData.floatList and sorted. With fewer than
    two values the result is ["NA", "NA", "NA"]. Otherwise the boundaries
    are the median of the lower half, the median of all values, and the
    median of the upper half.
    """
    values = mData.floatList(inList)
    values.sort()
    count = len(values)
    if count < 2:
        return ["NA", "NA", "NA"]
    # Floor division keeps the slice index an int even under true division
    # (Python 3 or "from __future__ import division").
    half = count // 2
    # NOTE(review): for an odd count the middle element falls only into the
    # upper half (values[half:]), so the two halves are not symmetric.
    # Kept as-is to preserve existing results - confirm this is intended.
    return [median(values[:half]), median(values), median(values[half:])]
def median(inList):
    """Return the median of the values in inList, or "NA" when empty.

    inList is converted with mData.floatList and sorted. For an odd number
    of values the middle one is returned; for an even number the two middle
    values are averaged.
    """
    values = mData.floatList(inList)
    values.sort()
    count = len(values)
    if count == 0:
        return "NA"
    # Floor division keeps the index an int even under true division
    # (Python 3 or "from __future__ import division").
    mid = count // 2
    if count % 2 == 1:
        return values[mid]
    return (values[mid] + values[mid - 1]) / 2.0
def median(inList):
    """Return the median of the values in inList, or "NA" when empty.

    inList is converted with mData.floatList and sorted. For an odd number
    of values the middle one is returned; for an even number the two middle
    values are averaged.
    """
    values = mData.floatList(inList)
    values.sort()
    count = len(values)
    if count == 0:
        return "NA"
    # Floor division keeps the index an int even under true division
    # (Python 3 or "from __future__ import division").
    mid = count // 2
    if count % 2 == 1:
        return values[mid]
    return (values[mid] + values[mid - 1]) / 2.0
def quartiles(inList):
    """Return the 25/50/75 quartile boundaries of inList as a 3-item list.

    inList is converted with mData.floatList and sorted. With fewer than
    two values the result is ["NA", "NA", "NA"]. Otherwise the boundaries
    are the median of the lower half, the median of all values, and the
    median of the upper half.
    """
    values = mData.floatList(inList)
    values.sort()
    count = len(values)
    if count < 2:
        return ["NA", "NA", "NA"]
    # Floor division keeps the slice index an int even under true division
    # (Python 3 or "from __future__ import division").
    half = count // 2
    # NOTE(review): for an odd count the middle element falls only into the
    # upper half (values[half:]), so the two halves are not symmetric.
    # Kept as-is to preserve existing results - confirm this is intended.
    return [median(values[:half]), median(values), median(values[half:])]
def mean_std(inList, sample=True):
    """Return (mean, standard deviation) of the values in inList.

    inList is converted with mData.floatList. An empty list gives
    ("NA", "NA"); a single value gives a deviation of 0.0. With more than
    one value the squared deviations are divided by n - 1 when ``sample``
    is true and by n otherwise.
    """
    values = mData.floatList(inList)
    count = len(values)
    if count == 0:
        return ("NA", "NA")

    average = sum(values) / float(count)

    # accumulate the squared deviations from the mean
    squares = 0.0
    for value in values:
        squares += (value - average) ** 2

    if count <= 1:
        # a lone value has no spread
        return (average, 0.0)

    if sample:
        divisor = count - 1
    else:
        divisor = count
    return (average, math.sqrt(squares / divisor))
def mean_std(inList, sample=True):
    """Return (mean, standard deviation) of the values in inList.

    inList is converted with mData.floatList. An empty list gives
    ("NA", "NA"); a single value gives a deviation of 0.0. With more than
    one value the squared deviations are divided by n - 1 when ``sample``
    is true and by n otherwise.
    """
    values = mData.floatList(inList)
    count = len(values)
    if count == 0:
        return ("NA", "NA")

    average = sum(values) / float(count)

    # accumulate the squared deviations from the mean
    squares = 0.0
    for value in values:
        squares += (value - average) ** 2

    if count <= 1:
        # a lone value has no spread
        return (average, 0.0)

    if sample:
        divisor = count - 1
    else:
        divisor = count
    return (average, math.sqrt(squares / divisor))
def getCohortMinMaxValues(featureList, sampleList, circleData):
    """Compute per-ring min and max scores across all features in featureList.

    circleData is indexed as circleData[ring][sample][feature]. For every
    ring, the scores of the listed samples are gathered for each listed
    feature, falling back to the sample's "*" entry when the feature is
    absent. Returns (minValList, maxValList) with one entry per ring; the
    minimum is never above -0.01 and the maximum never below 0.01.
    """
    minValList = []
    maxValList = []
    for ringData in circleData:
        collected = []
        for sample in sampleList:
            if sample not in ringData:
                continue
            sampleScores = ringData[sample]
            for feature in featureList:
                if feature in sampleScores:
                    collected.append(sampleScores[feature])
                elif "*" in sampleScores:
                    # wildcard score stands in for a missing feature
                    collected.append(sampleScores["*"])
        scores = mData.floatList(collected)
        # the +/-0.01 seeds guarantee a non-empty, non-degenerate range
        minValList.append(min([-0.01] + scores))
        maxValList.append(max([0.01] + scores))
    return (minValList, maxValList)
## cohort png if len(orderFiles) > 0: imgFile = "%s/Cohort.png" % (outputDir) label = "Cohort" centerCol = rgb(255, 255, 255).tohex() circleCols = [] for i in range(len(orderData)): ringCols = [] ringVals = [] for sample in samples: if sample in orderData[i]: if orderFeature in orderData[i][sample]: ringVals.append(orderData[i][sample][orderFeature]) elif "*" in orderData[i][sample]: ringVals.append(orderData[i][sample]["*"]) minVal = min([-0.01]+mData.floatList(ringVals)) maxVal = max([0.01]+mData.floatList(ringVals)) for sample in samples: if sample in orderData[i]: if orderFeature in orderData[i][sample]: ringCols.append(getColor(orderData[i][sample][orderFeature], minVal, maxVal, minColor = orderColors[i][0], zeroColor = orderColors[i][1], maxColor = orderColors[i][2])) elif "*" in orderData[i][sample]: ringCols.append(getColor(orderData[i][sample]["*"], minVal, maxVal, minColor = orderColors[i][0], zeroColor = orderColors[i][1], maxColor = orderColors[i][2])) else: ringCols.append(rgb(200, 200, 200).tohex()) else: ringCols.append(rgb(200, 200, 200).tohex()) circleCols.append(ringCols) plotCircle(imgFile, label = label, centerCol = centerCol, circleCols = circleCols, innerRadTotal=0.2, outerRadTotal=0.5, width = 5) ## plot images
def cli_routine(outputDir, circleFiles, orderFiles, sampleFile, featureFile, orderFeature, centerFile, colorscaleFile, printLabel, verbose, cohortMinMax=False, purpleHack = True):
    """Routine for program execution via command-line.

    Reads the input files, optionally sorts the samples, and draws one
    circle image per feature into outputDir.

    outputDir      - directory prefix used to build "<outputDir>/Cohort.png"
                     and "<outputDir>/<feature>.png" paths
    circleFiles    - files read with mData.rCRSData; each one becomes a ring
    orderFiles     - files read with mData.rCRSData and used for sorting;
                     when non-empty (and orderFeature is set) a Cohort.png
                     is also drawn
    sampleFile     - optional list file read with mData.rList; when None the
                     samples are collected from the circleFiles
    featureFile    - optional list file read with mData.rList; when None the
                     features are collected from the circleFiles
    orderFeature   - feature passed to scmp for sorting samples; None
                     disables sorting
    centerFile     - optional file read with mData.r2Col; its values colour
                     the image center per feature
    colorscaleFile - optional file giving min/max data points and three
                     colors per ring (format described below)
    printLabel     - when true, the feature name is used as the image label
    verbose        - not referenced in this function body
    cohortMinMax   - when true, per-ring min/max come from
                     getCohortMinMaxValues over all features
    purpleHack     - forwarded as purple0Hack to getColor and
                     drawCircleImageForFeature
    """
    # I've tried not to touch this method as much as possible.
    # I don't want to break the way it was working for Sam Ng.
    # chrisw

    ## execute
    samples = []
    features = []
    if sampleFile != None:
        samples = mData.rList(sampleFile)
    if featureFile != None:
        features = mData.rList(featureFile)
    # end section for getting lists of samples and features

    ## read circleFiles
    # circleData is a list of dict[col][row]=score from each circleFile
    circleData = []

    # circleColorsPalette is a list of (minColor),(zeroColor),(maxColor)
    circleColorsPalette = []

    ## read colorscaleFile
    # the format is as follows - header compulsory:
    # min/max color coding color1 color2 color 3
    # -2,2 rgb 155,155,155 255,255,255 0,0,0,
    # - rgb 155,0,155 255,0,255 0,0,0,
    # the "color format" is intended to support more color format, as I have
    # seen the html-colors in the code.
    # Michael ([email protected])
    colorscaleData = None
    if colorscaleFile != None:
        if cohortMinMax:
            # NOTE(review): unlike the other log messages here, this one has
            # no trailing newline.
            log("WARNING: The -k option overrides -m")
        colorscaleData = mData.retRows(colorscaleFile,aslist=True)
        # line is only used in error messages; it is incremented before each
        # row is checked, so the first row is reported as line 2
        line=1
        for cs in colorscaleData:
            line = line + 1
            if len(cs) != 5:
                log("ERROR: color scale needs five fields: datapoints, colorcoding(rgb) and three colors\n", die = True)
            # try to turn "min,max" into a list of floats; a non-numeric
            # field (such as the "-" placeholder) is left as the raw string
            try:
                cs[0] = [float(x) for x in cs[0].split(",")]
            except ValueError:
                pass
            # NOTE(review): when cs[0] is still a string, len() is its
            # character count, so any two-character non-numeric string
            # also passes this check.
            if len(cs[0]) != 2 and cs[0] != "-":
                # Python 2 print statement; writes the offending field to stdout
                print cs[0]
                log("ERROR: Two data points or dash needed for color scale\n", die = True)
            if cs[1].lower() == "rgb":
                # replace the three color fields with rgb objects
                try:
                    cs[2] = rgb(*[float(x) for x in cs[2].split(",")])
                    cs[3] = rgb(*[float(x) for x in cs[3].split(",")])
                    cs[4] = rgb(*[float(x) for x in cs[4].split(",")])
                except TypeError:
                    log("ERROR: RGB needs three values on line " + str(line) + "\n", die = True)
                except ValueError:
                    log("ERROR: RGB color not correctly defined on line " + str(line) + "\n", die=True)
            else:
                log("ERROR: Unknown color coding on line " + str(line) + ": " + str(cs[1]) + "\n", die=True)

    for i in xrange(len(circleFiles)):
        # get data, samples, and features from each circleFile
        # data is a dict[col][row]=score
        # cols is a list of sample names
        # rows is a list of feature names
        (data, cols, rows) = mData.rCRSData(circleFiles[i], retFeatures=True)
        circleData.append(data)

        # default ring palette, used when the colorscale file has no row
        # for this ring
        minCol = lightBlueRGB
        zerCol = whiteRGB
        maxCol = redRGB
        if colorscaleFile != None and i<len(colorscaleData):
            #get colors from specified colorscaleFile
            minCol = colorscaleData[i][2]
            zerCol = colorscaleData[i][3]
            maxCol = colorscaleData[i][4]

        # special cases for -meth and -mut (disabled)
        # if circleFiles[i].endswith("meth"):
        #     maxCol = blueRGB
        #     minCol = redRGB
        #     log("Color: meth\n")
        # elif circleFiles[i].endswith("mut"):
        #     maxCol = blackRGB
        #     minCol = whiteRGB
        #     log("Color: mut\n")

        circleColorsPalette.append((minCol, zerCol, maxCol))

        # if no sampleFile/featureFile, default to using samples/features from circleFiles
        # (the union accumulates across all circleFiles)
        if sampleFile == None:
            samples = list(set(cols) | set(samples))
        if featureFile == None:
            features = list(set(rows) | set(features))
    # end section for reading circleFiles

    ## read centerFile
    centerData = None
    if centerFile != None:
        centerData = mData.r2Col(centerFile, header=True)

    ## sort
    if orderFeature != None:
        if len(orderFiles) > 0:
            orderData = []
            orderColors = []
            for i in xrange(len(orderFiles)):
                orderData.append(mData.rCRSData(orderFiles[i]))
                # order rings use a white-to-black palette
                minCol = whiteRGB
                zerCol = whiteRGB
                maxCol = blackRGB
                orderColors.append((minCol, zerCol, maxCol))
        else:
            orderData = circleData

        # sort samples based on sample score in orderData
        # priority of sorting determined by orderFiles parameter
        # (cmp-style comparison function: Python 2 list.sort signature)
        samples.sort(lambda x, y: scmp(x, y, orderFeature, orderData))

        ## cohort png
        # cgi will probably not use orderFiles
        if len(orderFiles) > 0:
            imgFile = "%s/Cohort.png" % (outputDir)
            label = "Cohort"
            centerCol = whiteRGB.tohex()
            cohortCircleCols = []
            for i in xrange(len(orderData)):
                ringCols = []
                ringVals = []
                # first pass: collect the ring's scores to find its min/max
                for sample in samples:
                    if sample in orderData[i]:
                        if orderFeature in orderData[i][sample]:
                            ringVals.append(orderData[i][sample][orderFeature])
                        elif "*" in orderData[i][sample]:
                            ringVals.append(orderData[i][sample]["*"])
                minVal = min([-0.01] + mData.floatList(ringVals))
                maxVal = max([0.01] + mData.floatList(ringVals))
                # second pass: one color per sample, grey when no score exists
                for sample in samples:
                    if sample in orderData[i]:
                        if orderFeature in orderData[i][sample]:
                            ringCols.append(getColor(orderData[i][sample][orderFeature], minVal, maxVal, minColor=orderColors[i][0], zeroColor=orderColors[i][1], maxColor=orderColors[i][2]))
                        elif "*" in orderData[i][sample]:
                            ringCols.append(getColor(orderData[i][sample]["*"], minVal, maxVal, minColor=orderColors[i][0], zeroColor=orderColors[i][1], maxColor=orderColors[i][2]))
                        else:
                            ringCols.append(greyRGB.tohex())
                    else:
                        ringCols.append(greyRGB.tohex())
                cohortCircleCols.append(ringCols)
            # NOTE(review): this call passes centerCol=..., whereas
            # drawCircleImageForFeature passes centerColHex=... to the same
            # function - confirm against plotCircle's signature.
            plotCircle(imgFile, label=label, centerCol=centerCol, circleCols=cohortCircleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=5)
    # end section for sample ordering

    ## plot images
    if centerData != None:
        # value range used to color the image centers
        centerDataFloatList = mData.floatList(centerData.values())
        centerDataMinVal = min([-0.01] + centerDataFloatList)
        centerDataMaxVal = max([0.01] + centerDataFloatList)

    # get min/max values for datasets
    if cohortMinMax:
        (minValList, maxValList) = getCohortMinMaxValues(features, samples, circleData)
    else:
        (minValList, maxValList) = (None, None)

    if colorscaleData != None:
        # the colorscale file gets the final say on the min/max lists
        (minValList, maxValList) = getColorScaleMinMaxValues(minValList, maxValList, len(circleData), colorscaleData)

    for feature in features:
        log("Drawing %s\n" % (feature))
        # centerColHex stays None when there is no center value for the feature
        centerColHex = None
        if centerData != None:
            if feature in centerData:
                centerColHex = getColor(centerData[feature], centerDataMinVal, centerDataMaxVal, minColor=lightBlueRGB, zeroColor=whiteRGB, purple0Hack=purpleHack)

        # "/" and ":" in the feature name are replaced so it is usable as a file name
        imgFile = "%s/%s.png" % (outputDir, re.sub("[/:]", "_", feature))

        label = ""
        if printLabel:
            label = feature

        image_width = 5.0
        drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColorsPalette, width=image_width, centerColHex=centerColHex, minValList=minValList, maxValList=maxValList, purple0Hack=purpleHack)

    for sample in samples:
        log("ordered samples: %s\n" % (sample))
def drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColors, centerColHex=None, width=5, minValList=None, maxValList=None, purple0Hack=False):
    """Build the ring colors for one feature and hand them to plotCircle.

    feature      - feature to draw the image for (for example, a gene)
    samples      - sample names; one color per sample is produced per ring
    label        - label forwarded to plotCircle
    imgFile      - image destination forwarded to plotCircle
    circleData   - list (one item per ring) of dict[sample][feature]=score
    circleColors - list (one item per ring) of (minColor, zeroColor, maxColor)
    centerColHex - center fill forwarded to plotCircle
    width        - image width forwarded to plotCircle
    minValList / maxValList - optional per-ring limits; when either list or
                   the ring's entry is None, the limits are computed from
                   the ring's own scores for this feature
    purple0Hack  - forwarded to getColor
    """

    def findScore(sampleScores):
        # Returns (found, score): the feature's score, else the "*" wildcard score.
        if feature in sampleScores:
            return (True, sampleScores[feature])
        if "*" in sampleScores:
            return (True, sampleScores["*"])
        return (False, None)

    # circleCols holds one list of colors per ring
    circleCols = []
    for ring, ringData in enumerate(circleData):
        limitsGiven = not (minValList is None or maxValList is None
                           or minValList[ring] is None
                           or maxValList[ring] is None)
        if limitsGiven:
            minVal = minValList[ring]
            maxVal = maxValList[ring]
        else:
            # derive the limits from this ring's scores for the feature
            ringVals = []
            for sample in samples:
                if sample in ringData:
                    found, score = findScore(ringData[sample])
                    if found:
                        ringVals.append(score)
            scores = mData.floatList(ringVals)
            minVal = min([-0.01] + scores)
            maxVal = max([0.01] + scores)

        # convert scores into colors
        ringCols = []
        for sample in samples:
            found = False
            if sample in ringData:
                found, score = findScore(ringData[sample])
            if found:
                ringCols.append(getColor(score, minVal, maxVal,
                                         minColor=circleColors[ring][0],
                                         zeroColor=circleColors[ring][1],
                                         maxColor=circleColors[ring][2],
                                         purple0Hack=purple0Hack))
            else:
                # unknown sample, or sample without a score for the feature
                ringCols.append(greyRGB.tohex())
        circleCols.append(ringCols)

    # plot the image
    plotCircle(imgFile, label=label, centerColHex=centerColHex, circleCols=circleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=width)
def cli_routine(outputDir, circleFiles, orderFiles, sampleFile, featureFile, orderFeature, centerFile, colorscaleFile, printLabel, verbose, cohortMinMax=False, purpleHack=True):
    """Routine for program execution via command-line.

    Reads the input files, optionally sorts the samples, and draws one
    circle image per feature into outputDir.

    outputDir      - directory prefix used to build "<outputDir>/Cohort.png"
                     and "<outputDir>/<feature>.png" paths
    circleFiles    - files read with mData.rCRSData; each one becomes a ring
    orderFiles     - files read with mData.rCRSData and used for sorting;
                     when non-empty (and orderFeature is set) a Cohort.png
                     is also drawn
    sampleFile     - optional list file read with mData.rList; when None the
                     samples are collected from the circleFiles
    featureFile    - optional list file read with mData.rList; when None the
                     features are collected from the circleFiles
    orderFeature   - feature passed to scmp for sorting samples; None
                     disables sorting
    centerFile     - optional file read with mData.r2Col; its values colour
                     the image center per feature
    colorscaleFile - optional file giving min/max data points and three
                     colors per ring (format described below)
    printLabel     - when true, the feature name is used as the image label
    verbose        - not referenced in this function body
    cohortMinMax   - when true, per-ring min/max come from
                     getCohortMinMaxValues over all features
    purpleHack     - forwarded as purple0Hack to getColor and
                     drawCircleImageForFeature
    """
    # I've tried not to touch this method as much as possible.
    # I don't want to break the way it was working for Sam Ng.
    # chrisw

    ## execute
    samples = []
    features = []
    if sampleFile != None:
        samples = mData.rList(sampleFile)
    if featureFile != None:
        features = mData.rList(featureFile)
    # end section for getting lists of samples and features

    ## read circleFiles
    # circleData is a list of dict[col][row]=score from each circleFile
    circleData = []

    # circleColorsPalette is a list of (minColor),(zeroColor),(maxColor)
    circleColorsPalette = []

    ## read colorscaleFile
    # the format is as follows - header compulsory:
    # min/max color coding color1 color2 color 3
    # -2,2 rgb 155,155,155 255,255,255 0,0,0,
    # - rgb 155,0,155 255,0,255 0,0,0,
    # the "color format" is intended to support more color format, as I have
    # seen the html-colors in the code.
    # Michael ([email protected])
    colorscaleData = None
    if colorscaleFile != None:
        if cohortMinMax:
            # NOTE(review): unlike the other log messages here, this one has
            # no trailing newline.
            log("WARNING: The -k option overrides -m")
        colorscaleData = mData.retRows(colorscaleFile, aslist=True)
        # line is only used in error messages; it is incremented before each
        # row is checked, so the first row is reported as line 2
        line = 1
        for cs in colorscaleData:
            line = line + 1
            if len(cs) != 5:
                log("ERROR: color scale needs five fields: datapoints, colorcoding(rgb) and three colors\n", die=True)
            # try to turn "min,max" into a list of floats; a non-numeric
            # field (such as the "-" placeholder) is left as the raw string
            try:
                cs[0] = [float(x) for x in cs[0].split(",")]
            except ValueError:
                pass
            # NOTE(review): when cs[0] is still a string, len() is its
            # character count, so any two-character non-numeric string
            # also passes this check.
            if len(cs[0]) != 2 and cs[0] != "-":
                # Python 2 print statement; writes the offending field to stdout
                print cs[0]
                log("ERROR: Two data points or dash needed for color scale\n", die=True)
            if cs[1].lower() == "rgb":
                # replace the three color fields with rgb objects
                try:
                    cs[2] = rgb(*[float(x) for x in cs[2].split(",")])
                    cs[3] = rgb(*[float(x) for x in cs[3].split(",")])
                    cs[4] = rgb(*[float(x) for x in cs[4].split(",")])
                except TypeError:
                    log("ERROR: RGB needs three values on line " + str(line) + "\n", die=True)
                except ValueError:
                    log("ERROR: RGB color not correctly defined on line " + str(line) + "\n", die=True)
            else:
                log("ERROR: Unknown color coding on line " + str(line) + ": " + str(cs[1]) + "\n", die=True)

    for i in xrange(len(circleFiles)):
        # get data, samples, and features from each circleFile
        # data is a dict[col][row]=score
        # cols is a list of sample names
        # rows is a list of feature names
        (data, cols, rows) = mData.rCRSData(circleFiles[i], retFeatures=True)
        circleData.append(data)

        # default ring palette, used when the colorscale file has no row
        # for this ring
        minCol = lightBlueRGB
        zerCol = whiteRGB
        maxCol = redRGB
        if colorscaleFile != None and i < len(colorscaleData):
            #get colors from specified colorscaleFile
            minCol = colorscaleData[i][2]
            zerCol = colorscaleData[i][3]
            maxCol = colorscaleData[i][4]

        # special cases for -meth and -mut (disabled)
        # if circleFiles[i].endswith("meth"):
        #     maxCol = blueRGB
        #     minCol = redRGB
        #     log("Color: meth\n")
        # elif circleFiles[i].endswith("mut"):
        #     maxCol = blackRGB
        #     minCol = whiteRGB
        #     log("Color: mut\n")

        circleColorsPalette.append((minCol, zerCol, maxCol))

        # if no sampleFile/featureFile, default to using samples/features from circleFiles
        # (the union accumulates across all circleFiles)
        if sampleFile == None:
            samples = list(set(cols) | set(samples))
        if featureFile == None:
            features = list(set(rows) | set(features))
    # end section for reading circleFiles

    ## read centerFile
    centerData = None
    if centerFile != None:
        centerData = mData.r2Col(centerFile, header=True)

    ## sort
    if orderFeature != None:
        if len(orderFiles) > 0:
            orderData = []
            orderColors = []
            for i in xrange(len(orderFiles)):
                orderData.append(mData.rCRSData(orderFiles[i]))
                # order rings use a white-to-black palette
                minCol = whiteRGB
                zerCol = whiteRGB
                maxCol = blackRGB
                orderColors.append((minCol, zerCol, maxCol))
        else:
            orderData = circleData

        # sort samples based on sample score in orderData
        # priority of sorting determined by orderFiles parameter
        # (cmp-style comparison function: Python 2 list.sort signature)
        samples.sort(lambda x, y: scmp(x, y, orderFeature, orderData))

        ## cohort png
        # cgi will probably not use orderFiles
        if len(orderFiles) > 0:
            imgFile = "%s/Cohort.png" % (outputDir)
            label = "Cohort"
            centerCol = whiteRGB.tohex()
            cohortCircleCols = []
            for i in xrange(len(orderData)):
                ringCols = []
                ringVals = []
                # first pass: collect the ring's scores to find its min/max
                for sample in samples:
                    if sample in orderData[i]:
                        if orderFeature in orderData[i][sample]:
                            ringVals.append(orderData[i][sample][orderFeature])
                        elif "*" in orderData[i][sample]:
                            ringVals.append(orderData[i][sample]["*"])
                minVal = min([-0.01] + mData.floatList(ringVals))
                maxVal = max([0.01] + mData.floatList(ringVals))
                # second pass: one color per sample, grey when no score exists
                for sample in samples:
                    if sample in orderData[i]:
                        if orderFeature in orderData[i][sample]:
                            ringCols.append(
                                getColor(orderData[i][sample][orderFeature], minVal, maxVal, minColor=orderColors[i][0], zeroColor=orderColors[i][1], maxColor=orderColors[i][2]))
                        elif "*" in orderData[i][sample]:
                            ringCols.append(
                                getColor(orderData[i][sample]["*"], minVal, maxVal, minColor=orderColors[i][0], zeroColor=orderColors[i][1], maxColor=orderColors[i][2]))
                        else:
                            ringCols.append(greyRGB.tohex())
                    else:
                        ringCols.append(greyRGB.tohex())
                cohortCircleCols.append(ringCols)
            # NOTE(review): this call passes centerCol=..., whereas
            # drawCircleImageForFeature passes centerColHex=... to the same
            # function - confirm against plotCircle's signature.
            plotCircle(imgFile, label=label, centerCol=centerCol, circleCols=cohortCircleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=5)
    # end section for sample ordering

    ## plot images
    if centerData != None:
        # value range used to color the image centers
        centerDataFloatList = mData.floatList(centerData.values())
        centerDataMinVal = min([-0.01] + centerDataFloatList)
        centerDataMaxVal = max([0.01] + centerDataFloatList)

    # get min/max values for datasets
    if cohortMinMax:
        (minValList, maxValList) = getCohortMinMaxValues(features, samples, circleData)
    else:
        (minValList, maxValList) = (None, None)

    if colorscaleData != None:
        # the colorscale file gets the final say on the min/max lists
        (minValList, maxValList) = getColorScaleMinMaxValues(minValList, maxValList, len(circleData), colorscaleData)

    for feature in features:
        log("Drawing %s\n" % (feature))
        # centerColHex stays None when there is no center value for the feature
        centerColHex = None
        if centerData != None:
            if feature in centerData:
                centerColHex = getColor(centerData[feature], centerDataMinVal, centerDataMaxVal, minColor=lightBlueRGB, zeroColor=whiteRGB, purple0Hack=purpleHack)

        # "/" and ":" in the feature name are replaced so it is usable as a file name
        imgFile = "%s/%s.png" % (outputDir, re.sub("[/:]", "_", feature))

        label = ""
        if printLabel:
            label = feature

        image_width = 5.0
        drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColorsPalette, width=image_width, centerColHex=centerColHex, minValList=minValList, maxValList=maxValList, purple0Hack=purpleHack)

    for sample in samples:
        log("ordered samples: %s\n" % (sample))
def drawCircleImageForFeature(feature, samples, label, imgFile, circleData, circleColors, centerColHex=None, width=5, minValList=None, maxValList=None, purple0Hack=False):
    """Build the ring colors for one feature and hand them to plotCircle.

    feature      - feature to draw the image for (for example, a gene)
    samples      - sample names; one color per sample is produced per ring
    label        - label forwarded to plotCircle
    imgFile      - image destination forwarded to plotCircle
    circleData   - list (one item per ring) of dict[sample][feature]=score
    circleColors - list (one item per ring) of (minColor, zeroColor, maxColor)
    centerColHex - center fill forwarded to plotCircle
    width        - image width forwarded to plotCircle
    minValList / maxValList - optional per-ring limits; when either list or
                   the ring's entry is None, the limits are computed from
                   the ring's own scores for this feature
    purple0Hack  - forwarded to getColor
    """

    def findScore(sampleScores):
        # Returns (found, score): the feature's score, else the "*" wildcard score.
        if feature in sampleScores:
            return (True, sampleScores[feature])
        if "*" in sampleScores:
            return (True, sampleScores["*"])
        return (False, None)

    # circleCols holds one list of colors per ring
    circleCols = []
    for ring, ringData in enumerate(circleData):
        limitsGiven = not (minValList is None or maxValList is None
                           or minValList[ring] is None
                           or maxValList[ring] is None)
        if limitsGiven:
            minVal = minValList[ring]
            maxVal = maxValList[ring]
        else:
            # derive the limits from this ring's scores for the feature
            ringVals = []
            for sample in samples:
                if sample in ringData:
                    found, score = findScore(ringData[sample])
                    if found:
                        ringVals.append(score)
            scores = mData.floatList(ringVals)
            minVal = min([-0.01] + scores)
            maxVal = max([0.01] + scores)

        # convert scores into colors
        ringCols = []
        for sample in samples:
            found = False
            if sample in ringData:
                found, score = findScore(ringData[sample])
            if found:
                ringCols.append(getColor(score, minVal, maxVal,
                                         minColor=circleColors[ring][0],
                                         zeroColor=circleColors[ring][1],
                                         maxColor=circleColors[ring][2],
                                         purple0Hack=purple0Hack))
            else:
                # unknown sample, or sample without a score for the feature
                ringCols.append(greyRGB.tohex())
        circleCols.append(ringCols)

    # plot the image
    plotCircle(imgFile, label=label, centerColHex=centerColHex, circleCols=circleCols, innerRadTotal=0.2, outerRadTotal=0.5, width=width)