def writeMatrix(self, matrixParams, outFilename):
    """Add the constant platform feature to the matrix and write it out.

    The feature name is "C:SAMP:" + the lower-cased 'fm_feature_type'
    configuration entry + "Platform:::::" + the 'fm_platform_type'
    configuration entry; the feature value is the 'techtype' entry.

    matrixParams -- matrix dictionary handed to tsvIO; it must also hold
                    the 'sortRowFlag' and 'sortColFlag' entries that are
                    forwarded to tsvIO.writeTSV_dataMatrix
    outFilename  -- output name forwarded to tsvIO.writeTSV_dataMatrix
    """
    print("%s %s" % (datetime.now(),
                     'writing out data matrix to %s' % outFilename))

    config = self.configuration
    featurePrefix = "C:SAMP:" + config['fm_feature_type'].lower()
    platformName = featurePrefix + "Platform:::::" + config['fm_platform_type']
    platformValue = config['techtype']

    # NOTE(review): the return value is discarded here, so this relies on
    # addConstFeature updating matrixParams in place -- TODO confirm
    tsvIO.addConstFeature(matrixParams, platformName, platformValue)

    rowFlag = matrixParams['sortRowFlag']
    colFlag = matrixParams['sortColFlag']
    tsvIO.writeTSV_dataMatrix(matrixParams, rowFlag, colFlag, outFilename)

    print("%s %s" % (datetime.now(), 'finished writing out data matrix\n'))
def writeMatrix(self, matrixParams, outFilename):
    """Write matrixParams to outFilename after tagging it with the platform.

    A constant feature is added first: its name is assembled from the
    'fm_feature_type' (lower-cased) and 'fm_platform_type' configuration
    entries and its value is the 'techtype' configuration entry.  The
    matrix is then written with the row / column sort flags stored in
    matrixParams itself ('sortRowFlag', 'sortColFlag').
    """
    startMessage = 'writing out data matrix to %s' % outFilename
    print("%s %s" % (datetime.now(), startMessage))

    nameParts = [
        "C:SAMP:",
        self.configuration['fm_feature_type'].lower(),
        "Platform:::::",
        self.configuration['fm_platform_type'],
    ]
    constName = "".join(nameParts)
    constValue = self.configuration['techtype']

    # NOTE(review): result of addConstFeature is not captured; presumably
    # the dictionary is modified in place -- verify against tsvIO
    tsvIO.addConstFeature(matrixParams, constName, constValue)
    tsvIO.writeTSV_dataMatrix(
        matrixParams,
        matrixParams['sortRowFlag'],
        matrixParams['sortColFlag'],
        outFilename)

    print("%s %s" % (datetime.now(), 'finished writing out data matrix\n'))
def main():
    """Read the SNP data, resegment it and write the result as a TSV matrix.

    Steps, in order:
      1. parse the command line with parseArgs()
      2. set up an empty data container and a zero max-coordinate for each
         of the chromosome indices 1..24
      3. read the input with _readAllSnpDataFile (args.infile, args.include)
      4. configure the resegment module constants and call _resegmentCNdata
      5. add the constant "C:SAMP:cnvrPlatform" feature and write the
         matrix to args.outfile (rows unsorted, columns sorted)

    A failure in step 5 is reported on stdout with the original
    " FATAL ERROR ..." message, followed by the traceback on stderr; the
    function then returns normally, as it always has.  KeyboardInterrupt
    and SystemExit are no longer swallowed.
    """
    import traceback

    args = parseArgs()

    chr2data = {}
    chr2maxcoord = {}
    for index in range(1, 25):
        chrom = new_Level3_matrix_MM28may13.unifychr(str(index))
        chr2data[chrom] = new_Level3_matrix_MM28may13.AutoVivification()
        chr2maxcoord[chrom] = 0

    steplength = 1000
    sampleList = _readAllSnpDataFile(
        args.infile, args.include, chr2data, chr2maxcoord, steplength)

    cutFrac = 0.02
    # these module-level constants must be set before resegmenting
    resegment.NA_VALUE = -999999
    resegment.NEAR_ZERO = 0.0001
    segList, _, dataMatrix = _resegmentCNdata(
        sampleList, chr2data, chr2maxcoord, steplength, cutFrac)

    dataD = {}
    dataD['rowLabels'] = segList
    dataD['colLabels'] = sampleList
    dataD['dataMatrix'] = dataMatrix
    dataD['dataType'] = "%s:%s" % ("N", "CNVR")

    newFeatureName = "C:SAMP:" + "cnvrPlatform"
    newFeatureValue = "Genome_Wide_SNP_6"
    sortRowFlag = 0
    sortColFlag = 1

    try:
        dataD = tsvIO.addConstFeature(dataD, newFeatureName, newFeatureValue)
        tsvIO.writeTSV_dataMatrix(
            dataD, sortRowFlag, sortColFlag, args.outfile)
    except Exception:
        # only genuine errors are caught here; report the underlying cause
        # instead of hiding it behind the generic message
        print(" FATAL ERROR: failed to write out any resegmented copy-number data ")
        traceback.print_exc()
# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# if __name__ == "__main__": if (1): if (len(sys.argv) == 3): inFile = sys.argv[1] outFile = sys.argv[2] else: print " " print " Usage: %s <input TSV file> <output TSV file> " print " " print " ERROR -- bad command line arguments " sys.exit(-1) print " " print " Running : %s %s %s " % (sys.argv[0], sys.argv[1], sys.argv[2]) print " " print " " # now read in the input feature matrix ... dataD = tsvIO.readTSV(inFile) # add new custom features ... dataD = addCustomFeatures(dataD) # and write the matrix back out tsvIO.writeTSV_dataMatrix(dataD, 0, 0, outFile) # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
numCol = len(dataMatrix[0]) skipRowList = [] for iRow in range(numRow): allNA = 1 iCol = 0 while (allNA == 1 and iCol < numCol): if (dataMatrix[iRow][iCol] != NA_VALUE): allNA = 0 iCol += 1 if (allNA): skipRowList += [iRow] if (len(skipRowList) > 0): print " after checking for all-NA features ... " print " number of rows to be skipped : ", len(skipRowList) print " --> number of rows remaining : ", (numRow - len(skipRowList)) outD2 = tsvIO.filter_dataMatrix(outD, skipRowList, []) outD = outD2 # print " " # print " calling writeTSV_dataMatrix ... ", outFile sortRowFlag = 0 sortColFlag = 0 tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile) print " " print " DONE " print " " # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
outD['colLabels'] = unionColLabels outD['dataMatrix'] = outMatrix sortRowFlag = 0 # seems best not to sort the rows sortColFlag = 0 if (sortRowFlag): fTypeList.sort() outD['dataType'] = makeDataTypeString(dTypeList, fTypeList) if (pruneOrder != "NA"): print " " print " now calling pruneTSV_dataMatrix on the merged dataMatrix ... ", pruneOrder outD = tsvIO.pruneTSV_dataMatrix(outD, rowMaxNAfrac, colMaxNAfrac, pruneOrder) else: print " " print " NOT doing any pruning of the merged dataMatrix " print " " print ' (d) TIME ', time.asctime(time.localtime(time.time())) print " calling writeTSV_dataMatrix ... ", outFile tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile) print " " print " DONE ", dTypeList, fTypeList print ' (e) TIME ', time.asctime(time.localtime(time.time())) print " " # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
# Script entry point: read a feature matrix, remove identical features of
# the requested feature type and write the result.
#
#     usage: <script> <input feature matrix> <output feature matrix> <featType>

if __name__ == "__main__":

    # bail out early unless exactly three arguments were given
    if len(sys.argv) != 4:
        print(" Usage : %s <input feature matrix> <output feature matrix> <featType> " % sys.argv[0])
        print(" to avoid filtering on featType, use ANY ")
        sys.exit(-1)

    inFile, outFile, featType = sys.argv[1:4]

    for bannerLine in (" ",
                       " ",
                       " ******************** ",
                       " in filterIdentFeat ",
                       " ******************** "):
        print(bannerLine)

    inD = tsvIO.readTSV(inFile)

    # kept as in the original script: these are module-level names and the
    # 'rowLabels' lookup fails early if the matrix was not read properly
    rowLabels = inD['rowLabels']
    numRow = len(rowLabels)

    outD = removeIdenticalFeatures(inD, featType)

    # write without sorting rows or columns
    tsvIO.writeTSV_dataMatrix(outD, 0, 0, outFile)

    print(" FINISHED ")
    print(" ")

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
else: print " " print " Usage: %s <input TSV file> <output TSV file> " print " " print " ERROR -- bad command line arguments " sys.exit(-1) print " " print " Running : %s %s %s " % (sys.argv[0], sys.argv[1], sys.argv[2]) print " " # read in the input feature matrix first, just in case there # actually isn't one yet available ... testD = tsvIO.readTSV(inFile) try: print len(testD['rowLabels']), len(testD['colLabels']) except: print " --> invalid / missing input feature matrix " sys.exit(-1) # we want to "check" for "deleted" METH probes if (do_summaryMeth): newD = summaryMeth(testD) ## tsvIO.writeTSV_dataMatrix ( newD, 0, 0, outFile ) testD = newD # and finally write it out ... tsvIO.writeTSV_dataMatrix(testD, 0, 0, outFile) # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
fVal = float(tokenList[iC + 1]) dataMatrix[iR][iC] = fVal except: dataMatrix[iR][iC] = NA_VALUE numNA += 1 iR += 1 print " iR=%d numNA=%d " % (iR, numNA) dataD = {} dataD['rowLabels'] = rowLabels dataD['colLabels'] = hdrTokens[1:] dataD['dataMatrix'] = dataMatrix dataD['dataType'] = "N:MIRN" print ' writing out data matrix to ', outFilename newFeatureName = "C:SAMP:mirnPlatform:::::seq" newFeatureValue = zPlat dataD = tsvIO.addConstFeature(dataD, newFeatureName, newFeatureValue) sortRowFlag = 0 sortColFlag = 0 tsvIO.writeTSV_dataMatrix( dataD, sortRowFlag, sortColFlag, outFilename) print ' ' print ' DONE !!! ' print ' ' # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
iCol = 0 while (allNA == 1 and iCol < numCol): if (dataMatrix[iRow][iCol] != NA_VALUE): allNA = 0 iCol += 1 if (allNA): skipRowList += [iRow] if (len(skipRowList) > 0): print " after checking for all-NA features ... " print " number of rows to be skipped : ", len(skipRowList) print " --> number of rows remaining : ", (numRow - len(skipRowList)) outD2 = tsvIO.filter_dataMatrix(outD, skipRowList, []) outD = outD2 # set up sorting options ... sortRowFlag = 0 sortColFlag = 0 rowOrder = [] colOrder = [] # print " " # print " calling writeTSV_dataMatrix ... ", outFile tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile, rowOrder, colOrder) print " " print " DONE " print " " # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
# Script entry point: read a TSV feature matrix, add indicator features
# and write the augmented matrix.
#
#     usage: <script> <input TSV> <output TSV>

if __name__ == "__main__":

    argList = sys.argv
    if len(argList) != 3:
        print(" Usage : %s <input TSV> <output TSV> " % argList[0])
        sys.exit(-1)

    tsvNameIn, tsvNameOut = argList[1], argList[2]

    print(" ")
    print(" ****************************************************************** ")
    print(" reading input file <%s> " % tsvNameIn)
    tmpD = tsvIO.readTSV(tsvNameIn)

    # an empty result from readTSV means there is nothing to work on
    if len(tmpD) == 0:
        print(" in addIndicators ... no input data ... nothing to do here ... ")
        sys.exit(-1)

    # automatically generate indicator features for categorical features
    tmpD = addIndicatorFeatures(tmpD)

    # write without sorting rows or columns
    tsvIO.writeTSV_dataMatrix(tmpD, 0, 0, tsvNameOut)

    for closingLine in (" ", " ", " FINISHED ", " ", " "):
        print(closingLine)

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
outFile = sys.argv[2] else: print " " print " Usage: %s <input TSV file> <output TSV file> " print " " print " ERROR -- bad command line arguments " sys.exit(-1) print " " print " Running : %s %s %s " % (sys.argv[0], sys.argv[1], sys.argv[2]) print " " # read in the input feature matrix first, just in case there # actually isn't one yet available ... testD = tsvIO.readTSV(inFile) try: print len(testD['rowLabels']), len(testD['colLabels']) except: print " --> invalid / missing input feature matrix " sys.exit(-1) # and write it back out ... sortColFlag = 1 # sort the sample barcodes sortRowFlag = 0 # do NOT sort the feature names simpleNamesFlag = 1 # and write out 'simple' names tsvIO.writeTSV_dataMatrix ( testD, sortRowFlag, sortColFlag, \ outFile, [], [], simpleNamesFlag ) # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
if (len(sys.argv) != 3): print " Usage : %s <input TSV> <output TSV> " % sys.argv[0] print " ERROR -- bad command line arguments " sys.exit(-1) tsvNameIn = sys.argv[1] tsvNameOut = sys.argv[2] print " " print " ****************************************************************** " print " reading input file <%s> " % tsvNameIn tmpD = tsvIO.readTSV(tsvNameIn) if (len(tmpD) == 0): print " in addIndicators ... no input data ... nothing to do here ... " sys.exit(-1) # automatically generate indicator features for categorical features tmpD = addIndicatorFeatures(tmpD) tsvIO.writeTSV_dataMatrix(tmpD, 0, 0, tsvNameOut) print " " print " " print " FINISHED " print " " print " " # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
while (allNA == 1 and iCol < numCol): if (dataMatrix[iRow][iCol] != NA_VALUE): allNA = 0 iCol += 1 if (allNA): skipRowList += [iRow] if (len(skipRowList) > 0): print " after checking for all-NA features ... " print " number of rows to be skipped : ", len(skipRowList) print " --> number of rows remaining : ", (numRow - len(skipRowList)) outD2 = tsvIO.filter_dataMatrix(outD, skipRowList, []) outD = outD2 # set up sorting options ... sortRowFlag = 0 sortColFlag = 0 rowOrder = [] colOrder = [] # print " " # print " calling writeTSV_dataMatrix ... ", outFile tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile, rowOrder, colOrder) print " " print " DONE " print " " # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
# check that the feature names are still unique ... print " --> verify that the feature names are unique ... " (newLabels, rmList) = tsvIO.uniqueFeatureLabels(annotD['rowLabels'], annotD['dataMatrix']) print " back from tsvIO.uniqueFeatureLabels " # quick sanity check that labels are still what I think they are ... for ii in range(len(newLabels)): if (not (newLabels[ii] == annotD['rowLabels'][ii])): print " " print " BAILING !!! ", newLabels[ii], annotD['rowLabels'][ii] print " " sys.exit(-1) # remove any 'extra' features that need removing ... if (len(rmList) > 0): print " --> need to remove these rows ", rmList tmpD = tsvIO.filter_dataMatrix(annotD, rmList, []) annotD = tmpD # and write the matrix back out print " --> calling tsvIO.writeTSV_dataMatrix ... " tsvIO.writeTSV_dataMatrix(annotD, 0, 0, outFile) print " " print " DONE " print " " # -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#