def main(options, programName, programVersion):
    """Command-line entry point for the merging tool.

    Parses the command line from *options* (an argparse parser supplied by
    the caller), resolves input/output file names relative to the analysis
    folder, then reads the tag file and merges the listed experiment files
    into a new data file and a new relations file via the project helpers
    ``readTagFile`` / ``processFiles``.  A log is written at the end with
    ``stats.saveFile``.

    :param options:        pre-built ``argparse.ArgumentParser``; arguments
                           are added and parsed here.
    :param programName:    tool name, used for the log header and the
                           default analysis prefix.
    :param programVersion: version string, only used in the log header.
    """
    ## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    # basic default info
    logFile = ""
    tagFile = ""
    analysisName = ""
    analysisFolder = ""
    defaultAnalysisName = programName.lower()
    verbose = False
    separator = "_"
    # default extensions
    # begin: jmrc
    # defaultTableExtension = ".xls"
    defaultTextExtension = ".txt"
    # defaultTextExtension = ".tsv"
    # end: jmrc
    # default file names
    defaultLogFile = "logFile"
    defaultNewRelsFile = "newRels"
    defaultNewDataFile = "newData"
    newRelsFile = ""
    newDataFile = ""
    # basic log file
    # logList is a list of single-element lists, the row format expected by
    # stats.saveFile.
    logList = [[programName + " " + programVersion],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    # parsing arguments from commandline
    # NOTE(review): -a and -p are declared required=True, yet the code below
    # still carries fallbacks for empty values — confirm whether they should
    # really be mandatory.
    options.add_argument("-a", "--analysis", type = str, default = "", required = True,
                         help = "Use a prefix for the output files.")
    options.add_argument("-p", "--place", type = str, default = "", required = True,
                         help = "To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
    options.add_argument("-L", "--logfile", type = str, default = "", required = False,
                         help = "To use a non-default name for the log file.")
    options.add_argument("-t", "--tagfile", type = str, default = "", required = True,
                         help = "The file containing the tags used for the different experiments to be joined.")
    options.add_argument("-d", "--datafile", type = str, default = "", required = False,
                         help = "To use a non-default merged data file name.")
    options.add_argument("-r", "--relfile", type = str, default = "", required = False,
                         help = "To use a non-default merged relations file name.")
    options.add_argument("-s", "--separator", type = str, default = "_", required = False,
                         help = """To use a non-default suffix separator (default is "_").""")
    options.add_argument("-v", "--verbose", action = "store_true",
                         help = "To write down extra information about operations performed.")
    # *-*-* add easter egg
    arguments = options.parse_args()
    # copying parsed arguments
    # copy any arguments used (empty strings keep the defaults set above)
    if len(arguments.analysis) > 0: analysisName = arguments.analysis
    if len(arguments.place) > 0: analysisFolder = arguments.place
    if len(arguments.logfile) > 0: logFile = arguments.logfile
    if len(arguments.tagfile) > 0: tagFile = arguments.tagfile
    if len(arguments.datafile) > 0: newDataFile = arguments.datafile
    if len(arguments.relfile) > 0: newRelsFile = arguments.relfile
    if len(arguments.separator) > 0: separator = arguments.separator
    verbose = arguments.verbose
    ## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    ## **********************************************************
    ## REGION: FILE NAMES SETUP
    # Derive the analysis prefix from the tag file name when not given.
    if len(analysisName) == 0:
        if len(tagFile) > 0:
            analysisName = os.path.splitext(os.path.basename(tagFile))[0]
        else:
            analysisName = defaultAnalysisName
    # If the analysis name carries a directory part, split it off and use it
    # as the analysis folder (unless one was given explicitly).
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    # input
    # A bare tag-file name (no directory) is looked up in the analysis folder.
    if len(os.path.dirname(tagFile)) == 0:
        tagFile = os.path.join(analysisFolder, tagFile)
    # output
    # For each output: build a default name if empty, then prepend the
    # analysis folder when only a bare file name was supplied.
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    if len(newRelsFile) == 0:
        newRelsFile = os.path.join(analysisFolder, analysisName + "_" + defaultNewRelsFile + defaultTextExtension)
    if len(os.path.dirname(newRelsFile)) == 0 and len(os.path.basename(newRelsFile)) > 0:
        newRelsFile = os.path.join(analysisFolder, newRelsFile)
    if len(newDataFile) == 0:
        newDataFile = os.path.join(analysisFolder, analysisName + "_" + defaultNewDataFile + defaultTextExtension)
    if len(os.path.dirname(newDataFile)) == 0 and len(os.path.basename(newDataFile)) > 0:
        newDataFile = os.path.join(analysisFolder, newDataFile)
    logList.append([""])
    logList.append(["Input tags file: " + tagFile])
    logList.append(["Output new data file: " + newDataFile])
    logList.append(["Output new relations file: " + newRelsFile])
    logList.append(["Output log file: " + logFile])
    logList.append([""])
    ## END REGION: FILE NAMES SETUP
    ## **********************************************************
    ## REGION: PROGRAM BASIC STRUCTURE
    # readTagFile/processFiles are project helpers; processFiles does the
    # actual reading/merging/writing and returns log rows.
    tagList, dataFileList = readTagFile(tagFile, defaultFolder = analysisFolder)
    processMessage = processFiles(tagList, dataFileList,
                                  verbose = verbose,
                                  separator = separator,
                                  dataFile = newDataFile,
                                  relsFile = newRelsFile)
    logList.extend(processMessage)
    ## END REGION: PROGRAM BASIC STRUCTURE
    ## **********************************************************
    ## REGION: SAVING FILES
    ## exceptionally, due to memory errors, the data are read and written in processFiles
    # try:
    #     # operations here
    #     logList.append(["Saving new data file..."])
    #     stats.saveFile(newDataFile, newDataList, "idinf\tX'inf\tVinf")
    # except Exception:
    #     logList.append(["Error."])
    #     stats.saveFile(logFile, logList, "LOG FILE")
    #     sys.exit(2)
    # try:
    #     # operations here
    #     logList.append(["Saving new relations file..."])
    #     stats.saveFile(newRelsFile, newRelsList, "idsup\tidinf")
    # except Exception:
    #     stats.saveFile(logFile, logList, "LOG FILE")
    #     logList.append(["Error."])
    #     sys.exit(2)
    # NOTE(review): this try/except only wraps a list append and can never
    # realistically fail; it is a leftover skeleton of the disabled saving
    # code above.
    try:
        # operations here
        logList.append(["Looks like everything went fine."])
    except Exception:
        logList.append(["Error."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    stats.saveFile(logFile, logList, "LOG FILE")
def showGraphTool(inputRawData, inputRelations, k, variance, alpha, medianSide,
                  verbose=False, showRank=False, graphFile=None,
                  graphData=None, dpi=None, showGraph=True):
    """Plot the MAD distribution against the inverse of the fitted curve.

    Sorts the inputs in place, recomputes the merged data via
    ``getNextIdX_klibrate``, trims ``medianSide`` edge points from each
    series, and plots either rank-of-V (``showRank=True``, optionally
    dumping the plotted series to *graphData*) or value-of-V curves.

    :param inputRawData:   raw data rows; sorted in place (side effect).
    :param inputRelations: relation rows; sorted in place (side effect).
    :param k, variance, alpha: calibration parameters for the fit.
    :param medianSide:     half-width of the running-median window; the
                           total window is ``2 * medianSide + 1``.
    :param showRank:       plot vs. rank instead of vs. value.
    :param graphFile:      when set, the figure is saved here.
    :param graphData:      when set (and showRank), plotted data are saved
                           as a table via ``stats.saveFile``.
    :param dpi:            forwarded to ``plt.savefig``.
    :param showGraph:      when True, the figure is shown interactively.
    """
    plt.clf()
    inputRawData.sort()
    inputRelations.sort()
    windowWidth = medianSide * 2 + 1
    if len(inputRawData) < windowWidth:
        print('Error: window for median is bigger than total input size')
        sys.exit(1)
    # output = makeStats(k, variance, input = input)
    nextIdXData = getNextIdX_klibrate(inputRawData, inputRelations, k,
                                      variance, alpha, giveMergedData=True)
    nextIdX = nextIdXData[0]
    mergedData = nextIdXData[1]
    MADdistrOut, weights = getMADDistribution(nextIdX, mergedData, k,
                                              variance, alpha, medianSide)
    invOfFitOut = getInverseOfFit(mergedData, k, variance, alpha)
    # trim the half-window at each edge where the running median is not
    # fully defined
    MADdistrOut = MADdistrOut[medianSide:len(MADdistrOut) - medianSide + 1]
    invOfFitOut = invOfFitOut[medianSide:len(invOfFitOut) - medianSide + 1]
    weights = weights[medianSide:len(weights) - medianSide + 1]
    if showRank:
        plt.plot(list(range(len(MADdistrOut))), MADdistrOut, 'g.',
                 list(range(len(invOfFitOut))), invOfFitOut, 'r',
                 linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
        plt.xlabel('rank($V_s$)')
        plt.ylabel('1 / MSD')
        # to save data
        # *** use a better filename
        dataToSave = [[i, weights[i], MADdistrOut[i], invOfFitOut[i]]
                      for i in range(len(MADdistrOut))]
        if graphData:
            stats.saveFile(graphData, dataToSave,
                           "rank(Vs)\tweight\tMAD\t1/fit")
    else:
        # uncomment to graph MSD instead of 1 / MSD
        #
        # for i in xrange(len(invOfFitOut)):
        #     invOfFitOut[i] = 1 / invOfFitOut[i]
        # for i in xrange(len(MADdistrOut)):
        #     MADdistrOut[i] = 1 / MADdistrOut[i]
        plt.plot(weights, MADdistrOut, 'g.', weights, invOfFitOut, 'r',
                 linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
        plt.xlabel('($V_s$)')
        plt.ylabel('1 / MSD')
    plt.grid(True)
    # raw string: '\s' is not a valid escape sequence; the mathtext needs a
    # literal backslash for \sigma
    plt.title(r'k = %g, $\sigma^2$ = %g, alpha = %g' % (k, variance, alpha))
    if graphFile:
        plt.savefig(graphFile, dpi=dpi)
    if showGraph:
        plt.show()
def main(argv):
    """Klibrate command-line entry point (v1.19).

    Parses getopt-style options, resolves all input/output file names
    relative to the analysis folder, optionally loads the variance / K
    seeds from files, runs ``calibrate`` and writes the calibrated data,
    the info (log) file, and the graph files.

    :param argv: raw argument list (typically ``sys.argv[1:]``).
    """
    version = "v1.19"
    verbose = True
    showGraph = True
    graphDPI = 100  # default of Matplotlib's savefig method
    showSteps = True
    forceParameters = False
    kSeed = 1.0
    varianceSeed = 0.001
    alphaSeed = 1.0
    useCooperativity = False
    medianSide = 100
    maxIterations = 0
    dataFile = ""
    relationsFile = ""
    outputCalibrated = ""
    infoFile = ""
    kFile = ""
    kSeedProvided = False
    varFile = ""
    varianceSeedProvided = False
    graphFileVRank = ""
    graphFileVValue = ""
    graphDataFile = ""
    showRank = False
    analysisName = ""
    defaultAnalysisName = "klibrate"
    analysisFolder = ""
    logList = [["Klibrate " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultOutputInfo = "infoFile"
    defaultOutputGraphVRank = "outGraph_VRank"
    defaultOutputGraphVValue = "outGraph_VValue"
    defaultGraphDataFile = "outGraph_Data"
    defaultOutputCalibrated = "calibrated"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    try:
        opts, args = getopt.getopt(
            argv, "a:p:k:v:c:d:r:o:w:m:L:G:D:R:K:V:bgsfh", [
                "analysis=", "folder=", "kseed=", "varianceseed=",
                "alphaseed=", "datafile=", "relfile=", "outputfile=",
                "window=", "maxiterations=", "infofile=", "outgraphvrank=",
                "outgraphvvalue=", "outgraphdata=", "kfile=", "varfile=",
                "no-verbose", "no-showgraph", "no-showsteps",
                "forceparameters", "showrank", "help", "egg", "easteregg"
            ])
    except getopt.GetoptError:
        message = "Error while getting parameters."
        print(message)
        logList.append([message])
        # stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-k", "--kseed"):
            kSeed = float(arg)
            kSeedProvided = True
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-c", "--alphaseed"):
            useCooperativity = True
            alphaSeed = float(arg)
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-o", "--outputfile"):
            outputCalibrated = arg
        elif opt in ("-w", "--window"):
            # window width must be odd; medianSide is the half-window
            windowWidth = round(float(arg))
            if windowWidth % 2 == 0:
                windowWidth += 1
            medianSide = int((windowWidth - 1) / 2)
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt in ("-g", "--no-showgraph"):
            showGraph = False
        elif opt in ("-s", "--no-showsteps"):
            showSteps = False
        elif opt in ("-m", "--maxiterations"):
            maxIterations = int(arg)
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-G", "--outgraphvvalue"):
            graphFileVValue = arg
        elif opt in ("-D", "--outgraphdata"):
            graphDataFile = arg
        elif opt in ("-K", "--kfile"):
            kFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-f", "--forceparameters"):
            forceParameters = True
        elif opt in ("-R", "--outgraphvrank"):
            graphFileVRank = arg
        elif opt in ("--showrank",):
            # fix: "showrank" was accepted by getopt but never handled,
            # so showRank could never be enabled from the command line
            showRank = True
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # showing the iteration steps implies verbosity
    verbose = verbose or showSteps
    # REGION: FILE NAMES SETUP
    # derive the analysis prefix from the data file name when not given
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName
    # a directory part in the analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)
    # input: default names, then prepend the folder to bare file names
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)
    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    if len(os.path.dirname(varFile)) == 0 and len(
            os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)
    # a variance file overrides the default seed unless -v was given
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file"])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()
    if len(os.path.dirname(kFile)) == 0 and len(os.path.basename(kFile)) > 0:
        kFile = os.path.join(analysisFolder, kFile)
    # a K file overrides the default seed unless -k was given
    if len(kFile) > 0 and not kSeedProvided:
        kSeed, KOk = stats.extractKFromKFile(kFile, verbose=verbose,
                                             defaultSeed=kSeed)
        if not KOk:
            logList.append(["K not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()
    # output
    if len(outputCalibrated) == 0:
        outputCalibrated = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputCalibrated +
            defaultTableExtension)
    else:
        if len(os.path.dirname(outputCalibrated)) == 0:
            outputCalibrated = os.path.join(analysisFolder, outputCalibrated)
    if len(graphFileVRank) == 0:
        graphFileVRank = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputGraphVRank +
            defaultGraphExtension)
    if len(graphFileVValue) == 0:
        graphFileVValue = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputGraphVValue +
            defaultGraphExtension)
    if len(graphDataFile) == 0:
        graphDataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultGraphDataFile + defaultTableExtension)
    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)
    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["K seed = " + str(kSeed)])
    if useCooperativity:
        logList.append(["Alpha seed = " + str(alphaSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Output calibrated file: " + outputCalibrated])
    logList.append(["Output info file: " + infoFile])
    logList.append(["Output V rank graph file: " + graphFileVRank])
    logList.append(["Output V graph file: " + graphFileVValue])
    logList.append(["Output data file for graph: " + graphDataFile])
    logList.append(["Parameters forced: " + str(forceParameters)])
    logList.append(["Max iterations: " + str(maxIterations)])
    # END REGION: FILE NAMES SETUP
    calibratedData, variance, k, alpha, extraLog = calibrate(
        rawDataFile=dataFile,
        relationsFile=relationsFile,
        kSeed=kSeed,
        varianceSeed=varianceSeed,
        medianSide=medianSide,
        maxIterations=maxIterations,
        verbose=showSteps,
        showGraph=showGraph,
        forceParameters=forceParameters,
        alphaSeed=alphaSeed,
        showRank=showRank,
        useCooperativity=useCooperativity,
        graphFileVRank=graphFileVRank,
        graphFileVValue=graphFileVValue,
        graphDataFile=graphDataFile,
        graphDPI=graphDPI)
    logList.extend(extraLog)
    # calibration failure: save the log and bail out
    if not calibratedData:
        if len(infoFile) > 0:
            stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit()
    if len(outputCalibrated) > 0:
        stats.saveFile(outputCalibrated, calibratedData, "id\tX\tVcal")
    logList.append([])
    logList.append(["K = " + str(k)])
    logList.append(["Variance = " + str(variance)])
    if useCooperativity:
        logList.append(["Alpha = " + str(alpha)])
    if len(infoFile) > 0:
        stats.saveFile(infoFile, logList, "INFO FILE")
    if verbose:
        print()
        print("*** results ***")
        print("k = " + str(k))
        print("variance = " + str(variance))
        if useCooperativity:
            print("alpha = " + str(alpha))
        print()
        print("Output calibrated file in: " + outputCalibrated)
        print("Graph with rank of V in: " + graphFileVRank)
        print("Graph with value of V in: " + graphFileVValue)
        print("Info file in: " + infoFile)
def main(argv):
    """Klibrate command-line entry point (v1.18).

    Parses getopt-style options, resolves input/output file names relative
    to the analysis folder, optionally loads variance / K seeds from files,
    runs ``calibrate`` and writes the calibrated data, the info (log) file,
    and the graph files.

    NOTE(review): this file also contains an almost identical, reformatted
    v1.19 copy of this function — confirm which one is the live version.

    :param argv: raw argument list (typically ``sys.argv[1:]``).
    """
    version = "v1.18"
    verbose = True
    showGraph = True
    graphDPI = 100  # default of Matplotlib's savefig method
    showSteps = True
    forceParameters = False
    kSeed = 1.0
    varianceSeed = 0.001
    alphaSeed = 1.0
    useCooperativity = False
    medianSide = 100
    maxIterations = 0
    dataFile = ""
    relationsFile = ""
    outputCalibrated = ""
    infoFile = ""
    kFile = ""
    kSeedProvided = False
    varFile = ""
    varianceSeedProvided = False
    graphFileVRank = ""
    graphFileVValue = ""
    graphDataFile = ""
    showRank = False
    analysisName = ""
    defaultAnalysisName = "klibrate"
    analysisFolder = ""
    logList = [["Klibrate " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultOutputInfo = "infoFile"
    defaultOutputGraphVRank = "outGraph_VRank"
    defaultOutputGraphVValue = "outGraph_VValue"
    defaultGraphDataFile = "outGraph_Data"
    defaultOutputCalibrated = "calibrated"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    # NOTE(review): "showrank" is declared as a long option below but no
    # handler in the for-loop ever sets showRank, so it stays False.
    try:
        opts, args = getopt.getopt(argv,
                                   "a:p:k:v:c:d:r:o:w:m:L:G:D:R:K:V:bgsfh",
                                   ["analysis=", "folder=", "kseed=",
                                    "varianceseed=", "alphaseed=",
                                    "datafile=", "relfile=", "outputfile=",
                                    "window=", "maxiterations=", "infofile=",
                                    "outgraphvrank=", "outgraphvvalue=",
                                    "outgraphdata=", "kfile=", "varfile=",
                                    "no-verbose", "no-showgraph",
                                    "no-showsteps", "forceparameters",
                                    "showrank", "help", "egg", "easteregg"])
    except getopt.GetoptError:
        message = "Error while getting parameters."
        print(message)
        logList.append([message])
        # stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        # NOTE(review): the first three branches use "if" instead of
        # "elif"; harmless (an opt matches at most one) but inconsistent.
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-k", "--kseed"):
            kSeed = float(arg)
            kSeedProvided = True
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-c", "--alphaseed"):
            useCooperativity = True
            alphaSeed = float(arg)
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-o", "--outputfile"):
            outputCalibrated = arg
        elif opt in ("-w", "--window"):
            # the window width is forced to be odd; medianSide is the
            # half-window used by the running median
            windowWidth = round(float(arg))
            if windowWidth % 2 == 0:
                windowWidth += 1
            medianSide = int((windowWidth - 1) / 2)
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt in ("-g", "--no-showgraph"):
            showGraph = False
        elif opt in ("-s", "--no-showsteps"):
            showSteps = False
        elif opt in ("-m", "--maxiterations"):
            maxIterations = int(arg)
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-G", "--outgraphvvalue"):
            graphFileVValue = arg
        elif opt in ("-D", "--outgraphdata"):
            graphDataFile = arg
        elif opt in ("-K", "--kfile"):
            kFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-f", "--forceparameters"):
            forceParameters = True
        elif opt in ("-R", "--outgraphvrank"):
            graphFileVRank = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # showing the iteration steps implies verbosity
    verbose = verbose or showSteps
    # REGION: FILE NAMES SETUP
    # derive the analysis prefix from the data file name when not given
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName
    # a directory part in the analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)
    # input: default names, then prepend the folder to bare file names
    if len(dataFile) == 0:
        dataFile = os.path.join(analysisFolder, analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)
    if len(relationsFile) == 0:
        relationsFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)
    # a variance file overrides the default seed unless -v was given
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = verbose, defaultSeed = varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file"])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()
    if len(os.path.dirname(kFile)) == 0 and len(os.path.basename(kFile)) > 0:
        kFile = os.path.join(analysisFolder, kFile)
    # a K file overrides the default seed unless -k was given
    if len(kFile) > 0 and not kSeedProvided:
        kSeed, KOk = stats.extractKFromKFile(kFile, verbose = verbose, defaultSeed = kSeed)
        if not KOk:
            logList.append(["K not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()
    # output
    if len(outputCalibrated) == 0:
        outputCalibrated = os.path.join(analysisFolder, analysisName + "_" + defaultOutputCalibrated + defaultTableExtension)
    else:
        if len(os.path.dirname(outputCalibrated)) == 0:
            outputCalibrated = os.path.join(analysisFolder, outputCalibrated)
    if len(graphFileVRank) == 0:
        graphFileVRank = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraphVRank + defaultGraphExtension)
    if len(graphFileVValue) == 0:
        graphFileVValue = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraphVValue + defaultGraphExtension)
    if len(graphDataFile) == 0:
        graphDataFile = os.path.join(analysisFolder, analysisName + "_" + defaultGraphDataFile + defaultTableExtension)
    if len(infoFile) == 0:
        infoFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputInfo + defaultTextExtension)
    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["K seed = " + str(kSeed)])
    if useCooperativity:
        logList.append(["Alpha seed = " + str(alphaSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Output calibrated file: " + outputCalibrated])
    logList.append(["Output info file: " + infoFile])
    logList.append(["Output V rank graph file: " + graphFileVRank])
    logList.append(["Output V graph file: " + graphFileVValue])
    logList.append(["Output data file for graph: " + graphDataFile])
    logList.append(["Parameters forced: " + str(forceParameters)])
    logList.append(["Max iterations: " + str(maxIterations)])
    # END REGION: FILE NAMES SETUP
    calibratedData, variance, k, alpha, extraLog = calibrate(rawDataFile = dataFile,
                                                             relationsFile = relationsFile,
                                                             kSeed = kSeed,
                                                             varianceSeed = varianceSeed,
                                                             medianSide = medianSide,
                                                             maxIterations = maxIterations,
                                                             verbose = showSteps,
                                                             showGraph = showGraph,
                                                             forceParameters = forceParameters,
                                                             alphaSeed = alphaSeed,
                                                             showRank = showRank,
                                                             useCooperativity = useCooperativity,
                                                             graphFileVRank = graphFileVRank,
                                                             graphFileVValue = graphFileVValue,
                                                             graphDataFile = graphDataFile,
                                                             graphDPI = graphDPI)
    logList.extend(extraLog)
    # calibration failure: save the log and bail out
    if not calibratedData:
        if len(infoFile) > 0:
            stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit()
    if len(outputCalibrated) > 0:
        stats.saveFile(outputCalibrated, calibratedData, "id\tX\tVcal")
    logList.append([])
    logList.append(["K = " + str(k)])
    logList.append(["Variance = " + str(variance)])
    if useCooperativity:
        logList.append(["Alpha = " + str(alpha)])
    if len(infoFile) > 0:
        stats.saveFile(infoFile, logList, "INFO FILE")
    if verbose:
        print()
        print("*** results ***")
        print("k = " + str(k))
        print("variance = " + str(variance))
        if useCooperativity:
            print("alpha = " + str(alpha))
        print()
        print("Output calibrated file in: " + outputCalibrated)
        print("Graph with rank of V in: " + graphFileVRank)
        print("Graph with value of V in: " + graphFileVValue)
        print("Info file in: " + infoFile)
def main(argv):
    """Tree-drawing command-line entry point (v1.06).

    Parses getopt-style options, resolves file names relative to the
    analysis folder, loads the category paths and the list of categories
    to highlight, builds a GraphViz .gv description via
    ``createGVFileTree``, renders it with ``createDOTTree`` and writes a
    log file.

    Fixes vs. the previous revision:
    * Python 2 ``print`` statements converted to ``print()`` calls (they
      are syntax errors under Python 3, which the rest of this file uses).
    * the duplicated ``-d/--dotfile`` handler removed: the first copy did
      ``dotFile = float(arg)`` (crashes on any file name) and shadowed the
      real one that stores the output file name.

    :param argv: raw argument list (typically ``sys.argv[1:]``).
    """
    version = "v1.06"
    verbose = False
    showGraph = True
    showLegend = True  # NOTE(review): never read in this function
    analysisName = ""
    graphLimits = 6.0
    defaultAnalysisName = "arbor"
    analysisFolder = ""
    # input files
    inStats = ""
    useSubStats = False  # True if inStats (-z) and relationsFile (-r) are provided
    bigListFile = ""
    defaultStatsFile = "stats"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGVFileExtension = ".gv"
    relationsFile = ""
    listOfCategoriesFile = ""
    # output files
    defaultListOfCategoriesFile = "ulst"
    defaultBigListFile = "table_allPaths.tsv"
    defaultOutputFile = "outNodes"
    defaultOutputGraphFile = "outTree"
    defaultLogFile = "logFile"
    logFile = ""
    outputFile = ""
    graphFile = ""
    similarityMatrixFile = ""  # NOTE(review): never read in this function
    graphFileFormat = "png"
    altMax = 5
    selectedNodeColour = "#ff9090"
    defaultNodeColour = "#ffff80"
    errorNodeColour = "#8080ff"
    minColour = "#00ff00"
    middleColour = "#ffffff"
    maxColour = "#ff0000"
    # NOTE(review): log header says "SanXoTGauss" while the default analysis
    # name is "arbor" — looks like a copy-paste leftover; confirm.
    logList = [["SanXoTGauss " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        opts, args = getopt.getopt(argv, "a:p:z:c:d:l:L:b:G:r:g:N:h", [
            "analysis=", "folder=", "place=", "statsfile=", "list=",
            "dotfile=", "graphlimits=", "logfile=", "biglist=", "graphfile=",
            "relfile=", "relationsfile=", "graphformat=", "altmax=",
            "selectednodecolour=", "selectednodecolor=",
            "defaultnodecolour=", "defaultnodecolor=",
            "errornodecolour=", "errornodecolor=",
            "mincolour=", "mincolor=",
            "middlecolour=", "middlecolor=",
            "maxcolour=", "maxcolor=",
            "help", "egg", "easteregg"
        ])
    except getopt.GetoptError:
        message = "Error while getting parameters."
        print(message)
        logList.append([message])
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-z", "--statsfile"):
            inStats = arg
        elif opt in ("-c", "--list"):
            # outList from SanXoTSqueezer
            listOfCategoriesFile = arg
        elif opt in ("-d", "--dotfile"):
            outputFile = arg
        elif opt in ("-l", "--graphlimits"):
            graphLimits = float(arg)
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-G", "--graphfile"):
            graphFile = arg
        elif opt in ("-b", "--biglist"):
            # table_allPaths.xls from GOconnect
            bigListFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-N", "--altmax"):
            altMax = int(arg)
        elif opt in ("-g", "--graphformat"):
            graphFileFormat = arg.lower().strip()
            if graphFileFormat == "jpeg":
                graphFileFormat = "jpg"
            # fall back to png on unsupported formats
            if graphFileFormat != "png" and \
                    graphFileFormat != "svg" and \
                    graphFileFormat != "jpg" and \
                    graphFileFormat != "gif":
                print()
                print("Warning: graph format \"%s\" is not supported,\npng will be used instead." % graphFileFormat)
                print()
                graphFileFormat = "png"
        elif opt in ("--selectednodecolour", "--selectednodecolor"):
            selectedNodeColour = arg
        elif opt in ("--defaultnodecolour", "--defaultnodecolor"):
            defaultNodeColour = arg
        elif opt in ("--errornodecolour", "--errornodecolor"):
            errorNodeColour = arg
        elif opt in ("--mincolour", "--mincolor"):
            minColour = arg
        elif opt in ("--middlecolour", "--middlecolor"):
            middleColour = arg
        elif opt in ("--maxcolour", "--maxcolor"):
            maxColour = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    if len(inStats) > 0 and len(relationsFile) > 0:
        useSubStats = True
    # REGION: FILE NAMES SETUP
    defaultGraphExtension = "." + graphFileFormat
    # derive the analysis prefix from the stats file name when not given
    if len(analysisName) == 0:
        if len(inStats) > 0:
            analysisName = os.path.splitext(os.path.basename(inStats))[0]
        else:
            analysisName = defaultAnalysisName
    # a directory part in the analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(inStats) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(inStats)) > 0:
            analysisFolder = os.path.dirname(inStats)
    # input: default names, then prepend the folder to bare file names
    if len(inStats) == 0:
        inStats = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultStatsFile + defaultTableExtension)
    if len(os.path.dirname(inStats)) == 0 and len(analysisFolder) > 0:
        inStats = os.path.join(analysisFolder, inStats)
    if len(os.path.dirname(relationsFile)) == 0 and len(analysisFolder) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    if len(listOfCategoriesFile) == 0:
        listOfCategoriesFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultListOfCategoriesFile +
            defaultTextExtension)
    if len(os.path.dirname(listOfCategoriesFile)) == 0 and len(
            os.path.basename(listOfCategoriesFile)) > 0:
        listOfCategoriesFile = os.path.join(analysisFolder,
                                            listOfCategoriesFile)
    if len(bigListFile) == 0:
        bigListFile = defaultBigListFile
    if len(os.path.dirname(bigListFile)) == 0 and len(
            os.path.basename(bigListFile)) > 0:
        bigListFile = os.path.join(analysisFolder, bigListFile)
    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputFile + defaultGVFileExtension)
    if len(graphFile) == 0:
        graphFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputGraphFile +
            defaultGraphExtension)
    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(outputFile)) == 0 and len(
            os.path.basename(outputFile)) > 0:
        outputFile = os.path.join(analysisFolder, outputFile)
    if len(os.path.dirname(graphFile)) == 0 and len(
            os.path.basename(graphFile)) > 0:
        graphFile = os.path.join(analysisFolder, graphFile)
    if len(os.path.dirname(logFile)) == 0 and len(
            os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    logList.append([""])
    logList.append(["Input stats file: " + inStats])
    logList.append(["File with categories to check: " + listOfCategoriesFile])
    logList.append(["Output GV table: " + outputFile])
    logList.append(["Output graph table: " + graphFile])
    logList.append(["Output log file: " + logFile])
    logList.append([""])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP
    bigList = stats.load2stringList(bigListFile, removeCommas=True)
    higherElements = stats.load2stringList(listOfCategoriesFile,
                                           removeCommas=True)
    if higherElements[0] == ['id', 'Z', 'n'] or \
            higherElements[0] == ['id', 'n', 'Z', 'FDR']:
        # this means the list comes from SanXoTSqueezer
        # so the header and the extra columns have to be removed
        higherElements = stats.extractColumns(higherElements[1:], 0)
    else:
        # only removing extra columns and converting list into text
        higherElements = stats.extractColumns(higherElements, 0)
    if useSubStats:
        # TODO: add output for the log ***
        subData = stats.arrangeSubData(inStats=inStats,
                                       higherElements=higherElements,
                                       relFile=relationsFile,
                                       ignoreNaNsInFDR=False)
    else:
        subData = None
    GVFileText = createGVFileTree(bigList, higherElements,
                                  subData=subData,
                                  ZLimit=graphLimits,
                                  altMax=altMax,
                                  defaultNodeColour=defaultNodeColour,
                                  errorNodeColour=errorNodeColour,
                                  minColour=minColour,
                                  middleColour=middleColour,
                                  maxColour=maxColour)
    stats.saveTextFile(outputFile, GVFileText)
    createDOTTree(
        outputFile,
        graphFile,
        imageFormat=graphFileFormat,
    )
    stats.saveFile(logFile, logList, "LOG FILE")
def main(argv):
    """Command-line entry point for SanXoTSieve.

    Parses argv, resolves input/output file names relative to the analysis
    folder, loads a data file and a relations file, and either removes
    ("old way") or tags outlier relations, saving the resulting relations
    files and an info/log file.
    """
    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    defaultAnalysisName = "sanxotsieve"  # fix: was referenced below but never defined (NameError when -d omitted)
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"
    outlierTag = "out"
    logicOperatorsAsWords = False
    # input files
    dataFile = ""
    relationsFile = ""
    # output files
    newRelFile = ""
    removedRelFile = ""
    # default file name parts
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    verbose = True
    oldWay = False  # instead of tagging outliers, separating relations files, the old way
    modeUsed = mode.onePerHigher
    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # "place=" and "relationsfile=" added: the handlers below check
        # --place and --relationsfile, but they were missing from the list.
        opts, args = getopt.getopt(argv, "a:p:v:d:r:n:L:V:f:ubDhH",
                                   ["analysis=", "place=", "folder=", "varianceseed=",
                                    "datafile=", "relfile=", "relationsfile=",
                                    "newrelfile=", "outlierrelfile=", "infofile=",
                                    "varfile=", "fdrlimit=", "one-to-one",
                                    "no-verbose", "randomise", "removeduplicateupper",
                                    "help", "advanced-help", "tags=", "outliertag=",
                                    "oldway", "word-operators"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        # NOTE(review): infoFile is still "" at this point, so this writes to
        # an empty path — kept as in the original; confirm intended behavior.
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # fix: this option used to set removedRelFile, leaving newRelFile
            # impossible to override and --outlierrelfile unhandled.
            newRelFile = arg
        elif opt in ("--outlierrelfile",):
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt in ("--oldway",):  # fix: ("--oldway") was a string, i.e. a substring test
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt in ("--tags",):
            # keep the implicit "!out" filter unless the user asked for exactly it
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt in ("--word-operators",):
            logicOperatorsAsWords = True
        elif opt in ("--outliertag",):
            outlierTag = arg  # fix: was hard-coded to "out", silently ignoring the argument
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced = True)
            sys.exit()

    # REGION: FILE NAMES SETUP
    # derive the analysis name from the data file when not given explicitly
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-like analysis name also supplies the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)
    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(analysisFolder, analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)
    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)
    # a variance file overrides the default seed unless -v was given explicitly
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = verbose, defaultSeed = varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()
    if len(relationsFile) == 0:
        relationsFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    # output
    if len(newRelFile) == 0:
        if oldWay:
            # suffix: "cleaned"
            newRelFile = os.path.join(analysisFolder, analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:
            # suffix: "tagged"
            newRelFile = os.path.join(analysisFolder, analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)
    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(analysisFolder, analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)
    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)
    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)
    if len(infoFile) == 0:
        infoFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(["Output relations file without outliers: " + newRelFile])
        logList.append(["Output relations file with outliers only: " + removedRelFile])
        logList.append(["Removing duplicate higher level elements: " + str(removeDuplicateUpper)])
        logList.append(["OldWay option activated: outliers are removed instead of tagged"])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP

    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)
    if oldWay:
        # only for backward compatibility. Note that tags are not supported
        newRelations, removedRelations, logResults = \
            getRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit = FDRLimit,
                                        modeUsed = modeUsed,
                                        removeDuplicateUpper = removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
            tagRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit = FDRLimit,
                                        modeUsed = modeUsed,
                                        removeDuplicateUpper = removeDuplicateUpper,
                                        tags = tags,
                                        outlierTag = outlierTag,
                                        logicOperatorsAsWords = logicOperatorsAsWords)
    # the tagged file carries an extra "tags" column
    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")
    stats.saveFile(infoFile, logList, "INFO FILE")
    # the outliers-only file is produced in oldWay mode only
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
def main(argv):
    """Command-line entry point for SanXoTSqueezer.

    Parses argv, resolves lower/higher stats file names relative to the
    analysis folder, filters higher-level categories by element count and
    FDR (or |Z|), and saves the filtered list plus a log file.
    """
    version = "v0.14"
    analysisName = ""
    defaultAnalysisName = "squeeze"
    analysisFolder = ""
    # parametres
    minimumElements = 2
    maximumElements = 1e6
    maximumFDR = 0.05
    minimumZ = 0.0  # take all by default
    filterByFDR = True  # if false, then it filters by abs(Z)
    # input files
    lowerStats = ""
    higherStats = ""
    defaultLowerStatsFile = "lower"
    defaultHigherStatsFile = "upper"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultOutputFile = "outList"
    defaultLogFile = "logFile"
    # output files
    logFile = ""
    outputFile = ""
    logList = [["SanXoTSqueezer " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # "upperstats=" and "folder=" added: the handlers below check
        # --upperstats and --folder, but they were missing from the list,
        # so those spellings raised GetoptError.
        opts, args = getopt.getopt(argv, "a:l:L:o:p:u:n:N:f:z:h",
                                   ["analysis=", "lowerstats=", "upperstats=",
                                    "logfile=", "outputfile=", "place=", "folder=",
                                    "minelements=", "maxelements=", "fdr=",
                                    "sigmas=", "help"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-l", "--lowerstats"):
            lowerStats = arg
        if opt in ("-u", "--upperstats"):
            higherStats = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-n", "--minelements"):
            minimumElements = int(arg)
        elif opt in ("-N", "--maxelements"):
            maximumElements = int(arg)
        elif opt in ("-f", "--fdr"):
            maximumFDR = float(arg)
        elif opt in ("-z", "--sigmas"):
            # asking for a Z cutoff switches the filter mode away from FDR
            filterByFDR = False
            minimumZ = float(arg)
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()

    # REGION: FILE NAMES SETUP
    # derive the analysis name from the lower stats file when not given
    if len(analysisName) == 0:
        if len(lowerStats) > 0:
            analysisName = os.path.splitext(os.path.basename(lowerStats))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-like analysis name also supplies the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(lowerStats) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(lowerStats)) > 0:
            analysisFolder = os.path.dirname(lowerStats)
    # input
    if len(lowerStats) == 0:
        lowerStats = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultLowerStatsFile + defaultTableExtension)
    if len(higherStats) == 0:
        higherStats = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultHigherStatsFile + defaultTableExtension)
    if len(os.path.dirname(lowerStats)) == 0 and len(analysisFolder) > 0:
        lowerStats = os.path.join(analysisFolder, lowerStats)
    if len(os.path.dirname(higherStats)) == 0 and len(analysisFolder) > 0:
        higherStats = os.path.join(analysisFolder, higherStats)
    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputFile + defaultTableExtension)
    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(outputFile)) == 0 and len(
            os.path.basename(outputFile)) > 0:
        outputFile = os.path.join(analysisFolder, outputFile)
    if len(os.path.dirname(logFile)) == 0 and len(
            os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    logList.append([""])
    logList.append(["Lower input stats file: " + lowerStats])
    logList.append(["Higher input stats file: " + higherStats])
    logList.append(["Output list: " + outputFile])
    logList.append(["Output log file: " + logFile])
    logList.append(
        ["Minimum elements in higher category: " + str(minimumElements)])
    logList.append(
        ["Maximum elements in higher category: " + str(maximumElements)])
    logList.append(["Minimum z: " + str(minimumZ)])
    logList.append([""])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP

    # NOTE(review): these handlers catch getopt.GetoptError, which the
    # wrapped stats calls are unlikely to raise — kept as in the original,
    # but confirm whether a broader exception class was intended.
    try:
        lowerData = stats.loadStatsDataFile(lowerStats)
        logList.append(["Lower data files correctly loaded."])
    except getopt.GetoptError:
        logList.append(["Error while getting lower data files."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        higherData = stats.loadStatsDataFile(higherStats)
        logList.append(["Higher data files correctly loaded."])
    except getopt.GetoptError:
        logList.append(["Error while getting higher data files."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        filteredList = filterNFDRorZ(lowerData,
                                     higherData,
                                     minN=minimumElements,
                                     maxN=maximumElements,
                                     minZ=minimumZ,
                                     maxFDR=maximumFDR,
                                     useFDR=filterByFDR)
        # sort the surviving categories by the second column
        filteredList = stats.sortByIndex(filteredList, 1)
        logList.append(["Data correctly filtered."])
    except getopt.GetoptError:
        logList.append(["Error while getting data filtered by N and Z."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        stats.saveFile(outputFile, filteredList, "id\tn\tZ\tFDR\tX")
        logList.append(["Output data correctly saved."])
    except getopt.GetoptError:
        logList.append(["Error while saving output data."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    stats.saveFile(logFile, logList, "LOG FILE")
def main(argv):
    """Command-line entry point for SanXoTGauss.

    Parses argv, resolves the stats/relations/sigmoid-list file names
    relative to the analysis folder, builds the sigmoid and extra tables,
    saves them, draws the Z plot, and writes a log file.
    """
    version = "v0.23"
    showGraph = True
    graphLimits = 6.0
    showLegend = True
    graphDPI = 100  # default of Matplotlib's savefig method
    graphLineWidth = 1.0
    graphFontSize = 8
    analysisName = ""
    defaultAnalysisName = "sanxot"
    analysisFolder = ""
    # input files
    inStats = ""
    defaultStatsFile = "stats"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    relationsFile = ""
    upperLevelToGraphFile = ""
    # output files
    defaultUpperLevelToGraphFile = "ulst"
    defaultOutputGraph = "outGraph"
    defaultOutputFile = "outSigmoids"
    defaultExtraTableFile = "outExtra"
    defaultLogFile = "logFile"
    logFile = ""
    graphFile = ""
    outputFile = ""
    extraTableFile = ""
    graphTitle = "Z plot"
    labelFontSize = 12
    minimalGraphTicks = False
    xLabel = "Zij"
    yLabel = "Rank/N"
    logList = [["SanXoTGauss " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # "place=" added: the handler below checks --place, but it was
        # missing from the long-option list.
        opts, args = getopt.getopt(
            argv, "a:p:z:r:c:L:G:o:l:d:W:s:x:t:Z:hgkT", [
                "analysis=", "place=", "folder=", "statsfile=", "relfile=",
                "list=", "logfile=", "graphfile=", "outputfile=",
                "graphlimits=", "graphfontsize=", "graphdpi=",
                "graphlinewidth=", "extratable=", "graphtitle=",
                "labelfontsize=", "help", "no-graph", "no-legend",
                "minimalgraphticks", "xlabel=", "ylabel="
            ])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-z", "--statsfile"):
            inStats = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-c", "--list"):
            upperLevelToGraphFile = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-G", "--graphfile"):
            graphFile = arg
        elif opt in ("-g", "--no-graph"):
            showGraph = False
        elif opt in ("-k", "--no-legend"):
            showLegend = False
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-x", "--extratable"):
            extraTableFile = arg
        elif opt in ("-s", "--graphfontsize"):
            graphFontSize = int(arg)
        elif opt in ("-d", "--graphdpi"):
            graphDPI = float(arg)
        elif opt in ("-W", "--graphlinewidth"):
            graphLineWidth = float(arg)
        elif opt in ("-t", "--graphtitle"):
            graphTitle = arg
        elif opt in ("-Z", "--labelfontsize"):
            labelFontSize = float(arg)
        elif opt in ("-l", "--graphlimits"):
            graphLimits = float(arg)
        elif opt in ("-T", "--minimalgraphticks"):
            minimalGraphTicks = True
        elif opt in ("--xlabel",):  # fix: ("--xlabel") was a string, i.e. a substring test
            xLabel = arg
        elif opt in ("--ylabel",):  # fix: same missing-comma defect
            yLabel = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()

    # REGION: FILE NAMES SETUP
    # derive the analysis name from the stats file when not given explicitly
    if len(analysisName) == 0:
        if len(inStats) > 0:
            analysisName = os.path.splitext(os.path.basename(inStats))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-like analysis name also supplies the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(inStats) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(inStats)) > 0:
            analysisFolder = os.path.dirname(inStats)
    # input
    if len(inStats) == 0:
        inStats = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultStatsFile + defaultTableExtension)
    if len(os.path.dirname(inStats)) == 0 and len(analysisFolder) > 0:
        inStats = os.path.join(analysisFolder, inStats)
    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(upperLevelToGraphFile) == 0:
        upperLevelToGraphFile = os.path.join(
            analysisFolder, analysisName + "_" +
            defaultUpperLevelToGraphFile + defaultTextExtension)
    if len(os.path.dirname(relationsFile)) == 0 and len(
            os.path.basename(relationsFile)) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    if len(os.path.dirname(upperLevelToGraphFile)) == 0 and len(
            os.path.basename(upperLevelToGraphFile)) > 0:
        upperLevelToGraphFile = os.path.join(analysisFolder,
                                             upperLevelToGraphFile)
    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputFile + defaultTableExtension)
    if len(extraTableFile) == 0:
        extraTableFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultExtraTableFile + defaultTableExtension)
    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(graphFile) == 0:
        graphFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputGraph + defaultGraphExtension)
    if len(os.path.dirname(outputFile)) == 0 and len(
            os.path.basename(outputFile)) > 0:
        outputFile = os.path.join(analysisFolder, outputFile)
    if len(os.path.dirname(extraTableFile)) == 0 and len(
            os.path.basename(extraTableFile)) > 0:
        extraTableFile = os.path.join(analysisFolder, extraTableFile)
    if len(os.path.dirname(logFile)) == 0 and len(
            os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    if len(os.path.dirname(graphFile)) == 0 and len(
            os.path.basename(graphFile)) > 0:
        graphFile = os.path.join(analysisFolder, graphFile)
    logList.append([""])
    logList.append(["Input stats file: " + inStats])
    logList.append(["Relations file: " + relationsFile])
    logList.append(["File with sigmoids to depict: " + upperLevelToGraphFile])
    logList.append(["Output sigmoids table: " + outputFile])
    logList.append(["Output extra table: " + extraTableFile])
    logList.append(["Output log file: " + logFile])
    logList.append(["Output graph file: " + graphFile])
    logList.append([""])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP

    # NOTE(review): these handlers catch getopt.GetoptError, which the
    # wrapped stats calls are unlikely to raise — kept as in the original,
    # but confirm whether a broader exception class was intended.
    try:
        data, logListExtraInfo = stats.arrangeSubData(
            inStats=inStats,
            uFile=upperLevelToGraphFile,
            relFile=relationsFile,
            ignoreNaNsInFDR=True)
        logList.append(logListExtraInfo)
        logList.append(["Data files correctly loaded."])
    except getopt.GetoptError:
        logList.append(["Error while getting data files."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        bigTable, bigTableHeader, extraTable, extraHeader = stats.createBigTable(
            data)
        logList.append(["Sigmoid table correctly generated."])
    except getopt.GetoptError:
        logList.append(["Error while generating sigmoid table."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        stats.saveFile(outputFile, bigTable, bigTableHeader)
        logList.append(["Sigmoid table correctly saved."])
    except getopt.GetoptError:
        logList.append(["Error while saving sigmoid table."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        stats.saveFile(extraTableFile, extraTable, extraHeader)
        logList.append(["Extra table correctly saved."])
    except getopt.GetoptError:
        logList.append(["Error while saving extra table."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        stats.graphZij(data,
                       graphLimits=graphLimits,
                       graphTitle=graphTitle,
                       graphFile=graphFile,
                       showGraph=showGraph,
                       manySigmoids=True,
                       showLegend=showLegend,
                       dpi=graphDPI,
                       graphFontSize=graphFontSize,
                       lineWidth=graphLineWidth,
                       labelFontSize=labelFontSize,
                       minimalGraphTicks=minimalGraphTicks,
                       xLabel=xLabel,
                       yLabel=yLabel)
        logList.append(["Graph correctly saved."])
    except getopt.GetoptError:
        logList.append(["Error while saving graph."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    stats.saveFile(logFile, logList, "LOG FILE")
def main(options, programName, programVersion): ## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION # *-*-* add any default values here, such as default names of files # basic default info inputFile = "" MSFFileCol = "" RAWFileCol = "" scanNumberCol = "" chargeCol = "" pepSequenceCol = "" XCorrCol = "" initialRow = 1 verbose = False QuiXMLResults = False # otherwise, QuiXML will be considered changeOriginalMSFFile = False # False = copy MSF to MSF_zeroed, True = change original file logFile = "" analysisName = "" analysisFolder = "" defaultAnalysisName = programName.lower() # default extensions defaultTableExtension = ".xls" defaultTextExtension = ".txt" # default file names defaultLogFile = "logFile" leadingFile = "" # *-*-* change this by the data file or any important file defining the operation # basic log file logList = [[programName + " " + programVersion], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]] # parsing arguments from commandline # *-*-* add any arguments used options.add_argument("-a", "--analysis", type = str, default = "", required = True, help = "Use a prefix for the output files.") options.add_argument("-p", "--place", type = str, default = "", required = True, help = "To use a different common folder for the output files. 
If this is not provided, the the folder used will be the same as the FASTA file folder.") options.add_argument("-L", "--logfile", type = str, default = "", required = False, help = "To use a non-default name for the log file.") # -d[nombre archivo donde están los xcorr = 0] # -M[columna con nombre del msf] # -r[rawfilecol] # -s[scannumbercol] # -q[chargecol] # -P[pepsequencecol] # -x[xcorrcol] # -R[initialrow] options.add_argument("-d", "--inputfile", type = str, default = "", required = True, help = "Name of the text file containing the list of PSMs to keep in the MSF.") options.add_argument("-M", "--msffile", type = str, default = "", required = True, help = "Name of the MSF file having the PSMs to modify.") options.add_argument("-r", "--rawfilecol", type = str, default = "", required = False, help = "Header of the column contaning the name of the RAW files. Default is \"RAWFileName\".") options.add_argument("-s", "--scannumbercol", type = str, default = "", required = False, help = "Header of the column containing the scan numbers. Default is \"FirstScan\".") options.add_argument("-q", "--chargecol", type = str, default = "", required = False, help = "Header of the column containing the charge. Default is \"Charge\".") options.add_argument("-S", "--pepsequencecol", type = str, default = "", required = False, help = "Header of the column containing the identified peptide sequences. Default is \"Sequence\"") options.add_argument("-x", "--xcorrcol", type = str, default = "", required = False, help = "Header of the column containing the XCorr. Default is \"XC1D\".") options.add_argument("-R", "--initialrow", type = str, default = "1", required = False, help = "The position of the row containing the headers. 
Default is 1.") options.add_argument("-v", "--verbose", action='store_true', help = "Show extra info while operating.") options.add_argument("-Q", "--quixml", action='store_true', help = "Use column headers for QuiXML results tab separated table file (otherwise, pRatio results file headers will be used by default).") options.add_argument("-O", "--changeoriginalmsf", action='store_true', help = "Do not copy the MSF file to be modified, just remove bad PSMs in the original file.") # *-*-* add easter egg if wanted arguments = options.parse_args() # copying parsed arguments # copy any arguments used verbose = arguments.verbose QuiXMLResults = arguments.quixml changeOriginalMSFFile = arguments.changeoriginalmsf if QuiXMLResults: RAWFileCol = "RAWFileName" scanNumberCol = "FirstScan" chargeCol = "Charge" pepSequenceCol = "Sequence" XCorrCol = "XC1D" initialRow = 24 else: # pRatio results are default RAWFileCol = "RAWFile" scanNumberCol = "FirstScan" chargeCol = "Charge" pepSequenceCol = "Sequence" XCorrCol = "Xcorr1Search" initialRow = 1 if len(arguments.analysis) > 0: analysisName = arguments.analysis if len(arguments.place) > 0: analysisFolder = arguments.place if len(arguments.logfile) > 0: logfile = arguments.logFile if len(arguments.inputfile) > 0: inputFile = arguments.inputfile if len(arguments.msffile) > 0: MSFFile = arguments.msffile if len(arguments.rawfilecol) > 0: RAWFileCol = arguments.rawfilecol if len(arguments.scannumbercol) > 0: scanNumberCol = arguments.scannumbercol if len(arguments.chargecol) > 0: chargeCol = arguments.chargecol if len(arguments.pepsequencecol) > 0: pepSequenceCol = arguments.pepsequencecol if len(arguments.xcorrcol) > 0: XCorrCol = arguments.xcorrcol if len(arguments.initialrow) > 0: initialRow = int(arguments.initialrow) RAWFileCol = "[" + RAWFileCol + "]" scanNumberCol = "[" + scanNumberCol + "]" chargeCol = "[" + chargeCol + "]" pepSequenceCol = "[" + pepSequenceCol + "]" XCorrCol = "[" + XCorrCol + "]" ## END REGION: DEFAULT 
VALUES AND VARIABLE ACQUISITION ## ********************************************************** ## REGION: FILE NAMES SETUP # replace leadingfile by the real leading file if len(analysisName) == 0: if len(leadingFile) > 0: analysisName = os.path.splitext(os.path.basename(leadingFile))[0] else: analysisName = defaultAnalysisName if len(os.path.dirname(analysisName)) > 0: analysisNameFirstPart = os.path.dirname(analysisName) analysisName = os.path.basename(analysisName) if len(analysisFolder) == 0: analysisFolder = analysisNameFirstPart # input # *-*-* add input file setup if len(os.path.dirname(inputFile)) == 0: inputFile = os.path.join(analysisFolder, inputFile) if len(os.path.dirname(MSFFile)) == 0: MSFFile = os.path.join(analysisFolder, MSFFile) if os.path.splitext(MSFFile)[1] != ".msf": print print "Warning: your MSF file does not seem to be an MSF file." if changeOriginalMSFFile: newMSFFile = MSFFile else: newMSFFile = os.path.splitext(MSFFile)[0] + "_zeroed" + os.path.splitext(MSFFile)[1] # output # *-*-* add output file setup if len(logFile) == 0: logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension) if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0: logFile = os.path.join(analysisFolder, logFile) # *-*-* add to the log the input and output filenames logList.append([""]) logList.append(["Output log file: " + logFile]) logList.append([""]) ## END REGION: FILE NAMES SETUP ## ********************************************************** ## REGION: PROGRAM BASIC STRUCTURE # *-*-* add basic structure print if not changeOriginalMSFFile: message = "Creating new file: %s" % newMSFFile logList.append([message]) print message shutil.copyfile(MSFFile, newMSFFile) message = "Loading PSMs to keep from: %s" % inputFile logList.append([message]) print message myList = getDataFromTXT(inputFile, iField = RAWFileCol, jField = scanNumberCol, kField = chargeCol, lField = pepSequenceCol, initialRow = initialRow, 
filterString = XCorrCol + ">0", removeDuplicates = True, removeCommas = True) if len(myList) == 0: message = "\nWarning!! No PSMs found in the text file provided,\nDeleting all XCorrs.\n" else: message = "PSMs found in text file: %i" % len(myList) logList.append([message]) print message compareListIdentified = [] for element in myList: myRAWFilePath = element[0] myRAWFile = os.path.basename(myRAWFilePath) myScanNumber = element[1] myCharge = element[2] myPepSequence = element[3] # currently unused compareListIdentified.append([myRAWFile, int(myScanNumber), int(myCharge)]) connexion = sqlite3.connect(newMSFFile) c = connexion.cursor() existingScanQuery = """select p.peptideid, fi.filename, sh.firstscan, sh.lastscan, sh.charge, p.sequence, ps.scorevalue from peptides p, peptideScores ps, spectrumHeaders sh, massPeaks mp, fileInfos fi, processingNodeScores scoreNames where p.peptideid = ps.peptideid and sh.spectrumid = p.spectrumid and (fi.fileid = mp.fileid or mp.fileid = -1) and mp.masspeakid = sh.masspeakid and scoreNames.scoreid = ps.scoreid and scoreNames.ScoreName = 'Xcorr' order by fi.filename desc, sh.firstscan asc, sh.lastscan asc, sh.charge asc, ps.scorevalue desc;""" message = "Searching all PSMs in database..." logList.append([message]) print message wholeMSFList = [] for psm in c.execute(existingScanQuery): wholeMSFList.append(psm) checked = 0 changed = 0 alreadyZero = 0 message = "Zeroing...!" logList.append([message]) message = "Original MSF contains %i PSMs." % len(wholeMSFList) logList.append([message]) logList.append([""]) logList.append(["Zeroed\tRAWFileName\tScanNumber\tCharge\tSequence (without PTMs)\tOriginal XCorr"]) print message for psm in wholeMSFList: checked += 1 pepId = int(psm[0]) rawFilePath = str(psm[1]) rawFile = os.path.basename(rawFilePath) firstScan = int(psm[2]) lastScan = int(psm[3]) charge = int(psm[4]) sequence = str(psm[5]) XCorr = float(psm[6]) # now check whether this is in the given list. 
If not, then make XCorr = 0 # compareListIdentified.append([myRAWFile, myScanNumber, myCharge, myPepSequence]) compareListInMSF = [rawFile, firstScan, charge] msfScanPresentInList = (compareListInMSF in compareListIdentified) if XCorr == 0: alreadyZero += 1 if not msfScanPresentInList and XCorr != 0: changed += 1 zeroSettingQuery = """update peptideScores set scoreValue = 0 where peptideID in ( select p.peptideID from fileinfos fi, massPeaks mp, spectrumHeaders sh, peptides p where (fi.fileid = mp.fileid or mp.fileid = -1) and mp.masspeakid = sh.masspeakid and sh.spectrumid = p.spectrumid and sh.firstScan = %i and sh.charge = %i and fi.fileName like "%%%s" ) and scoreID = ( select scoreId from processingNodeScores where scoreName = "XCorr" );""" % (firstScan, charge, rawFile) # pdb.set_trace() c.execute(zeroSettingQuery) message = """%i\t%s\t%i\t%i\t%s\t%f""" % (checked, rawFile, firstScan, charge, sequence, XCorr) logList.append([message]) if verbose: message = """Zeroed (%i/%i):\nraw = "%s"\nscan = %i\ncharge = %i\nsequence = %s\nXCorr = %f\n""" % (checked, len(wholeMSFList), rawFile, firstScan, charge, sequence, XCorr) print message print message = "PTMs found: %i,\nPTMs zeroed: %i,\nDifference: %i,\nAlready zero: %i" % (checked, changed, checked - changed, alreadyZero) print message logList.append([""]) logList.append([message]) print print "Saving changes..." connexion.commit() print print "Closing connexion..." connexion.close() ## END REGION: PROGRAM BASIC STRUCTURE ## ********************************************************** ## REGION: SAVING FILES # *-*-* add any files to be saved here try: # operations here logList.append(["Most probably, everything went fine."]) except getopt.GetoptError: logList.append(["Error."]) stats.saveFile(logFile, logList, "LOG FILE") sys.exit(2) stats.saveFile(logFile, logList, "LOG FILE") print "Done!"
def main(argv):
    """Command-line entry point for Anselmo.

    Scans the analysis folder for "*_infoFile.txt" files with the given
    prefix, picks the experiment whose variance is the median, and copies
    that experiment's files (and optional extra-prefix files) under a
    median-tagged name, saving a log file.
    """
    version = "v0.05"
    analysisName = ""
    analysisFolder = ""
    logFile = ""
    # in data
    prefix = ""
    extraPrefix = ""
    medianTag = "med"
    # default filenames
    defaultInfoFileSuffix = "_infoFile.txt"
    defaultLogFile = "logFile"
    defaultAnalysisName = "medianSelection"
    # default extensions
    defaultTableExtension = ".xls"
    defaultTextExtension = ".txt"
    verbose = True
    logList = [["Anselmo " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        opts, args = getopt.getopt(argv, "a:p:f:g:m:L:hH",
                                   ["analysis=", "folder=", "prefix=",
                                    "extraprefix=", "mediantag=", "logfile=",
                                    "help", "egg", "easteregg"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        # fix: was stats.saveFile(infoFile, ...) — "infoFile" is never
        # defined in this function, so the error path itself crashed with
        # a NameError instead of saving the log.
        stats.saveFile(logFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version, True)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-f", "--prefix"):
            prefix = arg
        elif opt in ("-g", "--extraprefix"):
            extraPrefix = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-m", "--mediantag"):
            medianTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced = True)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()

    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        analysisName = defaultAnalysisName
    # a path-like analysis name also supplies the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    # next "if" disables extra copy when extraPrefix is same as prefix
    if len(extraPrefix) > 0 and extraPrefix == prefix:
        extraPrefix = ""
    # input
    # output
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
    ##logList.append(["Median variance = " + "poner***"])
    # END REGION: FILE NAMES SETUP

    # get infoFile list
    infoFileList = glob.glob(os.path.join(analysisFolder, prefix + "*" + defaultInfoFileSuffix))
    logList.append([])
    logList.append(["Folder = " + analysisFolder])
    logList.append([])
    logList.append(["Info files with prefix \"%s\"" % prefix])
    # collect [infoFile, variance] pairs for every parseable info file
    varList = []
    for varFile in infoFileList:
        variance, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = False)
        if varianceOk:
            varList.append([varFile, variance])
    # get info file with median variance
    varList = stats.sortByIndex(varList, 1)
    medianVariance = stats.medianByIndex(varList, 1)
    medianIndex = getMedianIndex(varList = varList, variance = medianVariance)
    for element in varList:
        if element[0] == varList[medianIndex][0]:
            logList.append(["%s, variance = %f [taken]" % (os.path.basename(element[0]), element[1])])
        else:
            logList.append(["%s, variance = %f" % (os.path.basename(element[0]), element[1])])
    # get prefix of median experiment
    medianInfoFile = os.path.basename(varList[medianIndex][0])
    # the part between the common prefix and the info-file suffix
    randTag = medianInfoFile[len(prefix):len(medianInfoFile) - len(defaultInfoFileSuffix)]
    medianPrefix = prefix + randTag
    extraMedianPrefix = ""
    if len(extraPrefix) > 0:
        extraMedianPrefix = extraPrefix + randTag
    # get file list with specific prefix
    medianExperimentFileList = glob.glob(os.path.join(analysisFolder, medianPrefix + "*.*"))
    extraPrefixFileList = []
    if len(extraMedianPrefix) > 0:
        extraPrefixFileList = glob.glob(os.path.join(analysisFolder, extraMedianPrefix + "*.*"))
    # copy files including median tag
    extraLogList = copyFilesWithPrefix(fileList = medianExperimentFileList,
                                       folder = analysisFolder,
                                       prefix = prefix,
                                       message = "Renamed files:",
                                       tag = medianTag)
    logList.extend(extraLogList)
    if len(extraPrefixFileList) > 0:
        extraLogList = copyFilesWithPrefix(fileList = extraPrefixFileList,
                                           folder = analysisFolder,
                                           prefix = extraPrefix,
                                           message = "Renamed extra files:",
                                           tag = medianTag)
        logList.extend(extraLogList)
    # save logFile
    stats.saveFile(logFile, logList, "INFO FILE")
def main(options, programName, programVersion):
    """Build a category-to-protein relations table from a database file.

    Reads a table with accession numbers and categories, optionally replaces
    accession numbers by FASTA headers, optionally concatenates the result to
    a previous relations file, and saves the sorted relations table.

    options        -- an argparse.ArgumentParser to which arguments are added
    programName    -- program name used for the default analysis name and log
    programVersion -- version string, logged in the output
    """
    ## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    # basic default info
    logFile = ""
    analysisName = ""
    analysisFolder = ""
    defaultAnalysisName = programName.lower()
    relFile = ""
    DBFile = ""
    FASTAFile = ""
    previousFile = ""
    accNumCol = 1               # 1-based column with accession numbers
    catCol = 2                  # 1-based column with categories
    catPrefix = ""
    header = "idsup\tidinf"     # cleared when appending to a previous file
    previousList = []
    # default extensions
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    # default file names
    defaultLogFile = "logFile"
    defaultRelFile = "rels"
    # basic log file
    logList = [[programName + " " + programVersion],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    # parsing arguments from commandline
    options.add_argument("-a", "--analysis", type = str, default = "", required = True,
                         help = "Use a prefix for the output files.")
    options.add_argument("-p", "--place", type = str, default = "", required = True,
                         help = "To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
    options.add_argument("-L", "--logfile", type = str, default = "", required = False,
                         help = "To use a non-default name for the log file.")
    options.add_argument("-d", "--dbfile", type = str, default = "", required = True,
                         help = "The input file containing accession numbers and categories.")
    options.add_argument("-x", "--previousfile", type = str, default = "", required = False,
                         help = "An optional relation file to which concatenate resulting relations (if omitted, a new file will be produced).")
    options.add_argument("-q", "--accnumcol", type = str, default = "1", required = False,
                         help = "Column where accession numbers of genes/proteins are. First column is 1. Default is 1.")
    options.add_argument("-c", "--categorycol", type = str, default = "2", required = False,
                         help = "Column where categories are. First column is 1. Default is 2.")
    options.add_argument("-f", "--prefix", type = str, default = "", required = False,
                         help = "Prefix to add to all categories found in this parsing (such as \"GO-full_\", \"Panther_\", or \"KEGG=2017-01-10_\".")
    options.add_argument("--fasta", type = str, default = "", required = False,
                         help = "FASTA file contaning the identifiers we want to replace by FASTA headers in the final file. Note that identifiers not appearing in this FASTA file will be removed from the final list.")
    # add string with category separator
    # add string with accNum separator
    # *-*-* add easter egg if wanted
    arguments = options.parse_args()
    # copying parsed arguments
    # copy any arguments used
    if len(arguments.analysis) > 0: analysisName = arguments.analysis
    if len(arguments.place) > 0: analysisFolder = arguments.place
    if len(arguments.logfile) > 0: logFile = arguments.logfile
    if len(arguments.dbfile) > 0: DBFile = arguments.dbfile
    if len(arguments.fasta) > 0: FASTAFile = arguments.fasta
    if len(arguments.previousfile) > 0: previousFile = arguments.previousfile
    if len(arguments.accnumcol) > 0: accNumCol = int(arguments.accnumcol)
    if len(arguments.categorycol) > 0: catCol = int(arguments.categorycol)
    if len(arguments.prefix) > 0: catPrefix = arguments.prefix
    ## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    ## **********************************************************
    ## REGION: FILE NAMES SETUP
    # derive the analysis name from the DB file when not given explicitly
    if len(analysisName) == 0:
        if len(DBFile) > 0:
            analysisName = os.path.splitext(os.path.basename(DBFile))[0]
        else:
            analysisName = defaultAnalysisName
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    # input: resolve bare file names against the analysis folder
    if len(os.path.dirname(DBFile)) == 0:
        DBFile = os.path.join(analysisFolder, DBFile)
    if len(previousFile) > 0:
        if len(os.path.dirname(previousFile)) == 0:
            previousFile = os.path.join(analysisFolder, previousFile)
    if len(FASTAFile) > 0:
        if len(os.path.dirname(FASTAFile)) == 0:
            FASTAFile = os.path.join(analysisFolder, FASTAFile)
    # output
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder,
                               analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    if len(relFile) == 0:
        relFile = os.path.join(analysisFolder,
                               analysisName + "_" + defaultRelFile + defaultTableExtension)
    if len(os.path.dirname(relFile)) == 0 and len(os.path.basename(relFile)) > 0:
        relFile = os.path.join(analysisFolder, relFile)
    logList.append([""])
    logList.append(["Input table with categories and proteins: " + DBFile])
    if len(previousFile) > 0:
        logList.append(["Previous file to which new qc relations are added: " + previousFile])
    if len(FASTAFile) > 0:
        logList.append(["FASTA file to replace identifiers for FASTA headers: " + FASTAFile])
    logList.append(["Category column: %i, protein column: %i" % (catCol, accNumCol)])
    logList.append(["Prefix added to categories: " + catPrefix])
    logList.append(["Output relations file: " + relFile])
    logList.append(["Output log file: " + logFile])
    logList.append([""])
    ## END REGION: FILE NAMES SETUP
    ## **********************************************************
    ## REGION: PROGRAM BASIC STRUCTURE
    if len(previousFile) > 0: # otherwise, previousList = []
        previousList = stats.load2stringList(previousFile, removeCommas = True)
        # appending to an existing file: do not write a second header
        header = ""
    AccNum2FASTAHeader = getFASTAHeaders(FASTAFile)
    gc.collect()
    DBList = stats.load2stringList(DBFile, removeCommas = True)
    newRelations = getRelations(bigTable = DBList,
                                qCol = accNumCol,
                                cCol = catCol,
                                cPrefix = catPrefix,
                                FASTAHeaders = AccNum2FASTAHeader)
    newRelationsSorted = stats.sortByIndex(newRelations, 0)
    relationList = previousList + newRelationsSorted
    gc.collect()
    ## END REGION: PROGRAM BASIC STRUCTURE
    ## **********************************************************
    ## REGION: SAVING FILES
    try:
        stats.saveFile(relFile, relationList, header)
        logList.append(["Everything went fine."])
        stats.saveFile(logFile, logList, "LOG FILE")
    except Exception:
        # fix: this handler caught getopt.GetoptError, which saveFile never
        # raises, so save failures crashed without being logged; catch any
        # exception, record it and exit with an error code instead.
        logList.append(["Error."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
def main(argv):
    """SanXoTSieve entry point: detect outlier relations in a data/relations
    pair and either tag them (default) or, in "old way" mode, split them out
    into a separate relations file.

    argv -- raw command-line arguments (sys.argv[1:]), parsed with getopt.
    """
    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    # fix: defaultAnalysisName was referenced below but never defined,
    # raising NameError when no data file is provided
    defaultAnalysisName = "sanxotsieve"
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"           # tag expression used to filter relations
    outlierTag = "out"      # tag written on outlier relations
    logicOperatorsAsWords = False
    dataFile = ""
    relationsFile = ""
    newRelFile = ""
    removedRelFile = ""
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    verbose = True
    oldWay = False # instead of tagging outliers, separating relations files, the old way
    modeUsed = mode.onePerHigher
    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        opts, args = getopt.getopt(argv, "a:p:v:d:r:n:L:V:f:ubDhH",
                                   ["analysis=", "folder=", "varianceseed=", "datafile=",
                                    "relfile=", "newrelfile=", "outlierrelfile=",
                                    "infofile=", "varfile=", "fdrlimit=", "one-to-one",
                                    "no-verbose", "randomise", "removeduplicateupper",
                                    "help", "advanced-help", "tags=", "outliertag=",
                                    "oldway", "word-operators"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # fix: -n/--newrelfile used to overwrite removedRelFile, leaving
            # newRelFile impossible to set from the command line
            newRelFile = arg
        elif opt in ("--outlierrelfile",):
            # fix: --outlierrelfile was declared in the getopt long-option
            # list but had no handler, so it was silently ignored
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt in ("--oldway",):
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt in ("--tags",):
            # outliers are always excluded; the user expression is AND-ed in
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt in ("--word-operators",):
            logicOperatorsAsWords = True
        elif opt in ("--outliertag",):
            # fix: this handler assigned the literal "out", silently
            # discarding the user-provided tag
            outlierTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced=True)
            sys.exit()
    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)
    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)
    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)
    # a variance file overrides the default seed unless -v was given explicitly
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()
    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    # output
    if len(newRelFile) == 0:
        if oldWay:
            # suffix: "cleaned"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:
            # suffix: "tagged"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)
    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)
    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)
    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)
    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)
    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(
            ["Output relations file without outliers: " + newRelFile])
        logList.append(
            ["Output relations file with outliers only: " + removedRelFile])
        logList.append([
            "Removing duplicate higher level elements: " +
            str(removeDuplicateUpper)
        ])
        logList.append([
            "OldWay option activated: outliers are removed instead of tagged"
        ])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP
    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)
    if oldWay:
        # only for backward compatibility. Note that tags are not supported
        newRelations, removedRelations, logResults = \
            getRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit = FDRLimit,
                                        modeUsed = modeUsed,
                                        removeDuplicateUpper = removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
            tagRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit = FDRLimit,
                                        modeUsed = modeUsed,
                                        removeDuplicateUpper = removeDuplicateUpper,
                                        tags = tags,
                                        outlierTag = outlierTag,
                                        logicOperatorsAsWords = logicOperatorsAsWords)
    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")
    stats.saveFile(infoFile, logList, "INFO FILE")
    # the outliers-only file exists only in old-way mode
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
def main(argv):
    """Inversor entry point: read a FASTA file and write a pseudo-reversed
    (decoy) FASTA file, cleaving at the given residues (default "KR",
    trypsin), optionally removing palindromic peptides.

    argv -- raw command-line arguments (sys.argv[1:]), parsed with getopt.
    """
    version = "v0.04"
    analysisName = ""
    cleaveSites = "KR" # trypsin default
    removePalindromes = False
    defaultAnalysisName = "inversor"
    analysisFolder = ""
    defaultTableExtension = ".xls"
    defaultTextExtension = ".txt"
    defaultFastaExtension = ".fasta"
    graphFileFormat = "png"
    defaultFastaFile = "fastadef"
    invertedFastaFile = ""
    defaultInvertedFileSuffix = "inv"
    defaultLogFile = "logFile"
    fastaFile = ""
    logFile = ""
    logList = [["Inversor " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        opts, args = getopt.getopt(argv, "a:p:f:c:rh",
                                   ["analysis=", "folder=", "fastafile=", "cleavesites=",
                                    "place=", "removepalindromes", "help", "egg",
                                    "easteregg"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-f", "--fastafile"):
            fastaFile = arg
        elif opt in ("-c", "--cleavesites"):
            cleaveSites = arg.strip()
        elif opt in ("-r", "--removepalindromes"):
            removePalindromes = True
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # REGION: FILE NAMES SETUP
    # derive the analysis name from the FASTA file when not given explicitly
    if len(analysisName) == 0:
        if len(fastaFile) > 0:
            analysisName = os.path.splitext(os.path.basename(fastaFile))[0]
        else:
            analysisName = defaultAnalysisName
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(fastaFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(fastaFile)) > 0:
            analysisFolder = os.path.dirname(fastaFile)
    # input
    if len(os.path.dirname(fastaFile)) == 0 and len(fastaFile) > 0:
        fastaFile = os.path.join(analysisFolder, fastaFile)
    # output
    if len(invertedFastaFile) == 0:
        invertedFastaFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultInvertedFileSuffix + defaultFastaExtension)
    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultLogFile + defaultTextExtension)
    logList.append([""])
    logList.append(["Input FASTA file: " + fastaFile])
    logList.append(["Inverted FASTA file: " + invertedFastaFile])
    logList.append(["Cleave sites used: " + cleaveSites])
    logList.append([""])
    # END REGION: FILE NAMES SETUP
    try:
        readAndWriteNewFASTAFile(fastaFile,
                                 invertedFastaFile,
                                 digestionPoints=cleaveSites,
                                 removePalindromes=removePalindromes)
        logList.append(["Everything went fine."])
    except Exception:
        # fix: this handler caught getopt.GetoptError, which the FASTA
        # read/write never raises, so real I/O errors crashed unlogged;
        # catch any exception, record it and exit with an error code.
        logList.append(["Error."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    stats.saveFile(logFile, logList, "LOG FILE")
def main(options, programName, programVersion):
    """Compute the degree of coordination of protein-to-category relations.

    Loads a q2c (protein-to-category) outstats file and a c2a
    (category-to-all) outstats file, finds which categories are changing at
    the given FDR, and computes coordination = (C - B)/(C + A) where C is the
    number of relations pointing to changing categories, B the outlier
    relations in changing categories and A the outlier relations in
    non-changing categories. The only output is the log file.

    NOTE(review): this function uses Python 2 print statements.

    options        -- an argparse.ArgumentParser to which arguments are added
    programName    -- program name used for the default analysis name and log
    programVersion -- version string, logged in the output
    """
    ## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    # basic default info
    logFile = ""
    analysisName = ""
    analysisFolder = ""
    defaultAnalysisName = programName.lower()
    caFDR = 0.05                    # FDR threshold for "changing" categories
    modeSanXoTSieve = "newWay" # alternatively, "oldWay"
    coordination = 0
    caseSensitive = True
    # default extensions
    defaultTableExtension = ".xls"
    defaultTextExtension = ".txt"
    # default file names
    defaultLogFile = "logFile"
    qcInputFile = "qcInput"
    qcInputFileNoOuts = ""
    caInputFile = "caInput"
    # basic log file
    logList = [[programName + " " + programVersion],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    # parsing arguments from commandline
    options.add_argument("-a", "--analysis", type = str, default = "", required = False,
                         help = "Use a prefix for the output files.")
    options.add_argument("-p", "--place", type = str, default = "", required = False,
                         help = "To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
    options.add_argument("-L", "--logfile", type = str, default = "", required = False,
                         help = "To use a non-default name for the log file.")
    options.add_argument("-q", "--qcinput", type = str, default = "", required = True,
                         help = "Input outstats file with the q2c integration, including outliers. It must include outliers tagged in new SanXoTSieve files, or, alternatively, it must add the corresponding file with no outliers (see -n argument) for SanXoTSieve in oldWay mode.")
    options.add_argument("-n", "--qcinputnoouts", type = str, default = "", required = False,
                         help = "Input outstats file with the q2c integration, NOT including outliers (using this parameter automatically implies the oldWay option has been used in SanXoTSieve, i.e. removing outliers from the relations file, instead of just tagging them).")
    options.add_argument("-c", "--cainput", type = str, default = "", required = True,
                         help = "Input outstats file with the c2a integration, defining in the FDR column which cateogries are considered to be changing.")
    options.add_argument("--cafdr", type = str, default = "0.05", required = False,
                         help = "To consider a non-default FDR value for changing categories in the c2a integration (default is 0.05).")
    options.add_argument("--caseinsensitive", action='store_true',
                         help = "Consider case insensitive categories and protein identifiers (by default, they are case sensitive).")
    arguments = options.parse_args()
    # copying parsed arguments
    # copy any arguments used
    if len(arguments.analysis) > 0: analysisName = arguments.analysis
    if len(arguments.place) > 0: analysisFolder = arguments.place
    if len(arguments.logfile) > 0: logFile = arguments.logfile
    if len(arguments.qcinput) > 0: qcInputFile = arguments.qcinput
    if len(arguments.qcinputnoouts) > 0:
        # providing the no-outliers file implies old-way SanXoTSieve output
        qcInputFileNoOuts = arguments.qcinputnoouts
        modeSanXoTSieve = "oldWay"
    if len(arguments.cainput) > 0: caInputFile = arguments.cainput
    if len(str(arguments.cafdr)) > 0:
        try:
            caFDR = float(arguments.cafdr)
        # NOTE(review): bare except swallows every error here — should
        # probably be "except ValueError:"
        except:
            message = "Warning: FDR for categories changing could not be parsed, %f will be used instead." % caFDR
            logList.append([""])
            logList.append([message])
            logList.append([""])
            print ""
            print message
            print ""
    caseSensitive = not arguments.caseinsensitive
    ## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    ## **********************************************************
    ## REGION: FILE NAMES SETUP
    # derive the analysis name from the q2c file when not given explicitly
    if len(analysisName) == 0:
        if len(qcInputFile) > 0:
            analysisName = os.path.splitext(os.path.basename(qcInputFile))[0]
        else:
            analysisName = defaultAnalysisName
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(analysisFolder) == 0:
        analysisFolder = os.getcwd()
    # input: resolve bare file names against the analysis folder
    if len(os.path.dirname(qcInputFile)) == 0:
        qcInputFile = os.path.join(analysisFolder, qcInputFile)
    if len(os.path.dirname(qcInputFileNoOuts)) == 0 and modeSanXoTSieve == "oldWay":
        qcInputFileNoOuts = os.path.join(analysisFolder, qcInputFileNoOuts)
    if len(os.path.dirname(caInputFile)) == 0:
        caInputFile = os.path.join(analysisFolder, caInputFile)
    # output
    # the only output is the logFile, which includes a last line with the coordination
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder,
                               analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    logList.append([""])
    logList.append(["Input protein-to-category outstats file: " + qcInputFile])
    logList.append(["Input category-to-all outstats file: " + caInputFile])
    if len(qcInputFileNoOuts) > 0:
        logList.append(["Input protein-to-category outstats with NO outliers: " + qcInputFileNoOuts])
    logList.append(["Output log file: " + logFile])
    logList.append(["category-to-all FDR used: %f" % caFDR])
    logList.append(["SanXoTSieve mode: " + modeSanXoTSieve])
    logList.append([""])
    ## END REGION: FILE NAMES SETUP
    ## **********************************************************
    ## REGION: PROGRAM BASIC STRUCTURE
    listChangingCats = getListChangingCats(caInputFile, caFDR)
    numRelsChangingCats, numOutliersChangingCats, numOutliersNonChangingCats = getRels(qcInputFile, listChangingCats, qcInputFileNoOuts, modeSanXoTSieve, caseSensitive)
    # explanation
    # coord = (C - B)/(C + A)
    # where
    # C = numRelsChangingCats = qc-relations pointing to categories changing in ca
    # B = numOutliersChangingCats = outlier qc-relations in categories changing in ca
    # A = numOutliersNonChangingCats = outlier qc-relations in categories not changing in ca
    # hence, B + A = outlier qc-relations in any category
    # NOTE(review): raises ZeroDivisionError when C + A == 0 (no relations
    # to changing categories and no outliers) — confirm whether inputs
    # guarantee this cannot happen
    coordination = (float(numRelsChangingCats) - float(numOutliersChangingCats)) / (float(numRelsChangingCats) + float(numOutliersNonChangingCats))
    ## END REGION: PROGRAM BASIC STRUCTURE
    ## **********************************************************
    ## REGION: SAVING FILES
    try:
        message = "Degree of coordination: %f" % coordination
        logList.append(["Total number of changing categories: %i" % len(listChangingCats)])
        logList.append(["Total number of relations pointing to changing categories: %i" % numRelsChangingCats])
        logList.append(["Total number of outlier relations pointing to changing categories: %i" % numOutliersChangingCats])
        logList.append(["Total number of outlier relations pointing to non-changing categories: %i" % numOutliersNonChangingCats])
        logList.append([message])
        print ""
        print "Find more details in the log file, at: %s" % logFile
        print ""
        print message
        print ""
    # NOTE(review): getopt is not used in this try block, so this handler is
    # effectively dead code — likely copy-pasted from the option-parsing
    # scripts; confirm intended exception type
    except getopt.GetoptError:
        logList.append(["Error."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    stats.saveFile(logFile, logList, "LOG FILE")
def main(argv):
    """Sanson entry point: build a similarity graph of categories.

    Loads a stats file, a relations file and a list of elements to depict,
    computes a similarity matrix, selects a similarity cutoff (FNumber) —
    either automatically or from the -e option — and renders the graph via
    Graphviz DOT, saving the matrix, cluster table, DOT file, image and log.

    argv -- raw command-line arguments (sys.argv[1:]), parsed with getopt.
    """
    version = "v1.14"
    verbose = False
    similarityLimit = -1.0 # if remain as -1, it will be calculated
    graphLimits = 6.0
    analysisName = ""
    useSubStats = True
    defaultAnalysisName = "sanxot"
    analysisFolder = ""
    # input files
    inStats = ""
    defaultStatsFile = "stats"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultDOTExtension = ".gv"
    relationsFile = ""
    upperLevelToGraphFile = ""
    # output files
    defaultUpperLevelToGraphFile = "ulst"
    defaultOutputGraph = "simGraph"
    defaultLogFile = "logFile"
    defaultSimilarityMatrixFile = "outSimilarities"
    defaultOutputGVFileName = "simGraph"
    defaultOutputClusterFileName = "outClusters"
    logFile = ""
    graphFile = ""
    dotFile = ""
    outCluster = ""
    similarityMatrixFile = ""
    graphFileFormat = "png"
    altMax = 5
    # node / edge appearance defaults
    selectedNodeColour = "#ff9090"
    defaultNodeColour = "#ffff80"
    errorNodeColour = "#8080ff"
    minColour = "#00ff00"
    middleColour = "#ffffff"
    maxColour = "#ff0000"
    defaultNodeTextColour = "#000000"
    nonParetoOpacity = 0.5
    minFontSize = 10.0
    maxFontSize = 70.0
    graphDPI = 96.0
    graphRatio = 0.0
    logList = [["Sanson " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # NOTE: the short-option string contains a duplicated "d:" — kept as
        # is for backward compatibility (getopt tolerates it)
        opts, args = getopt.getopt(argv, "a:p:z:r:c:L:G:m:l:d:e:s:d:g:N:bhk",
                                   ["analysis=", "folder=", "place=", "statsfile=",
                                    "relfile=", "relationsfile=", "list=", "logfile=",
                                    "graphfile=", "simfile=", "graphlimits=",
                                    "similarity=", "dotfile=", "outcluster=",
                                    "graphformat=", "altmax=",
                                    "selectednodecolour=", "selectednodecolor=",
                                    "defaultnodecolour=", "defaultnodecolor=",
                                    "defaultnodetextcolour=", "defaultnodetextcolor=",
                                    "errornodecolour=", "errornodecolor=",
                                    "mincolour=", "mincolor=",
                                    "middlecolour=", "middlecolor=",
                                    "maxcolour=", "maxcolor=", "nonparetoopacity=",
                                    "minfontsize=", "maxfontsize=", "graphdpi=",
                                    "graphratio=", "nosubstats", "help", "egg",
                                    "easteregg"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-z", "--statsfile"):
            inStats = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-c", "--list"):
            upperLevelToGraphFile = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-G", "--graphfile"):
            graphFile = arg
        elif opt in ("-m", "--simfile"):
            similarityMatrixFile = arg
        elif opt in ("-l", "--graphlimits"):
            graphLimits = float(arg)
        elif opt in ("-e", "--similarity"):
            similarityLimit = float(arg)
        elif opt in ("-d", "--dotfile"):
            # begin: jmrc
            dotFile = arg
            # end: jmrc
        elif opt in ("-s", "--outcluster"):
            # fix: this assigned float(arg), turning the output-file *path*
            # into a float and crashing later at len(outCluster)
            outCluster = arg
        elif opt in ("-b", "--nosubstats"):
            useSubStats = False
        elif opt in ("--nonparetoopacity",):
            nonParetoOpacity = float(arg)
        elif opt in ("-N", "--altmax"):
            altMax = int(arg)
        elif opt in ("-g", "--graphformat"):
            graphFileFormat = arg.lower().strip()
            if graphFileFormat == "jpeg": graphFileFormat = "jpg"
            # fall back to png for any format Graphviz output does not cover
            if graphFileFormat != "png" and \
                graphFileFormat != "svg" and \
                graphFileFormat != "jpg" and \
                graphFileFormat != "tif" and \
                graphFileFormat != "tiff" and \
                graphFileFormat != "pdf" and \
                graphFileFormat != "bmp" and \
                graphFileFormat != "gif":
                print()
                print("Warning: graph format \"%s\" is not supported,\npng will be used instead." % graphFileFormat)
                print()
                graphFileFormat = "png"
        elif opt in ("--selectednodecolour", "--selectednodecolor"):
            selectedNodeColour = arg
        elif opt in ("--defaultnodecolour", "--defaultnodecolor"):
            defaultNodeColour = arg
        elif opt in ("--defaultnodetextcolour", "--defaultnodetextcolor"):
            defaultNodeTextColour = arg
        elif opt in ("--errornodecolour", "--errornodecolor"):
            errorNodeColour = arg
        elif opt in ("--mincolour", "--mincolor"):
            minColour = arg
        elif opt in ("--middlecolour", "--middlecolor"):
            middleColour = arg
        elif opt in ("--maxcolour", "--maxcolor"):
            maxColour = arg
        elif opt in ("--minfontsize",):
            minFontSize = float(arg)
        elif opt in ("--maxfontsize",):
            maxFontSize = float(arg)
        elif opt in ("--graphdpi",):
            graphDPI = float(arg)
        elif opt in ("--graphratio",):
            graphRatio = float(arg)
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # REGION: FILE NAMES SETUP
    if minFontSize > maxFontSize:
        minFontSize = maxFontSize
    defaultGraphExtension = "." + graphFileFormat
    if len(analysisName) == 0:
        if len(inStats) > 0:
            analysisName = os.path.splitext(os.path.basename(inStats))[0]
        else:
            analysisName = defaultAnalysisName
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(inStats) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(inStats)) > 0:
            analysisFolder = os.path.dirname(inStats)
    # input
    if len(inStats) == 0:
        inStats = os.path.join(analysisFolder,
                               analysisName + "_" + defaultStatsFile + defaultTableExtension)
    if len(os.path.dirname(inStats)) == 0 and len(analysisFolder) > 0:
        inStats = os.path.join(analysisFolder, inStats)
    if len(relationsFile) == 0:
        relationsFile = os.path.join(analysisFolder,
                                     analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(upperLevelToGraphFile) == 0:
        upperLevelToGraphFile = os.path.join(analysisFolder,
                                             analysisName + "_" + defaultUpperLevelToGraphFile + defaultTextExtension)
    if len(os.path.dirname(relationsFile)) == 0 and len(os.path.basename(relationsFile)) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    if len(os.path.dirname(upperLevelToGraphFile)) == 0 and len(os.path.basename(upperLevelToGraphFile)) > 0:
        upperLevelToGraphFile = os.path.join(analysisFolder, upperLevelToGraphFile)
    # output
    if len(dotFile) == 0:
        dotFile = os.path.join(analysisFolder,
                               analysisName + "_" + defaultOutputGVFileName + defaultDOTExtension)
    if len(outCluster) == 0:
        outCluster = os.path.join(analysisFolder,
                                  analysisName + "_" + defaultOutputClusterFileName + defaultTableExtension)
    if len(similarityMatrixFile) == 0:
        similarityMatrixFile = os.path.join(analysisFolder,
                                            analysisName + "_" + defaultSimilarityMatrixFile + defaultTextExtension)
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder,
                               analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(graphFile) == 0:
        graphFile = os.path.join(analysisFolder,
                                 analysisName + "_" + defaultOutputGraph + defaultGraphExtension)
    if len(os.path.dirname(dotFile)) == 0 and len(os.path.basename(dotFile)) > 0:
        dotFile = os.path.join(analysisFolder, dotFile)
    if len(os.path.dirname(outCluster)) == 0 and len(os.path.basename(outCluster)) > 0:
        outCluster = os.path.join(analysisFolder, outCluster)
    if len(os.path.dirname(similarityMatrixFile)) == 0 and len(os.path.basename(similarityMatrixFile)) > 0:
        similarityMatrixFile = os.path.join(analysisFolder, similarityMatrixFile)
    if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    if len(os.path.dirname(graphFile)) == 0 and len(os.path.basename(graphFile)) > 0:
        graphFile = os.path.join(analysisFolder, graphFile)
    logList.append([""])
    logList.append(["Input stats file: " + inStats])
    logList.append(["Relations file: " + relationsFile])
    logList.append(["File with sigmoids to depict: " + upperLevelToGraphFile])
    logList.append(["Output similarity matrix table: " + similarityMatrixFile])
    logList.append(["Output log file: " + logFile])
    logList.append(["Output graph file: " + graphFile])
    logList.append([""])
    if useSubStats:
        logList.append(["Filling nodes with Z from lower elemenets"])
    else:
        logList.append(["Filling nodes with Z from upper elements"])
    logList.append([""])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP
    try:
        data, extraData, logListExtraInfo = associateElements(inStats = inStats,
                                                              uFile = upperLevelToGraphFile,
                                                              relFile = relationsFile)
        logList.append(logListExtraInfo)
        logList.append(["Data files correctly loaded."])
    except getopt.GetoptError:
        logList.append(["Error while getting data files."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    if len(data) == 0:
        logList.append([""])
        errorMessage = "No data were retrieved to create the similarity graph."
        print(errorMessage)
        print("Exiting...")
        logList.append([errorMessage])
    else:
        similarityMatrix, NMatrix = SMatrix(data)
        stats.saveFile(similarityMatrixFile, similarityMatrix)
        if useSubStats:
            # TODO: add output to log
            subData = stats.arrangeSubData(inStats = inStats,
                                           uFile = upperLevelToGraphFile,
                                           relFile = relationsFile,
                                           ignoreNaNsInFDR = True)
        else:
            subData = None
        if similarityLimit < 0.0 or similarityLimit > 1.0:
            # means it has to be calculated
            # this includes the default value = -1
            FNumber, bestBooleanSimMatrix, bestClusterVector, CNumber = \
                getBestFNumber(similarityMatrix,
                               verbose = False,
                               stepFNumber = 0.01,
                               initialFNumber = 0.0,
                               finalFNumber = 1.0)
            logList.append([""])
            logList.append(["Creating DOT graph for the best FNumber = %f." % FNumber])
            logList.append(["At least %i graphs contain %i nodes." % (CNumber, CNumber)])
        else:
            # the user fixed the similarity cutoff: evaluate only that value
            FNumber, bestBooleanSimMatrix, bestClusterVector, CNumber = \
                getBestFNumber(similarityMatrix,
                               verbose = False,
                               stepFNumber = 0.0,
                               initialFNumber = similarityLimit,
                               finalFNumber = similarityLimit)
            logList.append([""])
            logList.append(["Creating DOT graph for the given FNumber = %f." % FNumber])
        # begin: jmrc
        # print("Best FNumber: %f" % FNumber)
        # end: jmrc
        paretoInfo, extraDataWithClusters = getParetoInfo(clusterVector = bestClusterVector,
                                                          extraData = extraData)
        # stats.saveFile(outCluster, bestClusterVector, "CLUSTERS IDENTIFIED")
        stats.saveFile(outCluster, extraDataWithClusters,
                       "id\tn\tZ\tFDR\tX\tcluster id\tPareto front?")
        createDOTGraph(similarityMatrix,
                       simLimit = FNumber,
                       outputGVFile = dotFile,
                       simGraphFile = graphFile,
                       extraData = extraData,
                       subData = subData,
                       NMatrix = NMatrix,
                       graphLimits = graphLimits,
                       graphFileFormat = graphFileFormat,
                       altMax = altMax,
                       defaultNodeColour = defaultNodeColour,
                       errorNodeColour = errorNodeColour,
                       minColour = minColour,
                       middleColour = middleColour,
                       maxColour = maxColour,
                       defaultNodeTextColour = defaultNodeTextColour,
                       nonParetoOpacity = nonParetoOpacity,
                       paretoInfo = paretoInfo,
                       minFontSize = minFontSize,
                       maxFontSize = maxFontSize,
                       graphDPI = graphDPI,
                       graphRatio = graphRatio)
    stats.saveFile(logFile, logList, "LOG FILE")
def main(argv):
    """Command-line entry point for XVpCollector.

    Parses getopt-style arguments, derives default input/output file names
    from the analysis name and folder, runs X_VpCollector over the data and
    relations files, and saves the resulting stats plus an info/log file.
    """
    version = "v0.01"
    analysisName = ""
    analysisFolder = ""
    relationsFile = ""
    dataFiles = ""
    outputFile = ""
    defaultOutput = "OutStats"
    defaultOutputInfo = "infoFile"
    defaultRelationsFile = "rels"
    defaultDataFiles = "datafile"
    defaultTableExtension = ".xls"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    defaultAnalysisName = "xpvpAnalysis"
    infoFile = ""
    logList = [["XVpCollector " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        opts, args = getopt.getopt(argv, "a:p:r:o:d:h", [
            "analysis=", "folder=", "relfile=", "outputfile=", "dataFile=",
            "help"
        ])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        # NOTE(review): infoFile is still "" here — confirm stats.saveFile
        # tolerates an empty path
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-r", "--relfile"):
            relationsFile = arg
        if opt in ("-o", "--outputfile"):
            outputFile = arg
        if opt in ("-d", "--dataFile"):
            dataFiles = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            # fix: previously fell through and kept running after printing help
            sys.exit()
    # REGION: FILE NAMES SETUP
    # derive the analysis name from the data file when none was given
    if len(analysisName) == 0:
        if len(dataFiles) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFiles))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-qualified analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(dataFiles) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFiles)) > 0:
            analysisFolder = os.path.dirname(dataFiles)
    # input
    if len(dataFiles) == 0:
        dataFiles = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFiles + defaultTableExtension)
    if len(os.path.dirname(dataFiles)) == 0 and len(analysisFolder) > 0:
        dataFiles = os.path.join(analysisFolder, dataFiles)
    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0 and len(analysisFolder) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutput + defaultTableExtension)
    else:
        if len(os.path.dirname(outputFile)) == 0:
            outputFile = os.path.join(analysisFolder, outputFile)
    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTableExtension)
    # fix: dataFiles was logged twice, once as "Input dataFiles" and once
    # as "Input dataFile" — keep a single entry
    logList.append(["Input dataFiles " + str(dataFiles)])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Output stats file: " + outputFile])
    logList.append(["Output info file: " + infoFile])
    outputList = X_VpCollector(dataFiles=dataFiles,
                               relationsFile=relationsFile)
    # "Sequence\tFASTAProteinDescription\tXp\tVp\tcount\tZp"
    # TODO: fix the header for Xp and Vp (stray commas kept as-is for
    # downstream compatibility)
    header = "idsup\tX'sup,\t,V'sup"
    stats.saveFile(outputFile, outputList, header)
    if len(infoFile) > 0:
        stats.saveFile(infoFile, logList, "INFO FILE")
def main(argv):
    """Command-line entry point for Aljamia (v1.19).

    Extracts selected columns (-i/-j/-k/-l/--c5) from a QuiXML XML file or a
    text table, optionally filtering rows and removing duplicates/empties,
    and writes the result as a tab-separated table plus a log file.
    """
    # begin: jmrc
    # version = "v1.17"
    version = "v1.19"
    # end: jmrc
    fileName = ""
    outFile = ""
    iField = ""
    jField = ""
    kField = ""
    lField = ""
    c5Field = ""
    analysisName = ""
    filterString = ""
    inputSeparator = "\t"
    useNumbers = False
    # False = Python-style operators (&&, ||),
    # True = word-like operators (\and\ \or\)
    logicOperatorsAsWords = False
    curlyBrackets = False  # False = normal brackets (), True = curly brackets {}
    analysisFolder = ""
    defaultFileName = "QuiXML"
    defaultOutputFile = "table"
    defaultOutputLog = "log"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    defaultXMLExtension = ".xml"
    defaultAnalysisName = "aljamia"
    removeDuplicates = True
    # begin: jmrc
    removeEmptyRows = False
    # end: jmrc
    allowOperationsInFields = ""
    tableId = "peptide_match"  # default for QuiXML
    initialRow = 1  # for xls coming from QuiXML should be 25
    logFile = ""
    logList = [["Aljamia " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # begin: jmrc
        # opts, args = getopt.getopt(argv, "a:p:x:o:i:j:k:l:t:R:L:f:F:A:cdwh", ...)
        # end: jmrc
        # fixes: added "s:" so -s takes its separator argument; registered the
        # previously missing long options "analysis=" and
        # "filter-using-numbers="; gave "inputseparator" its required "="
        opts, args = getopt.getopt(
            argv, "a:p:x:o:i:j:k:l:t:R:L:f:F:A:e:s:cdwh",
            ["analysis=", "input=", "filename=", "place=", "folder=",
             "outfile=", "c1=", "c2=", "c3=", "c4=", "c5=", "table=",
             "initialrow=", "logfile=", "filter=", "filter-using-numbers=",
             "allow-operations=", "curly-brackets", "allow-duplicates",
             "remove-empty=", "word-operators", "inputseparator=", "help",
             "egg", "easteregg"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-x", "--input", "--filename"):
            fileName = arg
        # fix: getopt registers "outfile=", the handler only knew "--output"
        if opt in ("-o", "--outfile", "--output"):
            outFile = arg
        elif opt in ("-i", "--c1"):
            iField = arg.strip()
        elif opt in ("-j", "--c2"):
            jField = arg.strip()
        elif opt in ("-k", "--c3"):
            kField = arg.strip()
        elif opt in ("-l", "--c4"):
            lField = arg.strip()
        # fix: ("--c5") without a comma is a plain string, so `opt in` did a
        # substring test and "-c" wrongly matched here
        elif opt in ("--c5",):
            c5Field = arg.strip()
        elif opt in ("-t", "--table"):
            tableId = int(arg)  # *** check this: int or string?
        elif opt in ("-R", "--initialrow"):
            initialRow = int(arg)
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-f", "--filter"):
            filterString = arg
            useNumbers = False
        elif opt in ("-F", "--filter-using-numbers"):
            filterString = arg
            useNumbers = True
        elif opt in ("-A", "--allow-operations"):
            allowOperationsInFields = str(arg).strip()
        elif opt in ("-d", "--allow-duplicates"):
            removeDuplicates = False
        # begin: jmrc
        elif opt in ("-e", "--remove-empty"):
            # NOTE(review): stores the raw string, so any non-empty value is
            # truthy downstream — confirm expected values with getDataFromTXT
            removeEmptyRows = arg
        # end: jmrc
        elif opt in ("-c", "--curly-brackets"):
            curlyBrackets = True
        elif opt in ("-w", "--word-operators"):
            logicOperatorsAsWords = True
        elif opt in ("-s", "--inputseparator"):
            inputSeparator = str(arg)[0]
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # REGION: FILE NAMES SETUP
    # derive the analysis name from the input file when none was given
    if len(analysisName) == 0:
        if len(fileName) > 0:
            analysisName = os.path.splitext(os.path.basename(fileName))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-qualified analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(fileName) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(fileName)) > 0:
            analysisFolder = os.path.dirname(fileName)
    # input
    if len(fileName) == 0:
        fileName = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultFileName + defaultXMLExtension)
    if len(os.path.dirname(fileName)) == 0:
        fileName = os.path.join(analysisFolder, fileName)
    # output
    if len(outFile) == 0:
        outFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputFile + defaultTableExtension)
    else:
        if len(os.path.dirname(outFile)) == 0:
            outFile = os.path.join(analysisFolder, outFile)
    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputLog + defaultTextExtension)
    logList.append(["Input file: " + fileName])
    logList.append(["Output file: " + outFile])
    logList.append(["Log file: " + logFile])
    # END REGION: FILE NAMES SETUP
    # errorsFound = findErrors(xmlDocument, tableId, iField, jField, kField, lField)
    # if len(errorsFound) > 0:
    #     for error in errorsFound: print error
    #     sys.exit()
    if os.path.splitext(fileName)[1] == ".xml":
        try:
            # xmlDocument = dom.parse(fileName)
            xmlDocument = ElementTree.parse(fileName).getroot()
        except Exception:
            # fix: removed stray debug "2" from the user-facing message
            print("Error while reading xml file.")
            logList.append(["Error while reading xml file."])
            stats.saveFile(logFile, logList, "LOG FILE")
            sys.exit(2)
        # fix: first column argument was the undefined name "Field"
        # (NameError on every XML input); the TXT branch uses iField
        resultingData = getDataFromXML(
            xmlDocument, iField, jField, kField, lField, c5Field,
            tableId=tableId,
            filterString=filterString,
            removeDuplicates=removeDuplicates,
            useNumbers=useNumbers,
            logicOperatorsAsWords=logicOperatorsAsWords,
            curlyBrackets=curlyBrackets)
    else:
        # then it should be a tsv
        resultingData = getDataFromTXT(
            fileName, iField, jField, kField, lField, c5Field,
            filterString=filterString,
            removeDuplicates=removeDuplicates,
            # begin: jmrc
            removeEmptyRows=removeEmptyRows,
            # end: jmrc
            initialRow=initialRow,
            allowOperationsInFields=allowOperationsInFields,
            useNumbers=useNumbers,
            logicOperatorsAsWords=logicOperatorsAsWords,
            curlyBrackets=curlyBrackets,
            inputSeparator=inputSeparator)
    # build the output header from whichever columns were requested
    iTab = "%s\t" % iField if len(iField) > 0 else ""
    jTab = "%s\t" % jField if len(jField) > 0 else ""
    kTab = "%s\t" % kField if len(kField) > 0 else ""
    lTab = "%s\t" % lField if len(lField) > 0 else ""
    c5Tab = "%s\t" % c5Field if len(c5Field) > 0 else ""
    header = iTab + jTab + kTab + lTab + c5Tab
    header = header[:len(header) - 1]  # drop the trailing tab
    stats.saveFile(outFile, resultingData, header)
    # fix: the log list was built but never written on the success path
    # (the v1.17 variant of this entry point does write it)
    if len(logFile) > 0:
        stats.saveFile(logFile, logList, "LOG FILE")
def showGraphTool(inputRawData, inputRelations, k, variance, alpha, medianSide,
                  verbose=False, showRank=False, graphFile=None,
                  graphData=None, dpi=None, showGraph=True):
    """Plot the MAD distribution against the inverse of the fitted curve.

    Sorts the raw data and relations in place, merges them via
    getNextIdX_klibrate, computes the MAD distribution and the inverse of
    the fit, trims the half-window edges where the running median is
    undefined, and plots either by rank (showRank=True, optionally dumping
    the data to graphData) or by weight.

    Side effects: sorts both input lists in place; draws on the current
    matplotlib figure; optionally saves the figure to graphFile (at `dpi`)
    and shows it interactively.  `verbose` is currently unused (kept for
    interface compatibility).
    """
    plt.clf()
    inputRawData.sort()
    inputRelations.sort()
    windowWidth = medianSide * 2 + 1
    if len(inputRawData) < windowWidth:
        print('Error: window for median is bigger than total input size')
        sys.exit()
    # output = makeStats(k, variance, input = input)
    nextIdXData = getNextIdX_klibrate(inputRawData, inputRelations, k,
                                      variance, alpha, giveMergedData=True)
    nextIdX = nextIdXData[0]
    mergedData = nextIdXData[1]
    MADdistrOut, weights = getMADDistribution(nextIdX, mergedData, k,
                                              variance, alpha, medianSide)
    invOfFitOut = getInverseOfFit(mergedData, k, variance, alpha)
    # trim the half-window edges where the running median is undefined
    MADdistrOut = MADdistrOut[medianSide:len(MADdistrOut) - medianSide + 1]
    invOfFitOut = invOfFitOut[medianSide:len(invOfFitOut) - medianSide + 1]
    weights = weights[medianSide:len(weights) - medianSide + 1]
    if showRank:
        plt.plot(list(range(len(MADdistrOut))), MADdistrOut, 'g.',
                 list(range(len(invOfFitOut))), invOfFitOut, 'r',
                 linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
        plt.xlabel('rank($V_s$)')
        plt.ylabel('1 / MSD')
        # to save data
        # *** use a better filename
        dataToSave = []
        for i in range(len(MADdistrOut)):
            dataToSave.append([i, weights[i], MADdistrOut[i], invOfFitOut[i]])
        if graphData:
            stats.saveFile(graphData, dataToSave,
                           "rank(Vs)\tweight\tMAD\t1/fit")
    else:
        # uncomment to graph MSD instead of 1 / MSD
        #
        # for i in xrange(len(invOfFitOut)):
        #     invOfFitOut[i] = 1 / invOfFitOut[i]
        # for i in xrange(len(MADdistrOut)):
        #     MADdistrOut[i] = 1 / MADdistrOut[i]
        plt.plot(weights, MADdistrOut, 'g.',
                 weights, invOfFitOut, 'r',
                 linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
        plt.xlabel('($V_s$)')
        plt.ylabel('1 / MSD')
    plt.grid(True)
    # fix: raw string — "\s" in a normal string literal is an invalid escape
    # sequence (SyntaxWarning on modern CPython); the rendered title is identical
    plt.title(r'k = %g, $\sigma^2$ = %g, alpha = %g' % (k, variance, alpha))
    if graphFile:
        plt.savefig(graphFile, dpi=dpi)
    if showGraph:
        plt.show()
def main(argv):
    """Command-line entry point for Aljamia (v1.17, legacy variant).

    Extracts selected columns (-i/-j/-k/-l/--c5) from a QuiXML XML file or a
    text table, optionally filtering rows and removing duplicates, and
    writes the result as a tab-separated table plus a log file.
    """
    version = "v1.17"
    fileName = ""
    outFile = ""
    iField = ""
    jField = ""
    kField = ""
    lField = ""
    c5Field = ""
    analysisName = ""
    filterString = ""
    inputSeparator = "\t"
    useNumbers = False
    # False = Python-style operators (&&, ||),
    # True = word-like operators (\and\ \or\)
    logicOperatorsAsWords = False
    curlyBrackets = False  # False = normal brackets (), True = curly brackets {}
    analysisFolder = ""
    defaultFileName = "QuiXML"
    defaultOutputFile = "table"
    defaultOutputLog = "log"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    defaultXMLExtension = ".xml"
    defaultAnalysisName = "aljamia"
    removeDuplicates = True
    allowOperationsInFields = ""
    tableId = "peptide_match"  # default for QuiXML
    initialRow = 1  # for xls coming from QuiXML should be 25
    logFile = ""
    logList = [["Aljamia " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # fixes: added "s:" so -s takes its separator argument; registered the
        # previously missing long options "analysis=" and
        # "filter-using-numbers="; gave "inputseparator" its required "="
        opts, args = getopt.getopt(
            argv, "a:p:x:o:i:j:k:l:t:R:L:f:F:A:s:cdwh",
            ["analysis=", "input=", "filename=", "place=", "folder=",
             "outfile=", "c1=", "c2=", "c3=", "c4=", "c5=", "table=",
             "initialrow=", "logfile=", "filter=", "filter-using-numbers=",
             "allow-operations=", "curly-brackets", "allow-duplicates",
             "word-operators", "inputseparator=", "help", "egg", "easteregg"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-x", "--input", "--filename"):
            fileName = arg
        # fix: getopt registers "outfile=", the handler only knew "--output"
        if opt in ("-o", "--outfile", "--output"):
            outFile = arg
        elif opt in ("-i", "--c1"):
            iField = arg.strip()
        elif opt in ("-j", "--c2"):
            jField = arg.strip()
        elif opt in ("-k", "--c3"):
            kField = arg.strip()
        elif opt in ("-l", "--c4"):
            lField = arg.strip()
        # fix: ("--c5") without a comma is a plain string, so `opt in` did a
        # substring test and "-c" wrongly matched here
        elif opt in ("--c5",):
            c5Field = arg.strip()
        elif opt in ("-t", "--table"):
            tableId = int(arg)  # *** check this: int or string?
        elif opt in ("-R", "--initialrow"):
            initialRow = int(arg)
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-f", "--filter"):
            filterString = arg
            useNumbers = False
        elif opt in ("-F", "--filter-using-numbers"):
            filterString = arg
            useNumbers = True
        elif opt in ("-A", "--allow-operations"):
            allowOperationsInFields = str(arg).strip()
        elif opt in ("-d", "--allow-duplicates"):
            removeDuplicates = False
        elif opt in ("-c", "--curly-brackets"):
            curlyBrackets = True
        elif opt in ("-w", "--word-operators"):
            logicOperatorsAsWords = True
        elif opt in ("-s", "--inputseparator"):
            inputSeparator = str(arg)[0]
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # REGION: FILE NAMES SETUP
    # derive the analysis name from the input file when none was given
    if len(analysisName) == 0:
        if len(fileName) > 0:
            analysisName = os.path.splitext(os.path.basename(fileName))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-qualified analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(fileName) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(fileName)) > 0:
            analysisFolder = os.path.dirname(fileName)
    # input
    if len(fileName) == 0:
        fileName = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultFileName + defaultXMLExtension)
    if len(os.path.dirname(fileName)) == 0:
        fileName = os.path.join(analysisFolder, fileName)
    # output
    if len(outFile) == 0:
        outFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputFile + defaultTableExtension)
    else:
        if len(os.path.dirname(outFile)) == 0:
            outFile = os.path.join(analysisFolder, outFile)
    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputLog + defaultTextExtension)
    logList.append(["Input file: " + fileName])
    logList.append(["Output file: " + outFile])
    logList.append(["Log file: " + logFile])
    # END REGION: FILE NAMES SETUP
    # errorsFound = findErrors(xmlDocument, tableId, iField, jField, kField, lField)
    # if len(errorsFound) > 0:
    #     for error in errorsFound: print error
    #     sys.exit()
    if os.path.splitext(fileName)[1] == ".xml":
        try:
            # xmlDocument = dom.parse(fileName)
            xmlDocument = ElementTree.parse(fileName).getroot()
        except Exception:
            print("Error while reading xml file.")
            logList.append(["Error while reading xml file."])
            stats.saveFile(logFile, logList, "LOG FILE")
            sys.exit(2)
        # fix: first column argument was the undefined name "Field"
        # (NameError on every XML input); the TXT branch uses iField
        resultingData = getDataFromXML(
            xmlDocument, iField, jField, kField, lField, c5Field,
            tableId=tableId,
            filterString=filterString,
            removeDuplicates=removeDuplicates,
            useNumbers=useNumbers,
            logicOperatorsAsWords=logicOperatorsAsWords,
            curlyBrackets=curlyBrackets)
    else:
        # then it should be a tsv
        resultingData = getDataFromTXT(
            fileName, iField, jField, kField, lField, c5Field,
            filterString=filterString,
            removeDuplicates=removeDuplicates,
            initialRow=initialRow,
            allowOperationsInFields=allowOperationsInFields,
            useNumbers=useNumbers,
            logicOperatorsAsWords=logicOperatorsAsWords,
            curlyBrackets=curlyBrackets,
            inputSeparator=inputSeparator)
    # build the output header from whichever columns were requested
    iTab = "%s\t" % iField if len(iField) > 0 else ""
    jTab = "%s\t" % jField if len(jField) > 0 else ""
    kTab = "%s\t" % kField if len(kField) > 0 else ""
    lTab = "%s\t" % lField if len(lField) > 0 else ""
    c5Tab = "%s\t" % c5Field if len(c5Field) > 0 else ""
    header = iTab + jTab + kTab + lTab + c5Tab
    header = header[:len(header) - 1]  # drop the trailing tab
    stats.saveFile(outFile, resultingData, header)
    if len(logFile) > 0:
        stats.saveFile(logFile, logList, "LOG FILE")
def main(argv):
    """Command-line entry point for SanXoTGhost.

    Parses getopt-style arguments, derives default names for the modified /
    non-modified peptide files, relations, variance and output files, runs
    ZpCalculator, and saves the resulting stats plus an info/log file.
    """
    version = "v0.08"
    analysisName = ""
    analysisFolder = ""
    relationsFile = ""
    modifiedPeptidesFile = ""
    modifiedPeptidesFile1 = ""
    nonModifiedPep2ProtFile = ""
    pep2protein = ""
    varFile = ""
    varFile1 = ""
    outputFile = ""
    defaultModifiedPeptidesFile = "modPepFile"
    defaultModifiedPeptidesFile1 = "modPepFile1"
    defaultnonModifiedPeptidesFile = "nonmodPepFile"
    defaultpep2protein = "Pep2proteinFile"
    defaultOutput = "ModOutStats"
    defaultOutputInfo = "infoFile"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".xls"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    defaultAnalysisName = "ghostanalysis"
    infoFile = ""
    logList = [["SanXoTGhost " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # fix: registered "varfile1=" — only the capitalised "varFile1="
        # spelling was registered, so the "--varfile1" the handler checks
        # could never be parsed (old spelling kept for compatibility)
        opts, args = getopt.getopt(argv, "a:p:r:m:M:n:t:o:V:W:L:h", [
            "analysis=", "folder=", "relfile=", "modfile=", "modfile1=",
            "nonmodfile=", "pep2pro=", "outputfile=", "varfile=",
            "varfile1=", "varFile1=", "infofile=", "help"
        ])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        # NOTE(review): infoFile is still "" here — confirm stats.saveFile
        # tolerates an empty path
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        if opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        if opt in ("-m", "--modfile"):
            modifiedPeptidesFile = arg
        if opt in ("-M", "--modfile1"):
            modifiedPeptidesFile1 = arg
        if opt in ("-n", "--nonmodfile"):
            nonModifiedPep2ProtFile = arg
        if opt in ("-t", "--pep2pro"):
            pep2protein = arg
        if opt in ("-o", "--outputfile"):
            outputFile = arg
        if opt in ("-r", "--relfile"):
            relationsFile = arg
        if opt in ("-V", "--varfile"):
            varFile = arg
        if opt in ("-W", "--varfile1", "--varFile1"):
            varFile1 = arg
        if opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            # fix: previously fell through and kept running after printing help
            sys.exit()
    # REGION: FILE NAMES SETUP
    # derive the analysis name from the modified-peptides file when possible
    if len(analysisName) == 0:
        if len(modifiedPeptidesFile) > 0:
            analysisName = os.path.splitext(
                os.path.basename(modifiedPeptidesFile))[0]
        else:
            analysisName = defaultAnalysisName
    # a path-qualified analysis name doubles as the analysis folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(modifiedPeptidesFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(modifiedPeptidesFile)) > 0:
            analysisFolder = os.path.dirname(modifiedPeptidesFile)
    # second chance: derive from the secondary modified-peptides file
    if len(analysisName) == 0:
        if len(modifiedPeptidesFile1) > 0:
            analysisName = os.path.splitext(
                os.path.basename(modifiedPeptidesFile1))[0]
        else:
            analysisName = defaultAnalysisName
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(modifiedPeptidesFile1) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(modifiedPeptidesFile1)) > 0:
            analysisFolder = os.path.dirname(modifiedPeptidesFile1)
    # input
    if len(modifiedPeptidesFile) == 0:
        modifiedPeptidesFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultModifiedPeptidesFile +
            defaultTableExtension)
    if len(os.path.dirname(
            modifiedPeptidesFile)) == 0 and len(analysisFolder) > 0:
        modifiedPeptidesFile = os.path.join(analysisFolder,
                                            modifiedPeptidesFile)
    if len(modifiedPeptidesFile1) == 0:
        modifiedPeptidesFile1 = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultModifiedPeptidesFile1 +
            defaultTableExtension)
    if len(os.path.dirname(
            modifiedPeptidesFile1)) == 0 and len(analysisFolder) > 0:
        modifiedPeptidesFile1 = os.path.join(analysisFolder,
                                             modifiedPeptidesFile1)
    if len(nonModifiedPep2ProtFile) == 0:
        nonModifiedPep2ProtFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultnonModifiedPeptidesFile +
            defaultTableExtension)
    if len(os.path.dirname(
            nonModifiedPep2ProtFile)) == 0 and len(analysisFolder) > 0:
        nonModifiedPep2ProtFile = os.path.join(analysisFolder,
                                               nonModifiedPep2ProtFile)
    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0 and len(analysisFolder) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)
    if len(os.path.dirname(varFile)) == 0 and len(
            os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)
    if len(os.path.dirname(varFile1)) == 0 and len(
            os.path.basename(varFile1)) > 0:
        varFile1 = os.path.join(analysisFolder, varFile1)
    if len(pep2protein) == 0:
        pep2protein = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultpep2protein + defaultTableExtension)
    if len(os.path.dirname(pep2protein)) == 0 and len(analysisFolder) > 0:
        pep2protein = os.path.join(analysisFolder, pep2protein)
    # ************** reviewed up to here
    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutput + defaultTableExtension)
    else:
        if len(os.path.dirname(outputFile)) == 0:
            outputFile = os.path.join(analysisFolder, outputFile)
    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTableExtension)
    logList.append(["Input modifiedPeptideFile " + str(modifiedPeptidesFile)])
    logList.append(
        ["Input modifiedPeptideFile1 " + str(modifiedPeptidesFile1)])
    logList.append(
        ["Input nonMod_PepTOpro_File " + str(nonModifiedPep2ProtFile)])
    logList.append(["Input pep to protein file " + str(pep2protein)])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Input varianceFile: " + varFile])
    logList.append(["Input Second_varianceFile: " + varFile1])
    logList.append(["Output stats file: " + outputFile])
    logList.append(["Output info file: " + infoFile])
    # pp.pprint(logList)
    # sys.exit()
    # END REGION: FILE NAMES SETUP
    outputList = ZpCalculator(relationsFile=relationsFile,
                              modifiedPeptidesFile=modifiedPeptidesFile,
                              modifiedPeptidesFile1=modifiedPeptidesFile1,
                              nonModifiedPep2ProtFile=nonModifiedPep2ProtFile,
                              varFile=varFile,
                              varFile1=varFile1,
                              pep2protein=pep2protein,
                              outname=outputFile)
    # "Sequence\tFASTAProteinDescription\tXp\tVp\tcount\tZp"
    # TODO: fix the header for Xp and Vp
    header = "idsup\tXsup\tVsup\tidinf\tXinf\tVinf\tn\tZ\tFDR"
    stats.saveFile(outputFile, outputList, header)
    if len(infoFile) > 0:
        stats.saveFile(infoFile, logList, "INFO FILE")