def replaceRelations(relations, FASTAHeaders = None):
    """Replace the second element of each relation by its FASTA header.

    relations    -- list of [idsup, idinf] pairs.
    FASTAHeaders -- list of [identifier, header] pairs; when None or empty,
                    the relations are returned merely sorted by column 1.

    Returns a new list of [idsup, header] pairs. Relations whose identifier
    is not found in FASTAHeaders are silently dropped from the result.
    """
    # BUG FIX: the original signature used a mutable default argument
    # (FASTAHeaders = []), which is shared between calls; a None sentinel
    # is backward compatible and safe.
    if FASTAHeaders is None:
        FASTAHeaders = []
    newRelations = []
    relationsSorted = stats.sortByIndex(relations, 1)
    FASTAHeadersSorted = stats.sortByIndex(FASTAHeaders, 0)
    if len(FASTAHeadersSorted) > 0:
        for relation in relationsSorted:
            element = relation[1]
            # sort = False: FASTAHeadersSorted is already sorted by column 0
            searchResult = stats.filterByElement(FASTAHeadersSorted, element, index = 0, sort = False)
            if len(searchResult) > 0:
                newRelations.append([relation[0], searchResult[0][1]])
    else:
        # nothing to map against: give back the relations, just sorted
        return relationsSorted
    return newRelations
def associateElements(inStats = "", uFile = "", relFile = ""):
    """Associate each higher-level element with its lower-level elements.

    inStats -- stats file with lower-level data (column 3 = id, column 7 = Z).
    uFile   -- file with the higher-level element list; several header
               layouts are accepted (see the normalisation block below).
    relFile -- relations file linking higher (col 0) to lower (col 1) ids.

    Returns (results, elementList, "") where results pairs each higher
    element with the sorted list of its lower elements present in the stats
    data, or with None when the element has no relations at all.
    """
    results = []
    relations = stats.loadRelationsFile(relFile)
    relations = stats.sortByIndex(relations, 0)
    statsData = stats.loadStatsDataFile(inStats)
    # keep [id, Z] pairs of the lower-level stats data
    ZijList = []
    for element in statsData:
        ZijList.append([element[3], element[7]])
    theorList = []  # NOTE(review): theorList is filled but never used below
    experList = []
    N = len(ZijList)
    for i in range(N):
        # theoretical cumulative probability of each Z value
        theorList.append([ZijList[i][0], ZijList[i][1], norm.cdf(float(ZijList[i][1]))])
        # experimental (rank-based) cumulative probability
        experList.append([ZijList[i][0], ZijList[i][1], (float(i) + 0.5) / float(N)])
    higherElements = stats.load2stringList(uFile, removeCommas = True)
    # WARNING! higherElements must be a list of lists
    # with each sublist being id, n, Z, FDR, X
    # begin: jmrc
    if not higherElements:
        sms = "ERROR: higherElements is empty. The higherElements must be a list of lists with each sublist being id, n, Z, FDR, X"
        sys.exit(sms)
    # end: jmrc
    # normalise the higher-element table to [id, n, Z, FDR, X]
    elementList = []
    if higherElements[0] == ['id', 'Z', 'n']:
        # this means the list comes from SanXoTSqueezer
        # so the header and the extra columns have to be removed
        for element in higherElements[1:]:
            # switch to id, n, Z, FDR
            elementList.append([element[0], element[2], element[1], float("nan"), float("nan")])
    if higherElements[0] == ['id', 'n', 'Z', 'FDR']:
        # this means it does not contain X, so a nan is put on its place
        for element in higherElements[1:]:
            elementList.append([element[0], element[1], element[2], element[3], float("nan")])
    if higherElements[0] == ['id', 'n', 'Z', 'FDR', 'X']:
        for element in higherElements[1:]:
            elementList.append([element[0], element[1], element[2], element[3], element[4]])
    # otherwise: no recognised header row, keep only the ids
    if higherElements[0] != ['id', 'Z', 'n'] and higherElements[0] != ['id', 'n', 'Z', 'FDR'] and higherElements[0] != ['id', 'n', 'Z', 'FDR', 'X']:
        for element in higherElements:
            elementList.append([element[0], float("nan"), float("nan"), float("nan"), float("nan")])
    statsData = stats.sortByIndex(statsData, 7)
    # pre-extract the columns once; firstIndex/lastIndex below assume the
    # relations are sorted by their first column (done at the top)
    relationsFirstColumn = stats.extractColumns(relations, 0)
    relationsSecondColumn = stats.extractColumns(relations, 1)
    experListFirstColumn = stats.extractColumns(experList, 0)
    for uElement in elementList:
        lowerElementList = []
        first = stats.firstIndex(relationsFirstColumn, uElement[0])
        if first > -1:
            # -1 means it is not in the list
            notInList = 0  # NOTE(review): counted but never reported anywhere
            last = stats.lastIndex(relationsFirstColumn, uElement[0])
            # "+1" is to include the last one
            lowerElements = relationsSecondColumn[first:last + 1]
            for element in lowerElements:
                lowerIndex = stats.firstIndex(experListFirstColumn, element)
                if lowerIndex > -1:
                    # -1 means it is not in the list
                    lowerElementList.append(element)
                else:
                    notInList += 1
            lowerElementList = stats.sortByIndex(lowerElementList, 0)
            results.append([uElement[0], lowerElementList])
        else:
            # higher element without relations: keep it with a None placeholder
            if len(uElement[0].strip()) > 0:
                results.append([uElement[0], None])
    return results, elementList, ""
def getMADDistribution(nextIdX, mergedData, k, variance, alpha, medianSide=100, showGraph=False, verbose=False):
    """Build MAD-based weights for the Xi - Xj residuals of each sequence.

    nextIdX    -- rows of [sequence, X] at the higher level.
    mergedData -- scan-level rows (col 1 = scan, col 2 = Xi, col 3 = weight),
                  assumed already sorted by sequence.
    medianSide -- half-width of the sliding-median window.

    Returns (MADDistribution, distrWeight): the per-row MAD weight
    1 / (MADconstant * median)^2 and the corresponding original weights.
    """
    # NOTE(review): an identical getMADDistribution is defined again later in
    # this file; at import time the later copy shadows this one. Confirm which
    # copy is wanted and remove the other.
    MADconstant = 1.48260221850560 # *** 1 / DISTR.NORM.ESTAND.INV(3 / 4) get exact number
    MADDistribution = []
    distrWeight = []
    # inputSequences = extractColumns(input, 0)
    # outputSequences = extractColumns(output, 0)
    newlist = []
    for orow in nextIdX:
        sequence = orow[0]
        # it is important to avoid sorting to keep it fast
        # so in next line do not forget sort = False
        # this should arrive here already sorted
        scanListWithSequence = stats.filterByElement(mergedData, sequence, sort=False)
        if len(scanListWithSequence) > 1: # otherwise Xi = Xj --> Xi - Xj = 0 --> does not work
            for scanRow in scanListWithSequence:
                newrow = []
                weight = scanRow[3] # the V
                degreesOfFreedom = len(scanListWithSequence)
                XiXj = scanRow[2] - orow[1]
                newrow.append(sequence) # sequence = 0
                newrow.append(scanRow[1]) # scan number = 1
                newrow.append(XiXj) # Xi - Xj = 2
                newrow.append(weight) # weight = 3
                newrow.append(len(scanListWithSequence)) # degrees of freedom = 4
                newrow.append(fabs(XiXj) * sqrt(float(degreesOfFreedom) / (float(degreesOfFreedom - 1)))) # = 5
                newrow.append(0) # space to save the median = 6
                newrow.append(0) # space to save the MAD formula = 7
                newlist.append(newrow)
    newlist = stats.sortByIndex(newlist, 3) # sort by weight
    # get median + rank
    nextlist = []  # NOTE(review): never used
    counter = 0  # NOTE(review): never used
    if len(newlist) < medianSide * 2:
        if verbose:
            print('Not enough data to perform statistics,')
            print('len(newlist) = %s, while medianSide = %s' % (str(len(newlist)), str(medianSide)))
        sys.exit()
    # sliding median of column 5 over a window of 2 * medianSide + 1 rows
    for i in range(len(newlist))[medianSide:len(newlist) - medianSide]:
        window = newlist[i - medianSide:i + medianSide + 1]
        median = stats.medianByIndex(window, 5)
        newlist[i][6] = median
    # fill the borders with the nearest computed median
    # NOTE(review): the first computed median lives at index medianSide, so
    # "medianSide + 1" below looks like an off-by-one — confirm intent.
    for i in range(len(newlist))[:medianSide]:
        newlist[i][6] = newlist[medianSide + 1][6]
    for i in range(len(newlist))[len(newlist) - medianSide:]:
        newlist[i][6] = newlist[len(newlist) - medianSide - 1][6]
    # fill MAD formula: weight = 1 / (MADconstant * median)^2
    for i in range(len(newlist)):
        newlist[i][7] = 1 / (MADconstant * newlist[i][6])**2
        MADDistribution.append(newlist[i][7])
        distrWeight.append(newlist[i][3])
    if verbose:
        print('k = %f, var = %f' % (k, variance))
    return MADDistribution, distrWeight
def getMADDistribution(nextIdX, mergedData, k, variance, alpha, medianSide = 100, showGraph = False, verbose = False):
    """Build MAD-based weights for the Xi - Xj residuals of each sequence.

    nextIdX    -- rows of [sequence, X] at the higher level.
    mergedData -- scan-level rows (col 1 = scan, col 2 = Xi, col 3 = weight),
                  assumed already sorted by sequence.
    medianSide -- half-width of the sliding-median window.

    Returns (MADDistribution, distrWeight): the per-row MAD weight
    1 / (MADconstant * median)^2 and the corresponding original weights.
    Exits the process when there are fewer than 2 * medianSide usable rows.

    NOTE(review): this duplicates an earlier, identical getMADDistribution
    in this file; this later copy is the one in effect at import time.
    """
    MADconstant = 1.48260221850560 # *** 1 / DISTR.NORM.ESTAND.INV(3 / 4) get exact number
    MADDistribution = []
    distrWeight = []
    newlist = []
    for orow in nextIdX:
        sequence = orow[0]
        # it is important to avoid sorting to keep it fast
        # so in next line do not forget sort = False
        # mergedData should arrive here already sorted
        scanListWithSequence = stats.filterByElement(mergedData, sequence, sort = False)
        if len(scanListWithSequence) > 1: # otherwise Xi = Xj --> Xi - Xj = 0 --> does not work
            for scanRow in scanListWithSequence:
                weight = scanRow[3] # the V
                degreesOfFreedom = len(scanListWithSequence)
                XiXj = scanRow[2] - orow[1]
                newrow = [
                    sequence,          # 0: sequence
                    scanRow[1],        # 1: scan number
                    XiXj,              # 2: Xi - Xj
                    weight,            # 3: weight
                    degreesOfFreedom,  # 4: degrees of freedom
                    fabs(XiXj) * sqrt(float(degreesOfFreedom) / (float(degreesOfFreedom - 1))), # 5
                    0,                 # 6: space to save the median
                    0,                 # 7: space to save the MAD formula
                ]
                newlist.append(newrow)
    newlist = stats.sortByIndex(newlist, 3) # sort by weight
    # FIX: removed unused locals "nextlist" and "counter" from the original
    if len(newlist) < medianSide * 2:
        if verbose:
            print('Not enough data to perform statistics,')
            print('len(newlist) = %s, while medianSide = %s' % (str(len(newlist)), str(medianSide)))
        sys.exit()
    # sliding median of column 5 over a window of 2 * medianSide + 1 rows
    # (direct range() instead of the original range(len(...))[a:b] slicing,
    # which built a throwaway list; the visited indices are identical)
    for i in range(medianSide, len(newlist) - medianSide):
        window = newlist[i - medianSide:i + medianSide + 1]
        newlist[i][6] = stats.medianByIndex(window, 5)
    # fill the borders with the nearest computed median
    # NOTE(review): the first computed median lives at index medianSide, so
    # "medianSide + 1" looks like an off-by-one — preserved as-is, confirm.
    for i in range(medianSide):
        newlist[i][6] = newlist[medianSide + 1][6]
    for i in range(len(newlist) - medianSide, len(newlist)):
        newlist[i][6] = newlist[len(newlist) - medianSide - 1][6]
    # fill MAD formula: weight = 1 / (MADconstant * median)^2
    for i in range(len(newlist)):
        newlist[i][7] = 1 / (MADconstant * newlist[i][6]) ** 2
        MADDistribution.append(newlist[i][7])
        distrWeight.append(newlist[i][3])
    if verbose:
        print('k = %f, var = %f' % (k, variance))
    return MADDistribution, distrWeight
def main(options, programName, programVersion):
    """Build a category->protein relations file from a DB table.

    options        -- an argparse.ArgumentParser-like object (add_argument /
                      parse_args are called on it).
    programName    -- used for the log header and default analysis name.
    programVersion -- appended to the log header.

    Reads the DB table, optionally maps identifiers to FASTA headers, and
    saves the resulting relations plus a log file.
    """
    ## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    # basic default info
    logFile = ""
    analysisName = ""
    analysisFolder = ""
    defaultAnalysisName = programName.lower()
    relFile = ""
    DBFile = ""
    FASTAFile = ""
    previousFile = ""
    accNumCol = 1
    catCol = 2
    catPrefix = ""
    header = "idsup\tidinf"
    previousList = []
    # default extensions
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    # default file names
    defaultLogFile = "logFile"
    defaultRelFile = "rels"
    # basic log file
    logList = [[programName + " " + programVersion], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    # parsing arguments from commandline
    options.add_argument("-a", "--analysis", type = str, default = "", required = True, help = "Use a prefix for the output files.")
    options.add_argument("-p", "--place", type = str, default = "", required = True, help = "To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
    options.add_argument("-L", "--logfile", type = str, default = "", required = False, help = "To use a non-default name for the log file.")
    options.add_argument("-d", "--dbfile", type = str, default = "", required = True, help = "The input file containing accession numbers and categories.")
    options.add_argument("-x", "--previousfile", type = str, default = "", required = False, help = "An optional relation file to which concatenate resulting relations (if omitted, a new file will be produced).")
    options.add_argument("-q", "--accnumcol", type = str, default = "1", required = False, help = "Column where accession numbers of genes/proteins are. First column is 1. Default is 1.")
    options.add_argument("-c", "--categorycol", type = str, default = "2", required = False, help = "Column where categories are. First column is 1. Default is 2.")
    options.add_argument("-f", "--prefix", type = str, default = "", required = False, help = "Prefix to add to all categories found in this parsing (such as \"GO-full_\", \"Panther_\", or \"KEGG=2017-01-10_\".")
    options.add_argument("--fasta", type = str, default = "", required = False, help = "FASTA file contaning the identifiers we want to replace by FASTA headers in the final file. Note that identifiers not appearing in this FASTA file will be removed from the final list.")
    # add string with category separator
    # add string with accNum separator
    # *-*-* add easter egg if wanted
    arguments = options.parse_args()
    # copying parsed arguments
    # copy any arguments used
    if len(arguments.analysis) > 0:
        analysisName = arguments.analysis
    if len(arguments.place) > 0:
        analysisFolder = arguments.place
    if len(arguments.logfile) > 0:
        logFile = arguments.logfile
    if len(arguments.dbfile) > 0:
        DBFile = arguments.dbfile
    if len(arguments.fasta) > 0:
        FASTAFile = arguments.fasta
    if len(arguments.previousfile) > 0:
        previousFile = arguments.previousfile
    if len(arguments.accnumcol) > 0:
        accNumCol = int(arguments.accnumcol)
    if len(arguments.categorycol) > 0:
        catCol = int(arguments.categorycol)
    if len(arguments.prefix) > 0:
        catPrefix = arguments.prefix
    ## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
    ## **********************************************************
    ## REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(DBFile) > 0:
            analysisName = os.path.splitext(os.path.basename(DBFile))[0]
        else:
            analysisName = defaultAnalysisName
    # when the analysis name carries a directory, split it off and use it
    # as the analysis folder if none was given
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    # input: bare file names are resolved relative to the analysis folder
    if len(os.path.dirname(DBFile)) == 0:
        DBFile = os.path.join(analysisFolder, DBFile)
    if len(previousFile) > 0:
        if len(os.path.dirname(previousFile)) == 0:
            previousFile = os.path.join(analysisFolder, previousFile)
    if len(FASTAFile) > 0:
        if len(os.path.dirname(FASTAFile)) == 0:
            FASTAFile = os.path.join(analysisFolder, FASTAFile)
    # output
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    if len(relFile) == 0:
        relFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelFile + defaultTableExtension)
    if len(os.path.dirname(relFile)) == 0 and len(os.path.basename(relFile)) > 0:
        relFile = os.path.join(analysisFolder, relFile)
    logList.append([""])
    logList.append(["Input table with categories and proteins: " + DBFile])
    if len(previousFile) > 0:
        logList.append(["Previous file to which new qc relations are added: " + previousFile])
    if len(FASTAFile) > 0:
        logList.append(["FASTA file to replace identifiers for FASTA headers: " + FASTAFile])
    logList.append(["Category column: %i, protein column: %i" % (catCol, accNumCol)])
    logList.append(["Prefix added to categories: " + catPrefix])
    logList.append(["Output relations file: " + relFile])
    logList.append(["Output log file: " + logFile])
    logList.append([""])
    ## END REGION: FILE NAMES SETUP
    ## **********************************************************
    ## REGION: PROGRAM BASIC STRUCTURE
    if len(previousFile) > 0:
        #otherwise, previousList = []
        previousList = stats.load2stringList(previousFile, removeCommas = True)
        # no header when appending to a previous file
        header = ""
    AccNum2FASTAHeader = getFASTAHeaders(FASTAFile)
    gc.collect()
    DBList = stats.load2stringList(DBFile, removeCommas = True)
    newRelations = getRelations(bigTable = DBList, qCol = accNumCol, cCol = catCol, cPrefix = catPrefix, FASTAHeaders = AccNum2FASTAHeader)
    newRelationsSorted = stats.sortByIndex(newRelations, 0)
    relationList = previousList + newRelationsSorted
    gc.collect()
    ## END REGION: PROGRAM BASIC STRUCTURE
    ## **********************************************************
    ## REGION: SAVING FILES
    try:
        stats.saveFile(relFile, relationList, header)
        logList.append(["Everything went fine."])
        stats.saveFile(logFile, logList, "LOG FILE")
    # NOTE(review): saveFile is unlikely to raise getopt.GetoptError, so this
    # handler is probably dead; an I/O-related exception type looks intended.
    except getopt.GetoptError:
        logList.append(["Error."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
def main(argv):
    """Anselmo: pick the experiment whose variance is the median.

    Scans the analysis folder for "<prefix>*_infoFile.txt" files, selects the
    one with the median variance, and copies/renames that experiment's files
    (and optionally the extra-prefix files) using the median tag.

    argv -- command-line arguments (getopt style), see the option table below.
    """
    version = "v0.05"
    analysisName = ""
    analysisFolder = ""
    logFile = ""
    # in data
    prefix = ""
    extraPrefix = ""
    medianTag = "med"
    # default filenames
    defaultInfoFileSuffix = "_infoFile.txt"
    defaultLogFile = "logFile"
    defaultAnalysisName = "medianSelection"
    # default extensions
    defaultTextExtension = ".txt"
    # FIX: removed unused locals "defaultTableExtension" and "verbose"
    logList = [["Anselmo " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        opts, args = getopt.getopt(argv, "a:p:f:g:m:L:hH", ["analysis=", "folder=", "prefix=", "extraprefix=", "mediantag=", "logfile=", "help", "egg", "easteregg"])
    except getopt.GetoptError:
        # BUG FIX: the original called stats.saveFile(infoFile, ...) here, but
        # "infoFile" is never defined in this scope, so the handler itself
        # raised NameError and hid the real parsing problem. The log file path
        # is not known yet at this point, so report to stderr and exit.
        logList.append(["Error while getting parameters."])
        sys.stderr.write("Error while getting parameters.\n")
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version, True)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-f", "--prefix"):
            prefix = arg
        elif opt in ("-g", "--extraprefix"):
            extraPrefix = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-m", "--mediantag"):
            medianTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced = True)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        analysisName = defaultAnalysisName
    # when the analysis name carries a directory, split it off and use it
    # as the analysis folder if none was given
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    # next "if" disables extra copy when extraPrefix is same as prefix
    if len(extraPrefix) > 0 and extraPrefix == prefix:
        extraPrefix = ""
    # output
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
    # END REGION: FILE NAMES SETUP
    # get infoFile list
    infoFileList = glob.glob(os.path.join(analysisFolder, prefix + "*" + defaultInfoFileSuffix))
    logList.append([])
    logList.append(["Folder = " + analysisFolder])
    logList.append([])
    logList.append(["Info files with prefix \"%s\"" % prefix])
    # collect [infoFile, variance] pairs for files with a readable variance
    varList = []
    for varFile in infoFileList:
        variance, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = False)
        if varianceOk:
            varList.append([varFile, variance])
    # get info file with median variance
    varList = stats.sortByIndex(varList, 1)
    medianVariance = stats.medianByIndex(varList, 1)
    medianIndex = getMedianIndex(varList = varList, variance = medianVariance)
    for element in varList:
        if element[0] == varList[medianIndex][0]:
            logList.append(["%s, variance = %f [taken]" % (os.path.basename(element[0]), element[1])])
        else:
            logList.append(["%s, variance = %f" % (os.path.basename(element[0]), element[1])])
    # get prefix of median experiment: strip the known prefix and suffix to
    # recover the random tag in the middle of the info-file name
    medianInfoFile = os.path.basename(varList[medianIndex][0])
    randTag = medianInfoFile[len(prefix):len(medianInfoFile) - len(defaultInfoFileSuffix)]
    medianPrefix = prefix + randTag
    extraMedianPrefix = ""
    if len(extraPrefix) > 0:
        extraMedianPrefix = extraPrefix + randTag
    # get file list with specific prefix
    medianExperimentFileList = glob.glob(os.path.join(analysisFolder, medianPrefix + "*.*"))
    extraPrefixFileList = []
    if len(extraMedianPrefix) > 0:
        extraPrefixFileList = glob.glob(os.path.join(analysisFolder, extraMedianPrefix + "*.*"))
    # copy files including median tag
    extraLogList = copyFilesWithPrefix(fileList = medianExperimentFileList, folder = analysisFolder, prefix = prefix, message = "Renamed files:", tag = medianTag)
    logList.extend(extraLogList)
    if len(extraPrefixFileList) > 0:
        extraLogList = copyFilesWithPrefix(fileList = extraPrefixFileList, folder = analysisFolder, prefix = extraPrefix, message = "Renamed extra files:", tag = medianTag)
        logList.extend(extraLogList)
    # save logFile
    stats.saveFile(logFile, logList, "INFO FILE")
def getRels(qcInputFile = "", listChangingCats = None, qcInputNoOutsFile = "", modeSanXoTSieve = "newWay", caseSensitive = True, outlierTag = "out"):
    """Count relations and outliers pointing at changing categories.

    qcInputFile       -- stats file with all q->c relations (tags included).
    listChangingCats  -- plain list of category ids considered "changing".
    qcInputNoOutsFile -- stats file with outliers removed; when empty the
                         "newWay" mode is forced.
    modeSanXoTSieve   -- "newWay" (detect outliers via tags) or "oldWay"
                         (outlier = relation absent from qcInputNoOutsFile).
    caseSensitive     -- when False, ids are compared lower-cased.
    outlierTag        -- tag marking a relation as an outlier (newWay mode).

    Returns (numRelsChangingCats, numOutliersChangingCats,
             numOutliersNonChangingCats).
    """
    # BUG FIX: mutable default argument listChangingCats = [] replaced by a
    # None sentinel (observable behaviour for callers is unchanged).
    if listChangingCats is None:
        listChangingCats = []
    qcInputRawList = []
    qcInput = []
    qcInputNoOutsRawList = []
    qcInputNoOuts = []
    numRelsChangingCats = 0
    numOutliersChangingCats = 0
    numOutliersNonChangingCats = 0
    # lists of lists for filterByElement, needed to speed it up
    qcInputSortedList = []
    qcInputNoOutsSortedList = []
    listChangingCatsList = []
    listChangingCatsSortedList = []
    # when no qcInputFileNoOuts file is present, the newWay option is used
    if len(qcInputNoOutsFile) == 0:
        modeSanXoTSieve = "newWay"
    # this has already been sorted previously, but just in case...
    qcInputRawList = stats.loadStatsDataFile(qcInputFile, FDRasText = True, ZasText = True, includeTags = True)
    # nesting each category within a list makes it work with filterByElement
    for cat in listChangingCats:
        if caseSensitive:
            listChangingCatsList.append([cat])
        else:
            listChangingCatsList.append([cat.lower()])
    # important NOT to sort listChangingCats, as this is not a list of lists and
    # sorting would only affect the first character instead of the first string
    listChangingCatsSortedList = stats.sortByIndex(listChangingCatsList, 0)
    # get list of rels: nest [category, element, tags] within a list as well
    for qc in qcInputRawList:
        if caseSensitive:
            qcInput.append([[qc[0], qc[3], qc[9]]])
        else:
            qcInput.append([[qc[0].lower(), qc[3].lower(), qc[9]]])
    qcInputSortedList = stats.sortByIndex(qcInput, 0)
    if modeSanXoTSieve == "newWay":
        for qc in qcInputSortedList:
            if len(stats.filterByElement(listChangingCatsSortedList, qc[0][0], sort = False)) > 0:
                # this relation points to a changing category
                numRelsChangingCats += 1
                if stats.tagIsPresent(qc[0][2], outlierTag):
                    # outlier pointing to a changing category
                    numOutliersChangingCats += 1
            else:
                # relations pointing to non changing cats
                if stats.tagIsPresent(qc[0][2], outlierTag):
                    # outliers pointing to non changing cats
                    numOutliersNonChangingCats += 1
    if modeSanXoTSieve == "oldWay":
        # FIX: removed a large block of commented-out code that duplicated the
        # qcInput preparation above ("remove once moving it out works" note).
        if len(qcInputNoOutsFile) > 0:
            qcInputNoOutsRawList = stats.loadStatsDataFile(qcInputNoOutsFile, FDRasText = True, ZasText = True, includeTags = False)
            # nest [category, element] within a list for filterByElement
            for qcno in qcInputNoOutsRawList:
                if caseSensitive:
                    qcInputNoOuts.append([[qcno[0], qcno[3]]])
                else:
                    qcInputNoOuts.append([[qcno[0].lower(), qcno[3].lower()]])
            qcInputNoOutsSortedList = stats.sortByIndex(qcInputNoOuts, 0)
        # BUG FIX: the two Python 2 print statements here were syntax errors
        # next to the print() calls used elsewhere in this file; converted to
        # the function form with identical output.
        print()
        print("calculating with %i relations and %i changing categories..." % (len(qcInputSortedList), len(listChangingCats)))
        for qc in qcInputSortedList:
            # better do not use something like "if x in list..." because that is quite slow
            if len(stats.filterByElement(listChangingCatsSortedList, qc[0][0], sort = False)) > 0:
                # is the category qc[0] in listChangingCatsSorted? If no --> 0
                # this relation points to a changing category
                numRelsChangingCats += 1
                if len(stats.filterByElement(qcInputNoOutsSortedList, qc[0][0:2], sort = False)) == 0:
                    # is the relation qc in qcInputNoOuts? If no --> 0
                    # this relation is an outlier in a changing category
                    # the [0:2] part is to remove the space for tags
                    numOutliersChangingCats += 1
            else:
                # this relation points to a non-changing category
                if len(stats.filterByElement(qcInputNoOutsSortedList, qc[0][0:2], sort = False)) == 0:
                    # is the relation qc in qcInputNoOuts? If no --> 0
                    # this relation is an outlier in a non-changing category
                    numOutliersNonChangingCats += 1
    return numRelsChangingCats, numOutliersChangingCats, numOutliersNonChangingCats
def main(argv):
    """SanXoTSqueezer: filter higher elements by N and FDR (or |Z|).

    Loads the lower and higher stats files, keeps higher elements whose
    element count lies in [minelements, maxelements] and that pass either
    the FDR threshold (default) or the |Z| threshold, then saves the
    filtered list and a log file.

    argv -- command-line arguments (getopt style), see the option table below.
    """
    version = "v0.14"
    analysisName = ""
    defaultAnalysisName = "squeeze"
    analysisFolder = ""
    # parametres
    minimumElements = 2
    maximumElements = 1e6
    maximumFDR = 0.05
    minimumZ = 0.0 # take all by default
    filterByFDR = True # if false, then it filters by abs(Z)
    # input files
    lowerStats = ""
    higherStats = ""
    defaultLowerStatsFile = "lower"
    defaultHigherStatsFile = "upper"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultOutputFile = "outList"
    defaultLogFile = "logFile"
    # FIX: removed unused locals "verbose" and "defaultGraphExtension"
    # output files
    logFile = ""
    outputFile = ""
    logList = [["SanXoTSqueezer " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # BUG FIX: "upperstats=" was missing from the long-option list even
        # though "-u"/"--upperstats" is handled below, so using --upperstats
        # always raised GetoptError.
        opts, args = getopt.getopt(argv, "a:l:L:o:p:u:n:N:f:z:h", [
            "analysis=", "lowerstats=", "upperstats=", "logfile=",
            "outputfile=", "place=", "minelements=", "maxelements=",
            "fdr=", "sigmas=", "help"
        ])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-l", "--lowerstats"):
            lowerStats = arg
        elif opt in ("-u", "--upperstats"):
            higherStats = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-n", "--minelements"):
            minimumElements = int(arg)
        elif opt in ("-N", "--maxelements"):
            maximumElements = int(arg)
        elif opt in ("-f", "--fdr"):
            maximumFDR = float(arg)
        elif opt in ("-z", "--sigmas"):
            filterByFDR = False
            minimumZ = float(arg)
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(lowerStats) > 0:
            analysisName = os.path.splitext(os.path.basename(lowerStats))[0]
        else:
            analysisName = defaultAnalysisName
    # when the analysis name carries a directory, split it off and use it
    # as the analysis folder if none was given
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart
    if len(lowerStats) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(lowerStats)) > 0:
            analysisFolder = os.path.dirname(lowerStats)
    # input: build defaults / resolve bare names against the analysis folder
    if len(lowerStats) == 0:
        lowerStats = os.path.join(analysisFolder, analysisName + "_" + defaultLowerStatsFile + defaultTableExtension)
    if len(higherStats) == 0:
        higherStats = os.path.join(analysisFolder, analysisName + "_" + defaultHigherStatsFile + defaultTableExtension)
    if len(os.path.dirname(lowerStats)) == 0 and len(analysisFolder) > 0:
        lowerStats = os.path.join(analysisFolder, lowerStats)
    if len(os.path.dirname(higherStats)) == 0 and len(analysisFolder) > 0:
        higherStats = os.path.join(analysisFolder, higherStats)
    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputFile + defaultTableExtension)
    if len(logFile) == 0:
        logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
    if len(os.path.dirname(outputFile)) == 0 and len(os.path.basename(outputFile)) > 0:
        outputFile = os.path.join(analysisFolder, outputFile)
    if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)
    logList.append([""])
    logList.append(["Lower input stats file: " + lowerStats])
    logList.append(["Higher input stats file: " + higherStats])
    logList.append(["Output list: " + outputFile])
    logList.append(["Output log file: " + logFile])
    logList.append(["Minimum elements in higher category: " + str(minimumElements)])
    logList.append(["Maximum elements in higher category: " + str(maximumElements)])
    logList.append(["Minimum z: " + str(minimumZ)])
    logList.append([""])
    # END REGION: FILE NAMES SETUP
    # NOTE(review): the except clauses below catch getopt.GetoptError, which
    # the stats helpers are unlikely to raise; an I/O-related exception type
    # (e.g. OSError) looks intended — confirm before changing error paths.
    try:
        lowerData = stats.loadStatsDataFile(lowerStats)
        logList.append(["Lower data files correctly loaded."])
    except getopt.GetoptError:
        logList.append(["Error while getting lower data files."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        higherData = stats.loadStatsDataFile(higherStats)
        logList.append(["Higher data files correctly loaded."])
    except getopt.GetoptError:
        logList.append(["Error while getting higher data files."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        filteredList = filterNFDRorZ(lowerData, higherData, minN=minimumElements, maxN=maximumElements, minZ=minimumZ, maxFDR=maximumFDR, useFDR=filterByFDR)
        filteredList = stats.sortByIndex(filteredList, 1)
        logList.append(["Data correctly filtered."])
    except getopt.GetoptError:
        logList.append(["Error while getting data filtered by N and Z."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    try:
        stats.saveFile(outputFile, filteredList, "id\tn\tZ\tFDR\tX")
        logList.append(["Output data correctly saved."])
    except getopt.GetoptError:
        logList.append(["Error while saving output data."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)
    stats.saveFile(logFile, logList, "LOG FILE")