def main(argv):
    """Command-line entry point for SanXoTSieve.

    Parses options, resolves input/output file names, loads the data and
    relations tables, and either removes outlier relations (--oldway) or
    tags them (default).  Writes the new relations file, the info log and,
    in oldway mode, the removed-relations file.
    """
    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"
    outlierTag = "out"
    logicOperatorsAsWords = False
    dataFile = ""
    relationsFile = ""
    newRelFile = ""
    removedRelFile = ""
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    verbose = True
    # instead of tagging outliers, separate relations files, the old way
    oldWay = False
    modeUsed = mode.onePerHigher

    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    try:
        opts, args = getopt.getopt(
            argv, "a:p:v:d:r:n:L:V:f:ubDhH",
            ["analysis=", "folder=", "varianceseed=", "datafile=", "relfile=",
             "newrelfile=", "outlierrelfile=", "infofile=", "varfile=",
             "fdrlimit=", "one-to-one", "no-verbose", "randomise",
             "removeduplicateupper", "help", "advanced-help", "tags=",
             "outliertag=", "oldway", "word-operators"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    # NOTE(review): the declared "randomise" long option has no handler —
    # confirm whether it is obsolete or was never implemented.
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # fix: this option previously assigned removedRelFile,
            # making it impossible to set the new-relations file name
            newRelFile = arg
        elif opt == "--outlierrelfile":
            # fix: declared long option was never handled
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt == "--oldway":
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt == "--tags":
            # always keep the implicit !out filter in front of user tags
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt == "--word-operators":
            logicOperatorsAsWords = True
        elif opt == "--outliertag":
            # fix: previously re-assigned the literal "out", ignoring arg
            outlierTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced=True)
            sys.exit()

    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given as a path also fixes the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)

    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)

    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)

    # a variance file only overrides the seed when -v was not given
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    # output
    if len(newRelFile) == 0:
        if oldWay:
            # suffix: "cleaned"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:
            # suffix: "tagged"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)
    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)
    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)
    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)

    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(["Output relations file without outliers: " + newRelFile])
        logList.append(["Output relations file with outliers only: " + removedRelFile])
        logList.append(["Removing duplicate higher level elements: " + str(removeDuplicateUpper)])
        logList.append(["OldWay option activated: outliers are removed instead of tagged"])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])
    # END REGION: FILE NAMES SETUP

    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)

    if oldWay:
        # only for backward compatibility; note that tags are not supported
        newRelations, removedRelations, logResults = \
            getRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
            tagRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper,
                                        tags=tags,
                                        outlierTag=outlierTag,
                                        logicOperatorsAsWords=logicOperatorsAsWords)

    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")

    stats.saveFile(infoFile, logList, "INFO FILE")
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
def associateElements(inStats = "", uFile = "", relFile = ""):
    """Associate each higher-level element with its lower-level elements.

    inStats -- stats data file; column 3 is the lower-element id and
               column 7 its Zij value (as read by stats.loadStatsDataFile).
    uFile   -- list of higher-level elements, optionally with a header row.
    relFile -- relations file (higher id in column 0, lower id in column 1).

    Returns (results, elementList, "") where results is a list of
    [higherId, sortedLowerIds-or-None] pairs and elementList normalises
    every input row to [id, n, Z, FDR, X] (missing fields become NaN).
    """
    results = []
    relations = stats.loadRelationsFile(relFile)
    relations = stats.sortByIndex(relations, 0)
    statsData = stats.loadStatsDataFile(inStats)

    # pairs of [lowerId, Zij] extracted from the stats table
    ZijList = []
    for element in statsData:
        ZijList.append([element[3], element[7]])

    # theorList: theoretical cumulative probability of each Zij (normal CDF)
    # experList: experimental rank-based probability, (i + 0.5) / N
    # NOTE(review): theorList is built but never used below — confirm whether
    # it is needed by a caller or is dead code.
    theorList = []
    experList = []
    N = len(ZijList)
    for i in range(N):
        theorList.append([ZijList[i][0], ZijList[i][1], norm.cdf(float(ZijList[i][1]))])
        experList.append([ZijList[i][0], ZijList[i][1], (float(i) + 0.5) / float(N)])

    higherElements = stats.load2stringList(uFile, removeCommas = True)
    # WARNING! higherElements must be a list of lists
    # with each sublist being id, n, Z, FDR, X
    # begin: jmrc
    if not higherElements:
        sms = "ERROR: higherElements is empty. The higherElements must be a list of lists with each sublist being id, n, Z, FDR, X"
        sys.exit(sms)
    # end: jmrc

    # Normalise the higher-elements table to [id, n, Z, FDR, X], padding
    # missing columns with NaN.  The header row identifies the source format.
    elementList = []
    if higherElements[0] == ['id', 'Z', 'n']:
        # this means the list comes from SanXoTSqueezer
        # so the header and the extra columns have to be removed
        for element in higherElements[1:]:
            # switch to id, n, Z, FDR
            elementList.append([element[0], element[2], element[1], float("nan"), float("nan")])
    if higherElements[0] == ['id', 'n', 'Z', 'FDR']:
        # this means it does not contain X, so a nan is put on its place
        for element in higherElements[1:]:
            elementList.append([element[0], element[1], element[2], element[3], float("nan")])
    if higherElements[0] == ['id', 'n', 'Z', 'FDR', 'X']:
        for element in higherElements[1:]:
            elementList.append([element[0], element[1], element[2], element[3], element[4]])
    # otherwise: no recognised header, keep only the ids
    if higherElements[0] != ['id', 'Z', 'n'] and higherElements[0] != ['id', 'n', 'Z', 'FDR'] and higherElements[0] != ['id', 'n', 'Z', 'FDR', 'X']:
        for element in higherElements:
            elementList.append([element[0], float("nan"), float("nan"), float("nan"), float("nan")])

    statsData = stats.sortByIndex(statsData, 7)
    relationsFirstColumn = stats.extractColumns(relations, 0)
    relationsSecondColumn = stats.extractColumns(relations, 1)
    experListFirstColumn = stats.extractColumns(experList, 0)

    for uElement in elementList:
        lowerElementList = []
        first = stats.firstIndex(relationsFirstColumn, uElement[0])
        if first > -1:
            # -1 means it is not in the list
            notInList = 0
            last = stats.lastIndex(relationsFirstColumn, uElement[0])
            # "+1" is to include the last one
            lowerElements = relationsSecondColumn[first:last + 1]
            for element in lowerElements:
                # keep only lower elements that also appear in the stats data
                lowerIndex = stats.firstIndex(experListFirstColumn, element)
                if lowerIndex > -1:
                    # -1 means it is not in the list
                    lowerElementList.append(element)
                else:
                    notInList += 1
            lowerElementList = stats.sortByIndex(lowerElementList, 0)
            results.append([uElement[0], lowerElementList])
        else:
            # higher element with no relations at all: keep it with None,
            # unless its id is blank
            if len(uElement[0].strip()) > 0:
                results.append([uElement[0], None])

    return results, elementList, ""
def calibrate(inputRawData=None, inputRelations=None, rawDataFile="",
              relationsFile="", kSeed=1, varianceSeed=0.001, medianSide=100,
              maxIterations=0, verbose=False, showGraph=False, showSumSQ=False,
              forceParameters=False, alphaSeed=1.0, showRank=True,
              useCooperativity=False, graphFileVRank="", graphFileVValue="",
              graphDataFile="", graphDPI=None):
    """Calibrate raw data, estimating the k constant and the variance.

    Data and relations may be passed in memory (inputRawData /
    inputRelations) or as file paths (rawDataFile / relationsFile).  When
    forceParameters is True the seeds are used directly instead of fitting.
    Saves the VRank and VValue graphs and returns the tuple
    (idXV, variance, k, alpha, extraLog); on a missing-input error the
    first four elements are None.
    """
    extraLog = [[]]

    if verbose:
        print()
        print("loading input raw data file")
        extraLog.append(["loading input raw data file"])
    if inputRawData is None:
        if len(rawDataFile) == 0:
            # fix: fail fast (mirroring the relations-file branch below)
            # instead of continuing with inputRawData=None and crashing
            # later inside getKandVariance/showGraphTool
            print("Error: no input raw data")
            extraLog.append(["Error: no input raw data"])
            return None, None, None, None, extraLog
        inputRawData = stats.loadInputDataFile(rawDataFile)

    if verbose:
        print("loading relations file")
        extraLog.append(["loading relations file"])
    if inputRelations is None:
        if len(relationsFile) == 0:
            if not forceParameters:
                print("Error: no relations file")
                extraLog.append(["Error: no relations file"])
                return None, None, None, None, extraLog
        else:
            inputRelations = stats.loadRelationsFile(relationsFile)

    #### calculate k and variance ####
    alpha = 1.0
    if not forceParameters:
        if verbose:
            print("calculating K and variance")
            extraLog.append(["calculating K and variance"])
        # verbose/showSumSQ are forced to True here ("just to see it"
        # in the original) — the function's own flags are not forwarded
        result = getKandVariance(inputRawData, inputRelations, kSeed=kSeed,
                                 varianceSeed=varianceSeed,
                                 maxIterations=maxIterations, verbose=True,
                                 showSumSQ=True, medianSide=medianSide,
                                 alphaSeed=alphaSeed,
                                 useCooperativity=useCooperativity)
        k = result[0]
        variance = result[1]
        if useCooperativity:
            alpha = result[2]
    else:
        k = kSeed
        variance = varianceSeed
        alpha = alphaSeed

    # save VRank graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=True, graphFile=graphFileVRank,
                  graphData=graphDataFile, dpi=graphDPI, showGraph=showGraph)
    # save VValue graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=False, graphFile=graphFileVValue,
                  dpi=graphDPI, showGraph=showGraph)

    # get calibrated idXV
    idXV = idXVcal(inputRawData, k, alpha)
    return idXV, variance, k, alpha, extraLog
def calibrate(inputRawData=None, inputRelations=None, rawDataFile="",
              relationsFile="", kSeed=1, varianceSeed=0.001, medianSide=100,
              maxIterations=0, verbose=False, showGraph=False, showSumSQ=False,
              forceParameters=False, alphaSeed=1.0, showRank=True,
              useCooperativity=False, graphFileVRank="", graphFileVValue="",
              graphDataFile="", graphDPI=None):
    """Calibrate raw data, estimating the k constant and the variance.

    NOTE(review): this definition duplicates an identical ``calibrate``
    earlier in the file; being later, it is the one that takes effect —
    confirm the earlier copy can be deleted.

    Data and relations may be passed in memory (inputRawData /
    inputRelations) or as file paths (rawDataFile / relationsFile).  When
    forceParameters is True the seeds are used directly instead of fitting.
    Saves the VRank and VValue graphs and returns the tuple
    (idXV, variance, k, alpha, extraLog); on a missing-input error the
    first four elements are None.
    """
    extraLog = [[]]

    if verbose:
        print()
        print("loading input raw data file")
        extraLog.append(["loading input raw data file"])
    if inputRawData is None:
        if len(rawDataFile) == 0:
            # fix: fail fast (mirroring the relations-file branch below)
            # instead of continuing with inputRawData=None and crashing
            # later inside getKandVariance/showGraphTool
            print("Error: no input raw data")
            extraLog.append(["Error: no input raw data"])
            return None, None, None, None, extraLog
        inputRawData = stats.loadInputDataFile(rawDataFile)

    if verbose:
        print("loading relations file")
        extraLog.append(["loading relations file"])
    if inputRelations is None:
        if len(relationsFile) == 0:
            if not forceParameters:
                print("Error: no relations file")
                extraLog.append(["Error: no relations file"])
                return None, None, None, None, extraLog
        else:
            inputRelations = stats.loadRelationsFile(relationsFile)

    #### calculate k and variance ####
    alpha = 1.0
    if not forceParameters:
        if verbose:
            print("calculating K and variance")
            extraLog.append(["calculating K and variance"])
        # verbose/showSumSQ are forced to True here ("just to see it"
        # in the original) — the function's own flags are not forwarded
        result = getKandVariance(inputRawData, inputRelations, kSeed=kSeed,
                                 varianceSeed=varianceSeed,
                                 maxIterations=maxIterations, verbose=True,
                                 showSumSQ=True, medianSide=medianSide,
                                 alphaSeed=alphaSeed,
                                 useCooperativity=useCooperativity)
        k = result[0]
        variance = result[1]
        if useCooperativity:
            alpha = result[2]
    else:
        k = kSeed
        variance = varianceSeed
        alpha = alphaSeed

    # save VRank graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=True, graphFile=graphFileVRank,
                  graphData=graphDataFile, dpi=graphDPI, showGraph=showGraph)
    # save VValue graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=False, graphFile=graphFileVValue,
                  dpi=graphDPI, showGraph=showGraph)

    # get calibrated idXV
    idXV = idXVcal(inputRawData, k, alpha)
    return idXV, variance, k, alpha, extraLog
def main(argv):
    """Command-line entry point for SanXoTSieve.

    NOTE(review): this definition duplicates an identical ``main`` earlier
    in the file; being later, it is the one that takes effect — confirm the
    earlier copy can be deleted.

    Parses options, resolves input/output file names, loads the data and
    relations tables, and either removes outlier relations (--oldway) or
    tags them (default).  Writes the new relations file, the info log and,
    in oldway mode, the removed-relations file.
    """
    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"
    outlierTag = "out"
    logicOperatorsAsWords = False
    dataFile = ""
    relationsFile = ""
    newRelFile = ""
    removedRelFile = ""
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    verbose = True
    # instead of tagging outliers, separate relations files, the old way
    oldWay = False
    modeUsed = mode.onePerHigher

    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    try:
        opts, args = getopt.getopt(
            argv, "a:p:v:d:r:n:L:V:f:ubDhH",
            ["analysis=", "folder=", "varianceseed=", "datafile=", "relfile=",
             "newrelfile=", "outlierrelfile=", "infofile=", "varfile=",
             "fdrlimit=", "one-to-one", "no-verbose", "randomise",
             "removeduplicateupper", "help", "advanced-help", "tags=",
             "outliertag=", "oldway", "word-operators"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    # NOTE(review): the declared "randomise" long option has no handler —
    # confirm whether it is obsolete or was never implemented.
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # fix: this option previously assigned removedRelFile,
            # making it impossible to set the new-relations file name
            newRelFile = arg
        elif opt == "--outlierrelfile":
            # fix: declared long option was never handled
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt == "--oldway":
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt == "--tags":
            # always keep the implicit !out filter in front of user tags
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt == "--word-operators":
            logicOperatorsAsWords = True
        elif opt == "--outliertag":
            # fix: previously re-assigned the literal "out", ignoring arg
            outlierTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced=True)
            sys.exit()

    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given as a path also fixes the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)

    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)

    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)

    # a variance file only overrides the seed when -v was not given
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    # output
    if len(newRelFile) == 0:
        if oldWay:
            # suffix: "cleaned"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:
            # suffix: "tagged"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)
    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)
    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)
    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)

    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(["Output relations file without outliers: " + newRelFile])
        logList.append(["Output relations file with outliers only: " + removedRelFile])
        logList.append(["Removing duplicate higher level elements: " + str(removeDuplicateUpper)])
        logList.append(["OldWay option activated: outliers are removed instead of tagged"])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])
    # END REGION: FILE NAMES SETUP

    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)

    if oldWay:
        # only for backward compatibility; note that tags are not supported
        newRelations, removedRelations, logResults = \
            getRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
            tagRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper,
                                        tags=tags,
                                        outlierTag=outlierTag,
                                        logicOperatorsAsWords=logicOperatorsAsWords)

    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")

    stats.saveFile(infoFile, logList, "INFO FILE")
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
def ZpCalculator(relationsFile, modifiedPeptidesFile, modifiedPeptidesFile1,
                 nonModifiedPep2ProtFile, pep2protein, outname, varFile,
                 varFile1):
    """Compute Zp statistics for modified/non-modified peptide relations.

    Builds two normalisation tables (V- and W-weighted), writes them to
    peptide2protein_Norm_V.txt / peptide2protein_Norm_W.txt next to
    ``outname``, merges the per-peptide rows and returns the FDR-annotated
    list from stats.fdr_calculator.

    NOTE(review): the original structure of this function was whitespace
    mangled; the nesting below was reconstructed from data-flow — verify
    against version control before relying on edge-case behaviour.
    """
    # NOTE(review): assumes a Windows-style (backslash) outname — confirm;
    # os.path.dirname(outname) would be the portable equivalent.
    folderout = outname.split("\\")
    OutfolderLocation = "\\".join(folderout[:-1])

    norm_wo_V = []
    norm_wo_W = []
    main_file = []
    main_file1 = []
    main_file_FINAL = []
    Norm_V = [["idinf", "\t", "X'inf", "\t", "Vinf", "\n"]]
    Norm_W = [["idinf", "\t", "X'inf", "\t", "Winf", "\n"]]

    # modified peptide id -> (X, V) -- the "wo" file
    dic_seq = stats.load2dictionary(modifiedPeptidesFile, keyNum=0, n1=1, n2=2)
    # protein -> stats columns of the protein-to-all file
    dic_dis = stats.load2dictionary(
        nonModifiedPep2ProtFile, keyNum=3, n1=1, n2=4, n3=5, n4=6)
    # modified peptide id -> (X, V) -- the "W" file
    dic_seq1 = stats.load2dictionary(modifiedPeptidesFile1, keyNum=0, n1=1, n2=2)
    # non-modified peptide to protein: counts per protein
    W_seq = stats.load2dictionary(pep2protein, keyNum=0, n1=6)

    variance, varianceOk = stats.extractVarianceFromVarFile(varFile)
    variance1, variance1Ok = stats.extractVarianceFromVarFile(varFile1)

    # fix: dict views are not subscriptable in Python 3 — materialize keys.
    # NOTE(review): picking the 1st and 4th entries relies on dict insertion
    # order (file row order, Python >= 3.7) — confirm this is intended.
    dicDisKeys = list(dic_dis.keys())
    x = float(dic_dis[dicDisKeys[0]][0])
    x_meanCount = int(dic_dis[dicDisKeys[3]][3])

    rel_seq = {}
    count_n = {}

    allRelations = stats.loadRelationsFile(relationsFile)

    # Count, per protein, how many related sequences appear in dic_seq;
    # rel_seq maps every sequence to its protein.
    for relation in allRelations:
        Rel_sequence = relation[1]
        Rel_discription = relation[0]  # protein
        rel_seq[Rel_sequence] = Rel_discription
        if Rel_sequence in dic_seq:
            if rel_seq[Rel_sequence] not in count_n:
                count_n[rel_seq[Rel_sequence]] = 1
            else:
                count_n[rel_seq[Rel_sequence]] += 1

    # ---- calculations for the "wo" files ----
    for relation_n in allRelations:
        Rel_sequence = relation_n[1]
        if Rel_sequence in dic_seq:
            xp = float(dic_seq[Rel_sequence][0])
            vp = float(dic_seq[Rel_sequence][1])
            NormV_VP = float(dic_seq[Rel_sequence][1])
            main_file_des = rel_seq[Rel_sequence]
            wp = float(1 / (1 / vp + variance))    # (1 / ((1 / vp) + variance))
            wq = float(1 / (1 / wp + variance1))
            xq = "0"
            vq = "1"
            x_mean = x
            NormV_xp = float(xp - x_mean)
            NormW_VP = wq
            zp = (xp - x_mean) * (math.sqrt(wq)) * math.sqrt(
                x_meanCount / (x_meanCount - 1))
            norm_wo_V.append([str(Rel_sequence), "\t", str(NormV_xp), "\t",
                              str(NormV_VP), "\n"])
            norm_wo_W.append([str(Rel_sequence), "\t", str(NormV_xp), "\t",
                              str(NormW_VP), "\n"])
            main_file.append([main_file_des, xq, vq, Rel_sequence, xp, vp,
                              x_meanCount, zp])

    # ---- calculations for the "W" files ----
    for relation_n in allRelations:
        Rel_sequence = relation_n[1]
        if Rel_sequence in dic_seq1:
            xp1 = float(dic_seq1[Rel_sequence][0])
            vp1 = float(dic_seq1[Rel_sequence][1])
            w_NormV_VP = float(dic_seq1[Rel_sequence][1])
            main_file_des1 = rel_seq[Rel_sequence]
            wp1 = float(1 / (1 / vp1 + variance))  # (1 / ((1 / vp) + variance))
            wq1 = float(1 / (1 / wp1 + variance1))
            w_NormW_VP = wp1
            if main_file_des1 in dic_dis and main_file_des1 in W_seq:
                count1 = int(W_seq[main_file_des1][0])
                xq1 = float(dic_dis[main_file_des1][1])
                vq1 = float(dic_dis[main_file_des1][2])
                w_NormV_xp = float(xp1 - xq1)
                if count1 > 1:
                    zp1 = (xp1 - xq1) * (math.sqrt(wp1)) * math.sqrt(
                        count1 / (count1 - 1))
                else:
                    # NOTE(review): string sentinel mixed with float zp values
                    # downstream — confirm stats.fdr_calculator handles it
                    zp1 = "nan"
                main_file1.append([main_file_des1, xq1, vq1, Rel_sequence,
                                   xp1, vp1, count1, zp1])
                main_file_FINAL = main_file + main_file1
                Norm_V.append([str(Rel_sequence), "\t", str(w_NormV_xp), "\t",
                               str(w_NormV_VP), "\n"])
                Norm_W.append([str(Rel_sequence), "\t", str(w_NormV_xp), "\t",
                               str(w_NormW_VP), "\n"])

    # ---- merge the non-modified peptide stats file ----
    nonMod_V = []
    with open(pep2protein) as file_new:
        next(file_new)  # skip header
        for line in file_new:
            n_splits = line.split("\t")
            nonNormV_xp = (float(n_splits[4]) - float(n_splits[1]))
            nonNormV_VP = (1 / (1 / float(n_splits[5]) + variance))
            nonRel_sequence = str(n_splits[3])
            nonMod_V.append([str(nonRel_sequence), "\t", str(nonNormV_xp),
                             "\t", str(nonNormV_VP), "\n"])
            main_file_FINAL.append([n_splits[0], n_splits[1], n_splits[2],
                                    n_splits[3], n_splits[4], n_splits[5],
                                    n_splits[6], float(n_splits[7])])

    # fix: build output paths with os.path.join (the previous string
    # concatenation embedded a literal backslash in the file name on
    # non-Windows systems) and close the files deterministically.
    List_V = Norm_V + norm_wo_V + nonMod_V
    with open(os.path.join(OutfolderLocation,
                           "peptide2protein_Norm_V.txt"), "w") as V:
        for lineV in List_V:
            V.writelines(lineV)

    # NOTE(review): the W list reuses nonMod_V (a nonMod_W list was started
    # but never populated in the original) — confirm this is intended.
    List_W = Norm_W + norm_wo_W + nonMod_V
    with open(os.path.join(OutfolderLocation,
                           "peptide2protein_Norm_W.txt"), "w") as W:
        for lineW in List_W:
            W.writelines(lineW)

    mainlist = stats.fdr_calculator(main_file_FINAL)
    return mainlist