def main(argv):
    """Command-line entry point for SanXoTSieve.

    Parses options, resolves input/output file names, loads the data and
    relations tables, and either removes outlier relations (--oldway) or
    tags them (default).  Writes the new relations file, the info log and,
    in oldway mode, the removed-relations file.
    """
    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"
    outlierTag = "out"
    logicOperatorsAsWords = False
    dataFile = ""
    relationsFile = ""
    newRelFile = ""
    removedRelFile = ""
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    verbose = True
    # instead of tagging outliers, separate relations files, the old way
    oldWay = False
    modeUsed = mode.onePerHigher

    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    try:
        opts, args = getopt.getopt(
            argv, "a:p:v:d:r:n:L:V:f:ubDhH",
            ["analysis=", "folder=", "varianceseed=", "datafile=", "relfile=",
             "newrelfile=", "outlierrelfile=", "infofile=", "varfile=",
             "fdrlimit=", "one-to-one", "no-verbose", "randomise",
             "removeduplicateupper", "help", "advanced-help", "tags=",
             "outliertag=", "oldway", "word-operators"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    # NOTE(review): the declared "randomise" long option has no handler —
    # confirm whether it is obsolete or was never implemented.
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # fix: this option previously assigned removedRelFile,
            # making it impossible to set the new-relations file name
            newRelFile = arg
        elif opt == "--outlierrelfile":
            # fix: declared long option was never handled
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt == "--oldway":
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt == "--tags":
            # always keep the implicit !out filter in front of user tags
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt == "--word-operators":
            logicOperatorsAsWords = True
        elif opt == "--outliertag":
            # fix: previously re-assigned the literal "out", ignoring arg
            outlierTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced=True)
            sys.exit()

    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given as a path also fixes the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)

    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)

    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)

    # a variance file only overrides the seed when -v was not given
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    # output
    if len(newRelFile) == 0:
        if oldWay:
            # suffix: "cleaned"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:
            # suffix: "tagged"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)
    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)
    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)
    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)

    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(["Output relations file without outliers: " + newRelFile])
        logList.append(["Output relations file with outliers only: " + removedRelFile])
        logList.append(["Removing duplicate higher level elements: " + str(removeDuplicateUpper)])
        logList.append(["OldWay option activated: outliers are removed instead of tagged"])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])
    # END REGION: FILE NAMES SETUP

    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)

    if oldWay:
        # only for backward compatibility; note that tags are not supported
        newRelations, removedRelations, logResults = \
            getRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
            tagRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper,
                                        tags=tags,
                                        outlierTag=outlierTag,
                                        logicOperatorsAsWords=logicOperatorsAsWords)

    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")

    stats.saveFile(infoFile, logList, "INFO FILE")
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
def associateElements(inStats = "", uFile = "", relFile = ""):
    """Associate each higher-level element with its lower-level elements.

    inStats -- stats data file; column 3 is the lower-element id and
               column 7 its Zij value (as read by stats.loadStatsDataFile).
    uFile   -- list of higher-level elements, optionally with a header row.
    relFile -- relations file (higher id in column 0, lower id in column 1).

    Returns (results, elementList, "") where results is a list of
    [higherId, sortedLowerIds-or-None] pairs and elementList normalises
    every input row to [id, n, Z, FDR, X] (missing fields become NaN).
    """
    results = []
    relations = stats.loadRelationsFile(relFile)
    relations = stats.sortByIndex(relations, 0)
    statsData = stats.loadStatsDataFile(inStats)

    # pairs of [lowerId, Zij] extracted from the stats table
    ZijList = []
    for element in statsData:
        ZijList.append([element[3], element[7]])

    # theorList: theoretical cumulative probability of each Zij (normal CDF)
    # experList: experimental rank-based probability, (i + 0.5) / N
    # NOTE(review): theorList is built but never used below — confirm whether
    # it is needed by a caller or is dead code.
    theorList = []
    experList = []
    N = len(ZijList)
    for i in range(N):
        theorList.append([ZijList[i][0], ZijList[i][1], norm.cdf(float(ZijList[i][1]))])
        experList.append([ZijList[i][0], ZijList[i][1], (float(i) + 0.5) / float(N)])

    higherElements = stats.load2stringList(uFile, removeCommas = True)
    # WARNING! higherElements must be a list of lists
    # with each sublist being id, n, Z, FDR, X
    # begin: jmrc
    if not higherElements:
        sms = "ERROR: higherElements is empty. The higherElements must be a list of lists with each sublist being id, n, Z, FDR, X"
        sys.exit(sms)
    # end: jmrc

    # Normalise the higher-elements table to [id, n, Z, FDR, X], padding
    # missing columns with NaN.  The header row identifies the source format.
    elementList = []
    if higherElements[0] == ['id', 'Z', 'n']:
        # this means the list comes from SanXoTSqueezer
        # so the header and the extra columns have to be removed
        for element in higherElements[1:]:
            # switch to id, n, Z, FDR
            elementList.append([element[0], element[2], element[1], float("nan"), float("nan")])
    if higherElements[0] == ['id', 'n', 'Z', 'FDR']:
        # this means it does not contain X, so a nan is put on its place
        for element in higherElements[1:]:
            elementList.append([element[0], element[1], element[2], element[3], float("nan")])
    if higherElements[0] == ['id', 'n', 'Z', 'FDR', 'X']:
        for element in higherElements[1:]:
            elementList.append([element[0], element[1], element[2], element[3], element[4]])
    # otherwise: no recognised header, keep only the ids
    if higherElements[0] != ['id', 'Z', 'n'] and higherElements[0] != ['id', 'n', 'Z', 'FDR'] and higherElements[0] != ['id', 'n', 'Z', 'FDR', 'X']:
        for element in higherElements:
            elementList.append([element[0], float("nan"), float("nan"), float("nan"), float("nan")])

    statsData = stats.sortByIndex(statsData, 7)
    relationsFirstColumn = stats.extractColumns(relations, 0)
    relationsSecondColumn = stats.extractColumns(relations, 1)
    experListFirstColumn = stats.extractColumns(experList, 0)

    for uElement in elementList:
        lowerElementList = []
        first = stats.firstIndex(relationsFirstColumn, uElement[0])
        if first > -1:
            # -1 means it is not in the list
            notInList = 0
            last = stats.lastIndex(relationsFirstColumn, uElement[0])
            # "+1" is to include the last one
            lowerElements = relationsSecondColumn[first:last + 1]
            for element in lowerElements:
                # keep only lower elements that also appear in the stats data
                lowerIndex = stats.firstIndex(experListFirstColumn, element)
                if lowerIndex > -1:
                    # -1 means it is not in the list
                    lowerElementList.append(element)
                else:
                    notInList += 1
            lowerElementList = stats.sortByIndex(lowerElementList, 0)
            results.append([uElement[0], lowerElementList])
        else:
            # higher element with no relations at all: keep it with None,
            # unless its id is blank
            if len(uElement[0].strip()) > 0:
                results.append([uElement[0], None])

    return results, elementList, ""
def calibrate(inputRawData=None, inputRelations=None, rawDataFile="",
              relationsFile="", kSeed=1, varianceSeed=0.001, medianSide=100,
              maxIterations=0, verbose=False, showGraph=False, showSumSQ=False,
              forceParameters=False, alphaSeed=1.0, showRank=True,
              useCooperativity=False, graphFileVRank="", graphFileVValue="",
              graphDataFile="", graphDPI=None):
    """Calibrate raw data, estimating the k constant and the variance.

    Data and relations may be passed in memory (inputRawData /
    inputRelations) or as file paths (rawDataFile / relationsFile).  When
    forceParameters is True the seeds are used directly instead of fitting.
    Saves the VRank and VValue graphs and returns the tuple
    (idXV, variance, k, alpha, extraLog); on a missing-input error the
    first four elements are None.
    """
    extraLog = [[]]

    if verbose:
        print()
        print("loading input raw data file")
        extraLog.append(["loading input raw data file"])
    if inputRawData is None:
        if len(rawDataFile) == 0:
            # fix: fail fast (mirroring the relations-file branch below)
            # instead of continuing with inputRawData=None and crashing
            # later inside getKandVariance/showGraphTool
            print("Error: no input raw data")
            extraLog.append(["Error: no input raw data"])
            return None, None, None, None, extraLog
        inputRawData = stats.loadInputDataFile(rawDataFile)

    if verbose:
        print("loading relations file")
        extraLog.append(["loading relations file"])
    if inputRelations is None:
        if len(relationsFile) == 0:
            if not forceParameters:
                print("Error: no relations file")
                extraLog.append(["Error: no relations file"])
                return None, None, None, None, extraLog
        else:
            inputRelations = stats.loadRelationsFile(relationsFile)

    #### calculate k and variance ####
    alpha = 1.0
    if not forceParameters:
        if verbose:
            print("calculating K and variance")
            extraLog.append(["calculating K and variance"])
        # verbose/showSumSQ are forced to True here ("just to see it"
        # in the original) — the function's own flags are not forwarded
        result = getKandVariance(inputRawData, inputRelations, kSeed=kSeed,
                                 varianceSeed=varianceSeed,
                                 maxIterations=maxIterations, verbose=True,
                                 showSumSQ=True, medianSide=medianSide,
                                 alphaSeed=alphaSeed,
                                 useCooperativity=useCooperativity)
        k = result[0]
        variance = result[1]
        if useCooperativity:
            alpha = result[2]
    else:
        k = kSeed
        variance = varianceSeed
        alpha = alphaSeed

    # save VRank graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=True, graphFile=graphFileVRank,
                  graphData=graphDataFile, dpi=graphDPI, showGraph=showGraph)
    # save VValue graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=False, graphFile=graphFileVValue,
                  dpi=graphDPI, showGraph=showGraph)

    # get calibrated idXV
    idXV = idXVcal(inputRawData, k, alpha)
    return idXV, variance, k, alpha, extraLog
def calibrate(inputRawData=None, inputRelations=None, rawDataFile="",
              relationsFile="", kSeed=1, varianceSeed=0.001, medianSide=100,
              maxIterations=0, verbose=False, showGraph=False, showSumSQ=False,
              forceParameters=False, alphaSeed=1.0, showRank=True,
              useCooperativity=False, graphFileVRank="", graphFileVValue="",
              graphDataFile="", graphDPI=None):
    """Calibrate raw data, estimating the k constant and the variance.

    NOTE(review): this definition duplicates an identical ``calibrate``
    earlier in the file; being later, it is the one that takes effect —
    confirm the earlier copy can be deleted.

    Data and relations may be passed in memory (inputRawData /
    inputRelations) or as file paths (rawDataFile / relationsFile).  When
    forceParameters is True the seeds are used directly instead of fitting.
    Saves the VRank and VValue graphs and returns the tuple
    (idXV, variance, k, alpha, extraLog); on a missing-input error the
    first four elements are None.
    """
    extraLog = [[]]

    if verbose:
        print()
        print("loading input raw data file")
        extraLog.append(["loading input raw data file"])
    if inputRawData is None:
        if len(rawDataFile) == 0:
            # fix: fail fast (mirroring the relations-file branch below)
            # instead of continuing with inputRawData=None and crashing
            # later inside getKandVariance/showGraphTool
            print("Error: no input raw data")
            extraLog.append(["Error: no input raw data"])
            return None, None, None, None, extraLog
        inputRawData = stats.loadInputDataFile(rawDataFile)

    if verbose:
        print("loading relations file")
        extraLog.append(["loading relations file"])
    if inputRelations is None:
        if len(relationsFile) == 0:
            if not forceParameters:
                print("Error: no relations file")
                extraLog.append(["Error: no relations file"])
                return None, None, None, None, extraLog
        else:
            inputRelations = stats.loadRelationsFile(relationsFile)

    #### calculate k and variance ####
    alpha = 1.0
    if not forceParameters:
        if verbose:
            print("calculating K and variance")
            extraLog.append(["calculating K and variance"])
        # verbose/showSumSQ are forced to True here ("just to see it"
        # in the original) — the function's own flags are not forwarded
        result = getKandVariance(inputRawData, inputRelations, kSeed=kSeed,
                                 varianceSeed=varianceSeed,
                                 maxIterations=maxIterations, verbose=True,
                                 showSumSQ=True, medianSide=medianSide,
                                 alphaSeed=alphaSeed,
                                 useCooperativity=useCooperativity)
        k = result[0]
        variance = result[1]
        if useCooperativity:
            alpha = result[2]
    else:
        k = kSeed
        variance = varianceSeed
        alpha = alphaSeed

    # save VRank graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=True, graphFile=graphFileVRank,
                  graphData=graphDataFile, dpi=graphDPI, showGraph=showGraph)
    # save VValue graph
    showGraphTool(inputRawData, inputRelations, k, variance, alpha,
                  medianSide, showRank=False, graphFile=graphFileVValue,
                  dpi=graphDPI, showGraph=showGraph)

    # get calibrated idXV
    idXV = idXVcal(inputRawData, k, alpha)
    return idXV, variance, k, alpha, extraLog
def main(argv):
    """Command-line entry point for SanXoTSieve.

    NOTE(review): this definition duplicates an identical ``main`` earlier
    in the file; being later, it is the one that takes effect — confirm the
    earlier copy can be deleted.

    Parses options, resolves input/output file names, loads the data and
    relations tables, and either removes outlier relations (--oldway) or
    tags them (default).  Writes the new relations file, the info log and,
    in oldway mode, the removed-relations file.
    """
    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"
    outlierTag = "out"
    logicOperatorsAsWords = False
    dataFile = ""
    relationsFile = ""
    newRelFile = ""
    removedRelFile = ""
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    verbose = True
    # instead of tagging outliers, separate relations files, the old way
    oldWay = False
    modeUsed = mode.onePerHigher

    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    try:
        opts, args = getopt.getopt(
            argv, "a:p:v:d:r:n:L:V:f:ubDhH",
            ["analysis=", "folder=", "varianceseed=", "datafile=", "relfile=",
             "newrelfile=", "outlierrelfile=", "infofile=", "varfile=",
             "fdrlimit=", "one-to-one", "no-verbose", "randomise",
             "removeduplicateupper", "help", "advanced-help", "tags=",
             "outliertag=", "oldway", "word-operators"])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    # NOTE(review): the declared "randomise" long option has no handler —
    # confirm whether it is obsolete or was never implemented.
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # fix: this option previously assigned removedRelFile,
            # making it impossible to set the new-relations file name
            newRelFile = arg
        elif opt == "--outlierrelfile":
            # fix: declared long option was never handled
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt == "--oldway":
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt == "--tags":
            # always keep the implicit !out filter in front of user tags
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt == "--word-operators":
            logicOperatorsAsWords = True
        elif opt == "--outliertag":
            # fix: previously re-assigned the literal "out", ignoring arg
            outlierTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced=True)
            sys.exit()

    # REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given as a path also fixes the folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)

    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)
    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)

    if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)

    # a variance file only overrides the seed when -v was not given
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)
    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    # output
    if len(newRelFile) == 0:
        if oldWay:
            # suffix: "cleaned"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:
            # suffix: "tagged"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)
    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)
    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)
    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)

    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(["Output relations file without outliers: " + newRelFile])
        logList.append(["Output relations file with outliers only: " + removedRelFile])
        logList.append(["Removing duplicate higher level elements: " + str(removeDuplicateUpper)])
        logList.append(["OldWay option activated: outliers are removed instead of tagged"])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])
    # END REGION: FILE NAMES SETUP

    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)

    if oldWay:
        # only for backward compatibility; note that tags are not supported
        newRelations, removedRelations, logResults = \
            getRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
            tagRelationsWithoutOutliers(data, relations, varianceSeed,
                                        FDRLimit=FDRLimit,
                                        modeUsed=modeUsed,
                                        removeDuplicateUpper=removeDuplicateUpper,
                                        tags=tags,
                                        outlierTag=outlierTag,
                                        logicOperatorsAsWords=logicOperatorsAsWords)

    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")

    stats.saveFile(infoFile, logList, "INFO FILE")
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
def ZpCalculator(relationsFile, modifiedPeptidesFile, modifiedPeptidesFile1,
                 nonModifiedPep2ProtFile, pep2protein, outname, varFile,
                 varFile1):
    """Compute Zp statistics for modified/non-modified peptide relations.

    Builds two normalisation tables (V- and W-weighted), writes them to
    peptide2protein_Norm_V.txt / peptide2protein_Norm_W.txt next to
    ``outname``, merges the per-peptide rows and returns the FDR-annotated
    list from stats.fdr_calculator.

    NOTE(review): the original structure of this function was whitespace
    mangled; the nesting below was reconstructed from data-flow — verify
    against version control before relying on edge-case behaviour.
    """
    # NOTE(review): assumes a Windows-style (backslash) outname — confirm;
    # os.path.dirname(outname) would be the portable equivalent.
    folderout = outname.split("\\")
    OutfolderLocation = "\\".join(folderout[:-1])

    norm_wo_V = []
    norm_wo_W = []
    main_file = []
    main_file1 = []
    main_file_FINAL = []
    Norm_V = [["idinf", "\t", "X'inf", "\t", "Vinf", "\n"]]
    Norm_W = [["idinf", "\t", "X'inf", "\t", "Winf", "\n"]]

    # modified peptide id -> (X, V) -- the "wo" file
    dic_seq = stats.load2dictionary(modifiedPeptidesFile, keyNum=0, n1=1, n2=2)
    # protein -> stats columns of the protein-to-all file
    dic_dis = stats.load2dictionary(
        nonModifiedPep2ProtFile, keyNum=3, n1=1, n2=4, n3=5, n4=6)
    # modified peptide id -> (X, V) -- the "W" file
    dic_seq1 = stats.load2dictionary(modifiedPeptidesFile1, keyNum=0, n1=1, n2=2)
    # non-modified peptide to protein: counts per protein
    W_seq = stats.load2dictionary(pep2protein, keyNum=0, n1=6)

    variance, varianceOk = stats.extractVarianceFromVarFile(varFile)
    variance1, variance1Ok = stats.extractVarianceFromVarFile(varFile1)

    # fix: dict views are not subscriptable in Python 3 — materialize keys.
    # NOTE(review): picking the 1st and 4th entries relies on dict insertion
    # order (file row order, Python >= 3.7) — confirm this is intended.
    dicDisKeys = list(dic_dis.keys())
    x = float(dic_dis[dicDisKeys[0]][0])
    x_meanCount = int(dic_dis[dicDisKeys[3]][3])

    rel_seq = {}
    count_n = {}

    allRelations = stats.loadRelationsFile(relationsFile)

    # Count, per protein, how many related sequences appear in dic_seq;
    # rel_seq maps every sequence to its protein.
    for relation in allRelations:
        Rel_sequence = relation[1]
        Rel_discription = relation[0]  # protein
        rel_seq[Rel_sequence] = Rel_discription
        if Rel_sequence in dic_seq:
            if rel_seq[Rel_sequence] not in count_n:
                count_n[rel_seq[Rel_sequence]] = 1
            else:
                count_n[rel_seq[Rel_sequence]] += 1

    # ---- calculations for the "wo" files ----
    for relation_n in allRelations:
        Rel_sequence = relation_n[1]
        if Rel_sequence in dic_seq:
            xp = float(dic_seq[Rel_sequence][0])
            vp = float(dic_seq[Rel_sequence][1])
            NormV_VP = float(dic_seq[Rel_sequence][1])
            main_file_des = rel_seq[Rel_sequence]
            wp = float(1 / (1 / vp + variance))    # (1 / ((1 / vp) + variance))
            wq = float(1 / (1 / wp + variance1))
            xq = "0"
            vq = "1"
            x_mean = x
            NormV_xp = float(xp - x_mean)
            NormW_VP = wq
            zp = (xp - x_mean) * (math.sqrt(wq)) * math.sqrt(
                x_meanCount / (x_meanCount - 1))
            norm_wo_V.append([str(Rel_sequence), "\t", str(NormV_xp), "\t",
                              str(NormV_VP), "\n"])
            norm_wo_W.append([str(Rel_sequence), "\t", str(NormV_xp), "\t",
                              str(NormW_VP), "\n"])
            main_file.append([main_file_des, xq, vq, Rel_sequence, xp, vp,
                              x_meanCount, zp])

    # ---- calculations for the "W" files ----
    for relation_n in allRelations:
        Rel_sequence = relation_n[1]
        if Rel_sequence in dic_seq1:
            xp1 = float(dic_seq1[Rel_sequence][0])
            vp1 = float(dic_seq1[Rel_sequence][1])
            w_NormV_VP = float(dic_seq1[Rel_sequence][1])
            main_file_des1 = rel_seq[Rel_sequence]
            wp1 = float(1 / (1 / vp1 + variance))  # (1 / ((1 / vp) + variance))
            wq1 = float(1 / (1 / wp1 + variance1))
            w_NormW_VP = wp1
            if main_file_des1 in dic_dis and main_file_des1 in W_seq:
                count1 = int(W_seq[main_file_des1][0])
                xq1 = float(dic_dis[main_file_des1][1])
                vq1 = float(dic_dis[main_file_des1][2])
                w_NormV_xp = float(xp1 - xq1)
                if count1 > 1:
                    zp1 = (xp1 - xq1) * (math.sqrt(wp1)) * math.sqrt(
                        count1 / (count1 - 1))
                else:
                    # NOTE(review): string sentinel mixed with float zp values
                    # downstream — confirm stats.fdr_calculator handles it
                    zp1 = "nan"
                main_file1.append([main_file_des1, xq1, vq1, Rel_sequence,
                                   xp1, vp1, count1, zp1])
                main_file_FINAL = main_file + main_file1
                Norm_V.append([str(Rel_sequence), "\t", str(w_NormV_xp), "\t",
                               str(w_NormV_VP), "\n"])
                Norm_W.append([str(Rel_sequence), "\t", str(w_NormV_xp), "\t",
                               str(w_NormW_VP), "\n"])

    # ---- merge the non-modified peptide stats file ----
    nonMod_V = []
    with open(pep2protein) as file_new:
        next(file_new)  # skip header
        for line in file_new:
            n_splits = line.split("\t")
            nonNormV_xp = (float(n_splits[4]) - float(n_splits[1]))
            nonNormV_VP = (1 / (1 / float(n_splits[5]) + variance))
            nonRel_sequence = str(n_splits[3])
            nonMod_V.append([str(nonRel_sequence), "\t", str(nonNormV_xp),
                             "\t", str(nonNormV_VP), "\n"])
            main_file_FINAL.append([n_splits[0], n_splits[1], n_splits[2],
                                    n_splits[3], n_splits[4], n_splits[5],
                                    n_splits[6], float(n_splits[7])])

    # fix: build output paths with os.path.join (the previous string
    # concatenation embedded a literal backslash in the file name on
    # non-Windows systems) and close the files deterministically.
    List_V = Norm_V + norm_wo_V + nonMod_V
    with open(os.path.join(OutfolderLocation,
                           "peptide2protein_Norm_V.txt"), "w") as V:
        for lineV in List_V:
            V.writelines(lineV)

    # NOTE(review): the W list reuses nonMod_V (a nonMod_W list was started
    # but never populated in the original) — confirm this is intended.
    List_W = Norm_W + norm_wo_W + nonMod_V
    with open(os.path.join(OutfolderLocation,
                           "peptide2protein_Norm_W.txt"), "w") as W:
        for lineW in List_W:
            W.writelines(lineW)

    mainlist = stats.fdr_calculator(main_file_FINAL)
    return mainlist