# TODO: UPDATE THIS LIST # '-m', '2', '-t', '10'] # options = parser.parse_args(args) options = parser.parse_args() #### Phase 1: Parse All Input Files ppbAdj = myparser.parsePPI(options.infile) N = len(ppbAdj[:, 0]) ppbLabel = myparser.parseLabel(options.label) m = len(ppbLabel[0, :]) - 1 numLabels = m pnRD = myparser.parseRDIndex(options.rdindex, ppbLabel) pnFoldIndex = myparser.GetFoldIndex(pnRD, N, options.k) if options.mode != 0 and options.mode != 3: ppfDSD = myparser.parseDSD(options.dsdfile) #### Phase 2: Conduct Majority Voting if options.mode == 0: prediction = mvote.ordinaryMV(ppbAdj, ppbLabel, pnFoldIndex, pnRD) elif options.neighbor <= 0 or options.neighbor >= N / 2: options.neighbor == 10 print >> sys.stderr, "the setting for top DSD neighbors is invalid," print >> sys.stderr, "change to 10 instead by default.\n" if options.mode == 1: prediction = mvote.DSDUnweightMV(ppfDSD, ppbLabel, pnFoldIndex, pnRD, options.neighbor) elif options.mode == 2: prediction = mvote.DSDWeightedMV(ppfDSD, ppbLabel, pnFoldIndex, pnRD, options.neighbor) elif options.mode == 3: try:
def DSDWeightedMVIterative(dsdFile, masterLabelMatrix, masterPredictionMatrix,
                           desiredNumClosest, toPredictFile,
                           proteinToMasterIndex, localKeyFile):
    """Run one iteration of DSD-weighted majority voting.

    Naming used throughout:
        "local"     -- every protein present in the current DSD matrix
        "toPredict" -- the subset of local proteins to generate predictions for
        "master"    -- every protein in the complete protein list

    input:
        dsdFile                -- path handed to myparser.parseDSD; the result
                                  is indexed as a 2-D array [local, local]
        masterLabelMatrix      -- 2-D array indexed by master index. A falsy
                                  value in column 0 marks the protein as
                                  labeled; columns 1..numLabels hold the label
                                  values. Modified in place.
        masterPredictionMatrix -- 2-D array indexed by master index that
                                  receives the ranked predictions. Modified in
                                  place.
        desiredNumClosest      -- maximum number of labeled neighbors (taken
                                  in order of increasing DSD) that may vote
        toPredictFile          -- path of a text file, one protein name per
                                  line, naming the proteins to predict
        proteinToMasterIndex   -- dict mapping protein name -> master index
        localKeyFile           -- path of a text file, one protein name per
                                  line, in the row order of the DSD matrix

    output:
        (masterPredictionMatrix, masterLabelMatrix), the same objects that
        were passed in. For every predicted protein p (master index) and
        rank r = 0 .. numLabels - 1:
            masterPredictionMatrix[p, 2 * r + 1] -- 0-based label index with
                                                    the r-th highest vote
            masterPredictionMatrix[p, 2 * r + 2] -- the vote of that label
        Column 0 of masterPredictionMatrix is not written here.
        For every protein in toPredictFile whose top vote is non-zero, the
        top-ranked label is set to 1 in masterLabelMatrix. Column 0 of
        masterLabelMatrix (the "unlabeled" flag) is left untouched.

    A KeyError is raised when a name in toPredictFile is missing from
    localKeyFile, or when a needed local protein is missing from
    proteinToMasterIndex.
    """
    numLabels = masterLabelMatrix.shape[1] - 1
    dsdMatrix = myparser.parseDSD(dsdFile)
    numDSDRows = dsdMatrix.shape[0]

    # Local protein names in DSD row order, plus the reverse lookup.
    localProteins = []
    proteinNameToLocalIndex = {}
    with open(localKeyFile, 'r') as keyHandle:
        for localIndex, line in enumerate(keyHandle):
            name = line.strip()
            proteinNameToLocalIndex[name] = localIndex
            localProteins.append(name)

    # Local (DSD) indices of the proteins that need a prediction.
    with open(toPredictFile, 'r') as toPredictHandle:
        localIndicesToPredict = [proteinNameToLocalIndex[line.strip()]
                                 for line in toPredictHandle]

    # Pass 1: compute the votes. masterLabelMatrix is only read here, so
    # predictions made in this call never influence one another.
    for localProteinIndex in localIndicesToPredict:
        # The key file may list more proteins than the DSD matrix has rows.
        if localProteinIndex >= numDSDRows:
            continue

        masterProteinIndex = proteinToMasterIndex[
            localProteins[localProteinIndex]]
        dsdRow = dsdMatrix[localProteinIndex, :]

        predictionList = np.zeros(numLabels)
        numClosestChosen = 0

        # Walk the neighbors from smallest to largest DSD. The first sorted
        # entry is dropped -- presumably the protein itself at distance 0;
        # NOTE(review): confirm no other protein can tie at the minimum.
        for nextClosestDSDIndex in np.argsort(dsdRow)[1:]:
            if numClosestChosen >= desiredNumClosest:
                break

            neighborMasterIndex = proteinToMasterIndex[
                localProteins[nextClosestDSDIndex]]

            # Only labeled proteins (flag column falsy) are allowed to vote.
            if masterLabelMatrix[neighborMasterIndex, 0]:
                continue
            numClosestChosen += 1

            # Each label value is weighted by 1 / DSD to the neighbor.
            # NOTE(review): a DSD of exactly 0 gives inf/nan votes here --
            # confirm the parsed matrix has no zero off-diagonal entries.
            neighborLabels = np.asarray(
                masterLabelMatrix[neighborMasterIndex, 1:numLabels + 1],
                dtype=float).ravel()
            predictionList += neighborLabels / dsdRow[nextClosestDSDIndex]

        # Rank labels from highest to lowest vote and store (label, vote)
        # pairs in consecutive column pairs starting at column 1.
        rankedLabels = np.argsort(predictionList)[::-1]
        for rank, labelNum in enumerate(rankedLabels):
            masterPredictionMatrix[masterProteinIndex, rank * 2 + 1] = labelNum
            masterPredictionMatrix[masterProteinIndex, rank * 2 + 2] = (
                predictionList[labelNum])

    # Pass 2: feed the top prediction of every protein back into the labels.
    for localProteinIndex in localIndicesToPredict:
        masterProteinIndex = proteinToMasterIndex[
            localProteins[localProteinIndex]]
        firstPrediction = masterPredictionMatrix[masterProteinIndex, 1]
        firstScore = masterPredictionMatrix[masterProteinIndex, 2]

        # A zero top score means no labeled neighbor voted for any label.
        if firstScore != 0:
            # The stored label index may be a float (the prediction matrix
            # also holds votes); NumPy requires an integer column index.
            masterLabelMatrix[masterProteinIndex,
                              int(firstPrediction) + 1] = 1

    return masterPredictionMatrix, masterLabelMatrix
# '-d', 'template//ExactDSD.list', # '-r', 'template//firstLevelRandIndex.txt', # '-m', '2', '-t', '10'] # options = parser.parse_args(args) options = parser.parse_args() #### Phase 1: Parse All Input Files ppbAdj = myparser.parsePPI(options.infile) N = len(ppbAdj[:, 0]) ppbLabel = myparser.parseLabel(options.label) m = len(ppbLabel[0, :]) - 1 pnRD = myparser.parseRDIndex(options.rdindex, ppbLabel) pnFoldIndex = myparser.GetFoldIndex(pnRD, N, options.k) if options.mode != 0: ppfDSD = myparser.parseDSD(options.dsdfile) if options.sequencefile: ppfSeq = myparser.parseDSD(options.sequencefile) #### print ppfDSD[1,3], ppfDSD[5,11] #### Phase 2: Conduct Majority Voting if options.mode == 0: prediction = mvote.ordinaryMV(ppbAdj, ppbLabel, pnFoldIndex, pnRD) elif options.neighbor <= 0 or options.neighbor >= N / 2: options.neighbor == 10 print >>sys.stderr, "the setting for top DSD neighbors is invalid," print >>sys.stderr, "change to 10 instead by default.\n" if options.mode == 1: prediction = mvote.DSDUnweightMV(ppfDSD, ppbLabel, pnFoldIndex, pnRD, options.neighbor) elif options.mode == 2: