Python parseDSD Exemples, myparser.parseDSD Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : DSDmv.py Projet : kdoroschak/tufts-dsd-confidence

# TODO: UPDATE THIS LIST
#        '-m', '2', '-t', '10']
# options = parser.parse_args(args)
options = parser.parse_args()

#### Phase 1: Parse All Input Files
# Each option value is handed to the matching myparser routine.
ppbAdj = myparser.parsePPI(options.infile)
# N = number of entries in the first column of ppbAdj (i.e. its row count).
N = len(ppbAdj[:, 0])
ppbLabel = myparser.parseLabel(options.label)
# m = width of the first row of ppbLabel, minus one
# (presumably column 0 is not a label column -- confirm in myparser.parseLabel).
m = len(ppbLabel[0, :]) - 1
numLabels = m
pnRD = myparser.parseRDIndex(options.rdindex, ppbLabel)
pnFoldIndex = myparser.GetFoldIndex(pnRD, N, options.k)

# The DSD file is only parsed for modes other than 0 and 3; ppfDSD is
# left undefined when mode is 0 or 3.
if options.mode != 0 and options.mode != 3:
    ppfDSD = myparser.parseDSD(options.dsdfile)

#### Phase 2: Conduct Majority Voting

# Mode 0 runs ordinary majority voting right away. For every other mode the
# neighbor count is validated first: it must lie strictly between 0 and N / 2.
if options.mode == 0:
    prediction = mvote.ordinaryMV(ppbAdj, ppbLabel, pnFoldIndex, pnRD)
elif options.neighbor <= 0 or options.neighbor >= N / 2:
    # Assign (not compare) so that the voting calls that follow actually
    # receive the default announced in the message below.
    options.neighbor = 10
    print >> sys.stderr, "the setting for top DSD neighbors is invalid,"
    print >> sys.stderr, "change to 10 instead by default.\n"
if options.mode == 1:
    prediction = mvote.DSDUnweightMV(ppfDSD, ppbLabel, pnFoldIndex, pnRD, options.neighbor)
elif options.mode == 2:
    prediction = mvote.DSDWeightedMV(ppfDSD, ppbLabel, pnFoldIndex, pnRD, options.neighbor)
elif options.mode == 3:
    try:

Exemple #2

0

Afficher le fichier

Fichier : mvote.py Projet : kdoroschak/tufts-dsd-confidence

def DSDWeightedMVIterative(dsdFile, masterLabelMatrix, masterPredictionMatrix,
        desiredNumClosest, toPredictFile,
        proteinToMasterIndex, localKeyFile):

    '''
    Weighted DSD Majority Voting (one iteration)

    NAMING: "local" refers to all proteins in the current DSD
            "toPredict" refers to the proteins that need predictions (<= "local")
            "master" refers to all proteins in the complete protein list

    input: dsdFile -- DSD file for this iteration; handed to myparser.parseDSD
           masterLabelMatrix -- 2-D label matrix indexed by master protein index.
                   Column 0 is tested as a flag: a falsy value means the row
                   is treated as labeled and may vote. Columns 1..numLabels
                   hold the per-label values. Modified in place.
           masterPredictionMatrix -- 2-D matrix of predictions indexed by master
                   protein index. see "output". Modified in place.
           desiredNumClosest -- max number of labeled nodes with lowest DSD
                   used for voting
           toPredictFile -- path to a text file, one protein name per line,
                   listing the proteins to predict; every name must also
                   appear in localKeyFile
           proteinToMasterIndex -- dictionary mapping protein names to master
                   indices
           localKeyFile -- path to a text file, one protein name per line;
                   line i names the protein at local (DSD) index i
    output: (masterPredictionMatrix, masterLabelMatrix)
            For every predicted protein, its row of the prediction matrix holds
                column 1: index of the label with the top score
                column 2: the score of that label
                   ...
                last but 1 written column: index of label with the lowest score
                last written column: score of that label
            Column 0 of the prediction matrix is not written here.
            Label indices stored in the prediction matrix start from 0; the
            corresponding label matrix column is index + 1.
            After all predictions are computed, every protein to predict whose
            top score is non-zero gets a 1 in the label matrix column of its
            top label. Its column-0 flag is left unchanged.
    raises: KeyError if a protein name is missing from the key file or from
            proteinToMasterIndex.
    '''

    # Set up initial values
    numLabels = masterLabelMatrix.shape[1] - 1

    # Parse DSD file
    dsdMatrix = myparser.parseDSD(dsdFile)

    # Read in proteins from the DSD key, in the order of the DSD file.
    # Separate handle names keep the path parameters intact.
    proteinNameToLocalIndex = {}
    localProteins = [] # stores all local protein names, not just to predict
    with open(localKeyFile, 'r') as keyHandle:
        for i, line in enumerate(keyHandle):
            protein = line.strip()
            proteinNameToLocalIndex[protein] = i
            localProteins.append(protein)

    # Read in proteins that need to be predicted (as local/DSD indices)
    localIndicesToPredict = []
    with open(toPredictFile, 'r') as predictHandle:
        for line in predictHandle:
            localIndicesToPredict.append(proteinNameToLocalIndex[line.strip()])

    # Calculate prediction values for each protein to predict
    for localProteinIndex in localIndicesToPredict:

        # Proteins listed in the key but outside the DSD matrix are skipped
        if localProteinIndex >= dsdMatrix.shape[0]:
            continue

        predictionList = np.zeros(numLabels)
        indicesOfSortedDSD = np.argsort(dsdMatrix[localProteinIndex,:])

        # Convert to master index
        proteinName = localProteins[localProteinIndex]
        masterProteinIndex = proteinToMasterIndex[proteinName]

        # Counter for the number of labeled neighbors that have voted so far,
        #   compared with desiredNumClosest
        numClosestChosen = 0

        # Go through DSD values from smallest to largest.
        # NOTE(review): [1:] drops the single smallest entry, presumably the
        #   protein itself at distance 0 -- confirm the DSD matrix guarantees
        #   that no other entry ties with it.
        for nextClosestDSDIndex in indicesOfSortedDSD[1:]:

            # Stop before doing any lookup once enough neighbors have voted
            if numClosestChosen >= desiredNumClosest:
                break

            # Look up master index of node with next closest DSD value
            currentDSDProteinName = localProteins[nextClosestDSDIndex]
            masterIndexOfCurrentDSD = proteinToMasterIndex[currentDSDProteinName]

            # Only labeled proteins (falsy flag in column 0) get to vote
            if masterLabelMatrix[masterIndexOfCurrentDSD, 0]:
                continue

            numClosestChosen += 1

            # Each vote is weighted by the inverse of the DSD value.
            # NOTE(review): a DSD value of 0 here would divide by zero.
            dsdValue = dsdMatrix[localProteinIndex, nextClosestDSDIndex]
            for labeli in range(numLabels):
                predictionList[labeli] += (
                    masterLabelMatrix[masterIndexOfCurrentDSD, labeli + 1] / dsdValue)

        # Get indices of the sorted prediction values from high to low confidence
        indicesOfSortedPredictionValues = np.argsort(predictionList)[::-1]

        # Populate prediction matrix with (label, score) pairs, best first
        for rank, labelNum in enumerate(indicesOfSortedPredictionValues):
            masterPredictionMatrix[masterProteinIndex, rank * 2 + 1] = labelNum
            masterPredictionMatrix[masterProteinIndex, rank * 2 + 2] = predictionList[labelNum]

    # Store top predictions in the label matrix. This is done only after all
    # predictions above are finished, so predictions made in this call do not
    # vote for each other.
    for localProteinIndex in localIndicesToPredict:

        proteinName = localProteins[localProteinIndex]
        masterProteinIndex = proteinToMasterIndex[proteinName]

        # Extract first prediction to put in label matrix
        firstPrediction = masterPredictionMatrix[masterProteinIndex, 1]
        firstScore = masterPredictionMatrix[masterProteinIndex, 2]

        # Make sure there is a prediction, then store in label matrix
        if firstScore != 0:
            # The label index comes back with the prediction matrix's dtype
            # (possibly float); it must be an integer to be used as an index.
            masterLabelMatrix[masterProteinIndex, int(firstPrediction) + 1] = 1

    return masterPredictionMatrix, masterLabelMatrix

Exemple #3

0

Afficher le fichier

Fichier : DSDmv.py Projet : kdoroschak/tufts-dsd-confidence

#        '-d', 'template//ExactDSD.list',
#        '-r', 'template//firstLevelRandIndex.txt',
#        '-m', '2', '-t', '10']
# options = parser.parse_args(args)
options = parser.parse_args()

#### Phase 1: Parse All Input Files
# Each option value is handed to the matching myparser routine.
ppbAdj = myparser.parsePPI(options.infile)
# N = number of entries in the first column of ppbAdj (i.e. its row count).
N = len(ppbAdj[:, 0])
ppbLabel = myparser.parseLabel(options.label)
# m = width of the first row of ppbLabel, minus one
# (presumably column 0 is not a label column -- confirm in myparser.parseLabel).
m = len(ppbLabel[0, :]) - 1
pnRD = myparser.parseRDIndex(options.rdindex, ppbLabel)
pnFoldIndex = myparser.GetFoldIndex(pnRD, N, options.k)

# The DSD file is only parsed when mode is not 0; ppfDSD is left undefined
# in mode 0.
if options.mode != 0:
    ppfDSD = myparser.parseDSD(options.dsdfile)
    # The optional sequence file is read with the same DSD parser
    # (presumably it shares the DSD file format -- confirm).
    if options.sequencefile:
        ppfSeq = myparser.parseDSD(options.sequencefile)
    #### print ppfDSD[1,3], ppfDSD[5,11]

#### Phase 2: Conduct Majority Voting

# Mode 0 runs ordinary majority voting right away. For every other mode the
# neighbor count is validated first: it must lie strictly between 0 and N / 2.
if options.mode == 0:
    prediction = mvote.ordinaryMV(ppbAdj, ppbLabel, pnFoldIndex, pnRD)
elif options.neighbor <= 0 or options.neighbor >= N / 2:
    # Assign (not compare) so that the voting calls that follow actually
    # receive the default announced in the message below.
    options.neighbor = 10
    print >>sys.stderr, "the setting for top DSD neighbors is invalid,"
    print >>sys.stderr, "change to 10 instead by default.\n"
if options.mode == 1:
    prediction = mvote.DSDUnweightMV(ppfDSD, ppbLabel, pnFoldIndex, pnRD, options.neighbor)
elif options.mode == 2: