Example 1
def maxEntInf(namePath):
    dataNames = ["facebook", "IMDB_5", "amazon_DVD_7500", "amazon_DVD_20000", "amazon_Music_7500", "amazon_Music_64500", "patents_computers_50attr"]
    #each .npy file holds (nodeID, predicted probability) pairs for one split
    trainN = np.load(namePath+"TraC.npy")
    validN = np.load(namePath+"ValC.npy")
    testN = np.load(namePath+"TestC.npy")

    #recover the dataset name from the file path
    fName = ""
    for name in dataNames:
        if name in namePath:
            fName = name
            break

    #compute label proportions over the train+validation nodes
    G = readData.readDataset(dataFolder, fName)  #dataFolder: assumed module-level global
    traValNodes = [x[0] for x in trainN] + [x[0] for x in validN]
    labProps = graph_helper.getLabelCounts(G, traValNodes)
    total = float(sum(labProps.values()))
    for key in labProps:
        labProps[key] = labProps[key] / total
    #assume the negative label is the majority class
    negProp = max(labProps.values())

    #print(negProp)

    #convert each predicted probability to a logit, clipping exact 0/1 values
    #so the log-odds stay finite
    testNodes = {}
    testNodesOld = {}
    for tu in testN:
        x = float(tu[1])
        if x == 1:
            x = 1 - 1e-15
        elif x == 0:
            x = 1e-15
        testNodes[tu[0]] = math.log(x / (1 - x))
        testNodesOld[tu[0]] = x


    #choose the logit at the negative-class quantile as the new decision
    #boundary: after shifting, roughly negProp of the test nodes fall below 0.5
    centralIndex = int(negProp * len(testNodes))
    centralValue = sorted(testNodes.values())[centralIndex]

    #shift every logit by the threshold and squash back through the sigmoid
    testNodesNewProbs = {}
    actualLabs = {}
    for tu in testNodes:
        newX = testNodes[tu] - centralValue
        testNodesNewProbs[tu] = 1.0 / (1 + math.exp(-newX))
        actualLabs[tu] = G.node[tu]['label']  #networkx 1.x node-attribute access
    return BAE(testNodesNewProbs, actualLabs)
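maxEntInf recalibrates the predicted probabilities so the predicted label distribution matches the training prior: scores become logits, the logits are shifted so the negProp quantile lands at zero, and the sigmoid maps them back. A minimal standalone sketch of that shift (priorShiftCalibrate and the toy scores are hypothetical):

import math

def priorShiftCalibrate(scores, negProp):
    eps = 1e-15
    clip = lambda p: min(max(p, eps), 1 - eps)
    #probability -> logit
    logits = dict((n, math.log(clip(p) / (1 - clip(p)))) for n, p in scores.items())
    #the logit at the negative-class quantile becomes the new decision boundary
    central = sorted(logits.values())[int(negProp * len(logits))]
    #shift every logit and squash back to a probability
    return dict((n, 1.0 / (1 + math.exp(-(l - central)))) for n, l in logits.items())

print(priorShiftCalibrate({1: 0.9, 2: 0.2, 3: 0.6, 4: 0.4}, negProp=0.75))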
Example 2
def test1(fName):
    percentTest = 0.35
    percentValidation = 0.2
    #checkpoint produced by a previous training run
    load_path = 'models/Rnn_vary_training_A_BAE_amazon_DVD_20000_trial_0_fold_0.pkl'
    G = readData.readDataset("data/", fName)
    (testNodes, validationNodes,
     rest) = readData.splitNodes(G.nodes(), percentTest, percentValidation)
    rnn = RelationalRNN(G=G,
                        batch_size=10,
                        attrKey='attr',
                        load_path=load_path)
    accuracy = rnn.makePredictions(testNodes)
    print("accuracy: " + str(accuracy))
Example 3
def experiment1(variables):
    exec ""
    locals().update(variables)
    from code.RelationalModels.node2vecLR import node2vecLR
    startTime = time.time()
    G = readData.readDataset(dataFolder, fName, no0Deg=True)
    GFirst = G
    retDict = {}

    rest, validationNodes = readData.readTrial(fName, i, percentValidation)

    #prune out nodes that don't exist in GFirst
    rest = graph_helper.prune0s(GFirst, rest)
    validationNodes = graph_helper.prune0s(GFirst, validationNodes)

    #split into folds
    folds = readData.splitNodeFolds(rest, numFolds)

    #vary training set
    foldsStart = time.time()
    #j is the fold index (passed in through `variables`); m is its position in
    #the selectFolds array, and the two coincide when looping over all folds

    #add up trainNodes
    trainNodes = []
    for k in range(0, j + 1):
        trainNodes += folds[k]

    #add up rest of nodes
    testNodes = []
    for k in range(j + 1, numFolds):
        testNodes += folds[k]

    #include trial and fold in the file names so runs don't overwrite each other
    actual_save_path = save_path + "_trial_" + str(i) + "_fold_" + str(j)

    lr = node2vecLR(dataFolder + fName, trainNodes, validationNodes, testNodes)
    lr.train()
    accuracyTrain = lr.predictBAE(testSet="train")
    accuracyValid = lr.predictBAE(testSet="valid")
    accuracyTest = lr.predictBAE(testSet="test")
    retDict['accuracyTrain'] = accuracyTrain
    retDict['accuracyValid'] = accuracyValid
    retDict['accuracyTest'] = accuracyTest

    np.save(save_path + "_BAE_Test", np.array(accuracyTest))
    np.save(save_path + "_BAE_Tra", np.array(accuracyTrain))
    np.save(save_path + "_BAE_Val", np.array(accuracyValid))

    elapsed = time.time() - startTime
    print("trial: " + str(i) + ", fold: " + str(j) + ", time: " + str(elapsed))
    return retDict
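Several of these examples build the train/test split the same way: folds 0..j accumulate into the training set and folds j+1..numFolds-1 into the test set, so increasing j grows the training data at the expense of test data. A self-contained sketch (splitFoldsSketch is a hypothetical stand-in for readData.splitNodeFolds):

def splitFoldsSketch(nodes, numFolds, j):
    #deal nodes into folds round-robin, then accumulate folds 0..j as train
    folds = [nodes[k::numFolds] for k in range(numFolds)]
    train = [n for fold in folds[:j + 1] for n in fold]
    test = [n for fold in folds[j + 1:] for n in fold]
    return train, test

print(splitFoldsSketch(list(range(10)), numFolds=5, j=1))  #2 folds train, 3 folds test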
Example 4
def test_D(accuracyOutputName,
           fName,
           memory,
           mini_dim,
           avgNeighb,
           degree,
           neighbDegree,
           avgNeighbLabel,
           dynamLabel,
           avgPosNegAttr,
           num01s,
           noValid,
           netType,
           useActualLabs,
           onlyLabs,
           perturb=False,
           gpu="cpu",
           localClustering=False,
           PRType="neg",
           singlePR=False,
           bias_self="",
           testLimit=False,
           usePrevWeights=False,
           no0Deg=False,
           dataAug="none",
           randInit=False,
           pageRankOrder=True,
           usePro=False,
           lastH=False,
           changeTrainValid=0):
    startTime = time.time()

    #maxNProp=5
    maxNProp = 0
    prevStr = ""
    if usePrevWeights:
        prevStr = "w_"
        #maxNProp=100
        maxNProp = 1
    if no0Deg:
        prevStr = prevStr + "no0_"

    if debug:
        print('uncomment your values')
        sys.exit(0)
        """
        maxNProp=0
        max_epochs=200
        trials=1
        selectFolds=[15]
        numProcesses=1
        print("in debugging mode")"""

    #create unique experiment name for output file names
    accuracyOutputName = accuracyOutputName + prevStr + netType + "_aug_" + dataAug + "_" + fName + \
        "_mNe_" + str(maxNeighbors) + "_mmNe_" + str(maxNeighbors2) + "_noVl_" + bStr(noValid) + \
        "_Mem_" + str(memory) + "_min_" + str(mini_dim) + "_mEp_" + str(max_epochs) + \
        "_mNPro_" + str(maxNProp) + "_trls_" + str(trials) + "_sFlds_" + bStr(onlySelectFolds) + \
        "_PPR_" + str(pageRankOrder) + "_onlyPR_" + bStr(singlePR) + "_prT_" + PRType + \
        "_bself_" + bias_self + "_d_" + bStr(degree) + "_lim_" + bStr(testLimit) + \
        "_rinit_" + bStr(randInit) + "_p_" + bStr(perturb) + "_pro_" + bStr(usePro) + \
        "_lH_" + bStr(lastH) + "_lR_" + bStr(lastHStartRan) + "_CTV_" + str(changeTrainValid)
    print("test Name: " + accuracyOutputName)

    save_path = save_path_prefix + accuracyOutputName

    print(selectFolds)
    #if single PR, set to blank on collective runs
    if singlePR:
        attr1 = 'blank'
    else:
        attr1 = attr1d

    #create one params dict per (trial, fold) combination, varying only i, m, and j
    paramsList = []
    results = []
    for i in range(0, trials):
        for m, j in enumerate(selectFolds):
            localsCopy = locals().copy()
            if debug:
                #in debug mode, run serially right here
                results.append(experiment1(localsCopy))
            paramsList.append(localsCopy)

    #run in parallel with max number of processes
    if not debug:
        pool = Pool(processes=numProcesses)
        results = pool.map(experiment1, paramsList)

    #read data to keep record of the original network
    G = readData.readDataset(dataFolder,
                             fName,
                             sampleAttrs=sampleAttrs,
                             averageNeighborAttr=avgNeighb,
                             degree=degree,
                             neighbDegree=neighbDegree,
                             localClustering=localClustering,
                             no0Deg=no0Deg)
    GFirst = G
    accTrials2 = {
        netType + "_Test": np.zeros((trials, numSelectFolds)),
        netType + "_Train": np.zeros((trials, numSelectFolds)),
        netType + "_Valid": np.zeros((trials, numSelectFolds)),
        netType + "_Test_C": np.zeros((trials, numSelectFolds))
    }

    #now parse all results, assuming they are in order (pool.map preserves this)
    counter = -1
    for i in range(0, trials):
        for m, j in enumerate(selectFolds):
            counter += 1
            retDict = results[counter]
            setNames = ["Train", "Valid", "Test", "Test_C"]
            for name in setNames:
                accTrials2[netType + "_" + name][i][m] = retDict['accuracy' +
                                                                 name]

    #graph and record data
    if onlySelectFolds:
        t = [
            round(
                round(percentValidation, 5) + round((fold + 1) * percentBy, 5),
                5) for fold in selectFolds
        ]
    else:
        t = arange(round(percentValidation + percentBy, 5),
                   round(percentRest + percentValidation, 5) - 0.0001,
                   round(percentBy, 5))

    #graph_helper.plotBAE(accTrials2, accuracyOutputName+"_Plot", t)
    graph_helper.printNPBAE(accTrials2, accuracyOutputName)

    #print aggregated results: mean over trials, first selected fold
    strBuilder = ""
    for key in accTrials2:
        mean = np.mean(accTrials2[key], axis=0)
        strBuilder = strBuilder + str(key) + ": " + str(mean[0]) + ", "
    print(strBuilder)
    #print(accTrials2)
    elapsed = time.time() - startTime
    print("total time: " + str(elapsed))
Example 5
def experiment1(variables):

    #promote every entry of `variables` to a local: the bare exec statement
    #disables Python 2's fast-locals optimization so the update sticks
    exec ""
    locals().update(variables)
    if gpu != "cpu":
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(gpu)

    from code.RelationalModels.RelationalLSTM import RelationalLSTM
    from code.RelationalModels.RelationalLSTM_2 import RelationalLSTM_2
    from code.RelationalModels.RelationalRNNwMini import RelationalRNNwMini
    from code.RelationalModels.RelationalLSTMwMini import RelationalLSTMwMini
    from code.RelationalModels.RelationalLRAVG import RelationalLRAVG
    from blocks.filter import VariableFilter
    from blocks.roles import PARAMETER

    startTime = time.time()
    G = readData.readDataset(dataFolder,
                             fName,
                             sampleAttrs=sampleAttrs,
                             averageNeighborAttr=avgNeighb,
                             degree=degree,
                             neighbDegree=neighbDegree,
                             localClustering=localClustering,
                             no0Deg=no0Deg)
    GFirst = G

    retDict = {}

    lastHs = None
    PPRs = None
    #read trial from file

    nodeData = readData.readTrial(dataFolder, fName, i, percentValidation,
                                  changeTrainValid)
    validationNodes2 = []
    if changeTrainValid > 3:
        rest = nodeData[0]
        validationNodes = nodeData[1]
        validationNodes2 = nodeData[2]
    else:
        rest = nodeData[0]
        validationNodes = nodeData[1]

    #prune out nodes that don't exist in GFirst
    rest = graph_helper.prune0s(GFirst, rest)
    validationNodes = graph_helper.prune0s(GFirst, validationNodes)
    validationNodes2 = graph_helper.prune0s(GFirst, validationNodes2)

    #split into folds
    folds = readData.splitNodeFolds(rest, numFolds)

    #vary training set
    foldsStart = time.time()
    #m is index into array, j is fold index. they are the same if we are looping over all folds

    #add up trainNodes
    trainNodes = []
    for k in range(0, j + 1):
        trainNodes += folds[k]

    #add up rest of nodes
    testNodes = []
    for k in range(j + 1, numFolds):
        testNodes += folds[k]

    #if we don't want to partition into traditional validation set
    #we simply set train set to validation set
    if noValid:
        trainAll = trainNodes + validationNodes
        shuffle(trainAll)
        totalTrain = int(len(trainAll) * 0.4)
        trainNodes = trainAll[:totalTrain]
        validationNodes = trainAll[totalTrain:]

    # if we are doing PPR, then change G to the one associated with individual folds/trial
    # otherwise G is always the same
    if pageRankOrder == "for" or pageRankOrder == "back":
        #the 64500 Music set reuses the PPR files computed on the 7500 subset
        PPRs = pickle.load(
            open(
                dataFolder +
                fName.replace("amazon_Music_64500", "amazon_Music_7500") +
                "_10pr_" + PRType + "_trial_" + str(i % 10) + "_fold_" +
                str(j) + ".p", 'rb'))
        G = readData.readDataset(dataFolder,
                                 fName,
                                 sampleAttrs=sampleAttrs,
                                 averageNeighborAttr=avgNeighb,
                                 degree=False,
                                 neighbDegree=neighbDegree,
                                 localClustering=localClustering,
                                 pageRankOrder=pageRankOrder,
                                 PPRs=PPRs,
                                 maxNeighbors=maxNeighbors,
                                 bias_self=bias_self,
                                 trainNodes=trainNodes + validationNodes,
                                 testNodes=testNodes,
                                 testLimit=testLimit,
                                 no0Deg=no0Deg)
        if degree:
            graph_helper.transferAttr(GFirst, G, 'degree')

    actual_save_path = save_path + "_trial_" + str(i) + "_fold_" + str(j)
    if not randInit:
        if netType == "LSTM":
            rnn = RelationalLSTM(G,
                                 trainNodes,
                                 validationNodes,
                                 dim=memory,
                                 batch_size=batch_size,
                                 num_epochs=num_epochs,
                                 save_path=actual_save_path,
                                 max_epochs=max_epochs,
                                 maxNeighbors=maxNeighbors,
                                 attrKey=attr1,
                                 debug=debug,
                                 usePrevWeights=usePrevWeights,
                                 epsilon=epsilon,
                                 pageRankOrder=pageRankOrder,
                                 batchesInferences=batchesInferences,
                                 usePro=usePro)
        elif netType == "LSTM2":
            rnn = RelationalLSTM_2(G,
                                   trainNodes,
                                   validationNodes,
                                   dim=memory,
                                   summary_dim=memory,
                                   batch_size=batch_size,
                                   num_epochs=num_epochs,
                                   save_path=actual_save_path,
                                   max_epochs=max_epochs,
                                   maxNeighbors=maxNeighbors,
                                   attrKey=attr1,
                                   debug=debug,
                                   usePrevWeights=usePrevWeights,
                                   epsilon=epsilon,
                                   pageRankOrder=pageRankOrder,
                                   batchesInferences=batchesInferences,
                                   usePro=usePro)
        elif netType == "RNNwMini":
            rnn = RelationalRNNwMini(G,
                                     trainNodes,
                                     validationNodes,
                                     perturb=perturb,
                                     dim=memory,
                                     mini_dim=mini_dim,
                                     summary_dim=memory + mini_dim,
                                     batch_size=batch_size,
                                     num_epochs=num_epochs,
                                     save_path=actual_save_path,
                                     max_epochs=max_epochs,
                                     maxNeighbors=maxNeighbors,
                                     attrKey=attr1,
                                     debug=debug,
                                     usePrevWeights=usePrevWeights,
                                     epsilon=epsilon,
                                     pageRankOrder=pageRankOrder,
                                     batchesInferences=batchesInferences)
        elif netType == "LSTMwMini":
            rnn = RelationalLSTMwMini(G,
                                      trainNodes,
                                      validationNodes,
                                      perturb=perturb,
                                      dim=memory,
                                      mini_dim=mini_dim,
                                      summary_dim=memory + mini_dim,
                                      batch_size=batch_size,
                                      num_epochs=num_epochs,
                                      save_path=actual_save_path,
                                      max_epochs=max_epochs,
                                      maxNeighbors=maxNeighbors,
                                      maxNeighbors2=maxNeighbors2,
                                      attrKey=attr1,
                                      debug=debug,
                                      usePrevWeights=usePrevWeights,
                                      epsilon=epsilon,
                                      pageRankOrder=pageRankOrder,
                                      batchesInferences=batchesInferences)
        elif "LRAVG" in netType:
            rnn = RelationalLRAVG(G,
                                  netType=netType.replace("LRAVG", ""),
                                  trainNodes=trainNodes,
                                  validationNodes=validationNodes,
                                  testNodes=testNodes)
        rnn.train()
        if lastH:
            lastHs = rnn.generateHidden("train")
            lastHs.update(rnn.generateHidden("valid"))

        #DON'T dynamically change train node labels
        accuracyTrain, curPredsTrain = rnn.makePredictions(
            trainNodes,
            maxNeighbors,
            changeLabel=False if changeTrainValid > -1 else True,
            lastH=False)
        retDict['accuracyTrain'] = accuracyTrain

        #DON'T dynamically change validation node labels
        accuracyValid, curPredsValid = rnn.makePredictions(
            validationNodes,
            maxNeighbors,
            changeLabel=False if changeTrainValid > -1 else True,
            lastH=False)
        retDict['accuracyValid'] = accuracyValid

        accuracyValid2, curPredsValid2 = rnn.makePredictions(validationNodes2,
                                                             maxNeighbors,
                                                             lastH=False)
        retDict['accuracyValid2'] = accuracyValid2

        #dynamically change test nodes labels
        if lastH:
            if "swap" in dataAug:
                #iterate through all nodes to get hidden states
                tempT, tempPred, hiddenRepT = rnn.makePredictions(
                    trainNodes, maxNeighbors, changeLabel=False, lastH=True)
                tempV, tempPred, hiddenRepV = rnn.makePredictions(
                    validationNodes,
                    maxNeighbors,
                    changeLabel=False,
                    lastH=True)
                lastHs.update(hiddenRepT)
                lastHs.update(hiddenRepV)

            accuracyTest, curPredsTest, hiddenRep = rnn.makePredictions(
                testNodes, maxNeighbors, lastH=True)
            lastHs.update(hiddenRep)
        else:
            accuracyTest, curPredsTest = rnn.makePredictions(testNodes,
                                                             maxNeighbors,
                                                             lastH=False)
        retDict['accuracyTest'] = accuracyTest

        #save the actual predictions
        np.save(actual_save_path + "_pre_Tra", np.array(curPredsTrain.items()))
        np.save(actual_save_path + "_pre_Val", np.array(curPredsValid.items()))
        np.save(actual_save_path + "_pre_Val2",
                np.array(curPredsValid2.items()))
        np.save(actual_save_path + "_pre_Test", np.array(curPredsTest.items()))

        np.save(actual_save_path + "_BAE_Test", np.array(accuracyTest))
        np.save(actual_save_path + "_BAE_Tra", np.array(accuracyTrain))
        np.save(actual_save_path + "_BAE_Val", np.array(accuracyValid))
        np.save(actual_save_path + "_BAE_Val2", np.array(accuracyValid2))
        print("BAE_Tra: " + str(accuracyTrain))
        print("BAE_Val: " + str(accuracyValid))
        print("BAE_Val2: " + str(accuracyValid2))
        print("BAE_Test: " + str(accuracyTest))

        computeAccuracies(G, curPredsTrain, actual_save_path, "Tra")
        computeAccuracies(G, curPredsValid, actual_save_path, "Val")
        computeAccuracies(G, curPredsValid2, actual_save_path, "Val2")
        computeAccuracies(G, curPredsTest, actual_save_path, "Test")
    else:
        graph_helper.setLabels(G, trainNodes,
                               validationNodes + validationNodes2, testNodes,
                               changeTrainValid)

    #also dynamically change validation nodes if we desire
    #rnn.makePredictions(validationNodes, maxNeighbors)
    #merge globals and locals into one namespace for trainRnnCollective
    localsCopy = globals().copy()
    localsCopy.update(locals())
    test_bae, rnn2, best = trainRnnCollective(**localsCopy)

    #if randInit, replace with actual collective performance
    if randInit:
        retDict['accuracyTrain'] = best['Train_acc']
        retDict['accuracyValid'] = best['Valid_acc']
        retDict['accuracyTest'] = best['Test_acc']

    print("test_bae: " + str(test_bae))
    retDict['accuracyTest_C'] = test_bae
    elapsed = time.time() - startTime

    print("trial: " + str(i) + ", fold: " + str(j) + ", time: " + str(elapsed))
    return retDict
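The call trainRnnCollective(**localsCopy) above forwards an entire merged namespace as keyword arguments; that only works when the callee accepts **kwargs (or names every key). A minimal sketch of the forwarding pattern (collectiveSketch is hypothetical):

def collectiveSketch(G=None, trainNodes=None, testNodes=None, **unused):
    #uses only the parameters it names; **unused swallows the rest of the namespace
    return len(trainNodes) + len(testNodes)

scope = {'G': None, 'trainNodes': [1, 2], 'testNodes': [3], 'extraneous': 42}
print(collectiveSketch(**scope))  #prints 3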
Example 6
def savePPRtype(fName, dataFolder, trial, fold, prType, debug=True):
    numFolds = 17
    Gorig = readData.readDataset(dataFolder, fName)

    startOverall = time.time()

    rest, validationNodes = readData.readTrial(fName, trial, 0.15)
    folds = readData.splitNodeFolds(rest, numFolds)

    startFold = time.time()
    trainNodes = []
    testNodes = []
    for i in range(0, fold + 1):
        trainNodes += folds[i]
    trainNodes += validationNodes
    for i in range(fold + 1, numFolds):
        testNodes += folds[i]

    #copy the graph, then run personalized PageRank biased toward prType
    G = copy.deepcopy(Gorig)
    if prType == 'neutral':
        pr, top10pr = computePersonalizedPR(G, trainNodes, testNodes,
                                            debug=debug)
    else:
        pr, top10pr = computePersonalizedPR(G,
                                            trainNodes,
                                            testNodes,
                                            label=prType,
                                            debug=debug)
    #the full PR dictionary is large, so only the pos/neg runs save it
    if prType != 'neutral':
        pickle.dump(
            pr,
            open(
                dataFolder + fName + "_fullpr_" + prType + "_trial_" +
                str(trial) + "_fold_" + str(fold) + ".p", "wb"))
    pickle.dump(
        top10pr,
        open(
            dataFolder + fName + "_10pr_" + prType + "_trial_" + str(trial) +
            "_fold_" + str(fold) + ".p", "wb"))

    endFold = time.time()
    print("Trial " + str(trial) + " Fold " + str(fold) + ": " +
          str(endFold - startFold))
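computePersonalizedPR itself is not shown here; assuming it wraps networkx's personalized PageRank, the core idea is to concentrate the restart distribution on labeled seed nodes so the ranking is biased toward one class. A sketch under that assumption (personalizedPRSketch is hypothetical; nx.pagerank and its personalization argument are the real networkx API):

import networkx as nx

def personalizedPRSketch(G, seedNodes, alpha=0.85):
    #restart mass spread uniformly over the seed nodes, zero elsewhere
    personalization = dict.fromkeys(G.nodes(), 0.0)
    for n in seedNodes:
        personalization[n] = 1.0 / len(seedNodes)
    return nx.pagerank(G, alpha=alpha, personalization=personalization)

Gtoy = nx.karate_club_graph()
print(personalizedPRSketch(Gtoy, seedNodes=[0, 1, 2])[33])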
Example 7
def unitTest1():
    fName = "facebook"
    numFolds = 10
    maxFolds = 8
    trial = 0
    Gorig = readData.readDataset("data/" + fName + ".edges",
                                 "data/" + fName + ".attr",
                                 "data/" + fName + ".lab")
    rest, validationNodes = readData.readTrial(fName, trial, 0.15)
    folds = readData.splitNodeFolds(rest, numFolds)

    trainNodes = []
    testNodes = []
    for i in range(0, maxFolds):
        trainNodes += folds[i]
    trainNodes += validationNodes
    for i in range(maxFolds, numFolds):
        testNodes += folds[i]

    #only keep the first 10 test nodes so the unit test runs quickly
    testNodes = testNodes[0:10]
    Gpos = copy.deepcopy(Gorig)
    testPRpos = computePersonalizedPR(Gpos, trainNodes, testNodes, label='pos')
    Gneg = copy.deepcopy(Gorig)
    testPRneg = computePersonalizedPR(Gneg, trainNodes, testNodes, label='neg')
    Gnn = copy.deepcopy(Gorig)
    testPRneutral = computePersonalizedPR(Gnn, trainNodes, testNodes)
    Gsimilar = copy.deepcopy(Gorig)
    testPRsimilar = computePersonalizedPR(Gsimilar,
                                          trainNodes,
                                          testNodes,
                                          label='similar')

    for node in testPRpos.keys():
        #sorted rankings per personalization mode, kept for manual inspection
        sorted_PRpos = sorted(testPRpos[node].items(),
                              key=operator.itemgetter(1))
        sorted_PRneg = sorted(testPRneg[node].items(),
                              key=operator.itemgetter(1))
        sorted_PRneutral = sorted(testPRneutral[node].items(),
                                  key=operator.itemgetter(1))
        sorted_PRsimilar = sorted(testPRsimilar[node].items(),
                                  key=operator.itemgetter(1))

        #L2 distances between the PR vectors of the different personalization modes
        norm2pneut = 0.0
        norm2nn = 0.0
        norm2pneg = 0.0
        norm2psimilar = 0.0
        for key in testPRpos[node].keys():
            norm2pneut += math.pow(
                testPRpos[node][key] - testPRneutral[node][key], 2)
            norm2nn += math.pow(
                testPRneg[node][key] - testPRneutral[node][key], 2)
            norm2pneg += math.pow(testPRpos[node][key] - testPRneg[node][key],
                                  2)
            norm2psimilar += math.pow(
                testPRpos[node][key] - testPRsimilar[node][key], 2)
        norm2pneut = math.sqrt(norm2pneut)
        norm2nn = math.sqrt(norm2nn)
        norm2pneg = math.sqrt(norm2pneg)
        norm2psimilar = math.sqrt(norm2psimilar)
        print("norm2pneut: " + str(norm2pneut))
        print("norm2nn: " + str(norm2nn))
        print("norm2pneg: " + str(norm2pneg))
        print("norm2psimilar: " + str(norm2psimilar))

    #dump and reload to verify the PR results pickle round-trip
    pickle.dump(
        testPRpos,
        open(
            "data/" + fName + "_pos_trial_" + str(trial) + "_fold_" +
            str(maxFolds) + ".p", "wb"))
    testPRpos2 = pickle.load(
        open(
            "data/" + fName + "_pos_trial_" + str(trial) + "_fold_" +
            str(maxFolds) + ".p", "rb"))
    print("pickle round-trip OK: " + str(testPRpos2 == testPRpos))