def getData(filename, filepath, timestep):
    # timestep selects which pair of consecutive pickle files to load
    dataset = []
    labels = []
    currFilename = filename + str(timestep) + ".pkl"
    nextFilename = filename + str(timestep + 1) + ".pkl"
    currTimestep = loadData(currFilename, filepath)
    nextTimestep = loadData(nextFilename, filepath)
    dataset, labels = createTimestep(currTimestep, nextTimestep)
    return dataset, labels
def getData(filename, filepath, dataset, labels, timestep=None):
    assert timestep is not None
    print("at timestep: ", timestep)
    currFilename = filename + str(timestep) + ".pkl"
    nextFilename = filename + str(timestep + 1) + ".pkl"
    currTimestep = loadData(currFilename, filepath)
    nextTimestep = loadData(nextFilename, filepath)
    dataset, labels = createTimestep(list(dataset), list(labels), currTimestep, nextTimestep)
    return np.array(dataset), np.array(labels)
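# A minimal usage sketch (an assumption, not from the original source): given pickled
# timesteps named e.g. data0.pkl, data1.pkl, ..., accumulate samples by calling getData
# once per consecutive pair of files. It assumes createTimestep appends the new
# samples and labels to the lists it receives.
def buildDataset(filename, filepath, numTimesteps):
    dataset, labels = [], []
    for t in range(numTimesteps - 1):
        dataset, labels = getData(filename, filepath, dataset, labels, timestep=t)
    return dataset, labels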
def __init__(self):
    # Load the data
    self.dataSet, self.labels = dataLoader.loadData('.\\data\\trainingData.mat')
    self.dataSet_valid, self.labels_valid = dataLoader.loadData('.\\data\\testData.mat')
    # Initialize variables
    self.epoch = 0
    self.current_sample = 0
    self.train_size = self.dataSet.shape[0]
    # Create a list of indices and shuffle them
    self.train_rand_idx = list(range(0, self.train_size))
    random.shuffle(self.train_rand_idx)
def score():
    time = 5
    meanAcc = 0
    names = ["CORA", "CITESEER", "PUBMED"]
    for name in names:
        for i in range(time):
            # nodes are labeled from 0 to N - 1
            # trainData{node: numpy array(N, 1),
            #           edge: numpy array(M, 2),
            #           node_attr: numpy array(N, D),
            #           ID: (N1, 1) numbers in range 0 to N - 1,
            #           label: numpy array(N1, 1)}
            # testData{node: numpy array(N, 1),
            #          edge: numpy array(M, 2),
            #          node_attr: numpy array(N, D),
            #          ID: (N2, 1) numbers in range 0 to N - 1}
            # testLabel: numpy array(N2, 1)
            # N1 + N2 = N
            # loadData randomly splits the node IDs into train and test, with a 2:8 split rate
            trainData, testData, testLabel = loadData(name)
            trainedModel = model.train(trainData)
            # returns a numpy array of shape (N2, 1) containing the predicted labels of the test nodes
            predictedLabel = model.test(testData)
            meanAcc += accuracy(testLabel, predictedLabel)
    meanAcc = meanAcc * 1.0 / time / len(names)
    return meanAcc


if __name__ == '__main__':
    score()
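# A minimal sketch of the accuracy helper used above (an assumption, not defined in the
# original snippet): the fraction of matching entries between two (N2, 1) label arrays.
import numpy as np

def accuracy(trueLabel, predictedLabel):
    return float(np.mean(trueLabel.flatten() == predictedLabel.flatten()))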
def loadDataFromFiles(path):
    data = {}
    files = os.listdir(path)
    for item in files:
        nameClassifier = os.path.splitext(item)[0]
        data[nameClassifier] = loadData(os.path.join(path, item))
    return data
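# Hedged usage sketch (not part of the original module): 'trained_models' is a
# hypothetical directory holding one serialized classifier per file; the file stem
# (name without extension) becomes the dictionary key.
classifiers = loadDataFromFiles('trained_models')
for name, clf in classifiers.items():
    print(name, type(clf))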
def analyseUncertainties():
    pathFile = os.path.join('machine_learner', 'collected_data', 'overall_adaptation_options.json')
    adapResults = loadData(pathFile)

    # Get the minimum and maximum value for energy consumption over all configurations (used in graphs)
    ecResults = [[ao.ec for ao in adapResult] for adapResult in adapResults]
    # flatten the list
    ecResults = [item for sublist in ecResults for item in sublist]
    minEC, maxEC = min(ecResults), max(ecResults)

    # sort the items of interest in ascending order
    # sortFunction = lambda x, y: -1 if x.getScatterRate() < y.getScatterRate() else (0 if x.getScatterRate() == y.getScatterRate() else 1)
    # worstScatterRate = sorted([result for result in adapResults], key=functools.cmp_to_key(sortFunction), reverse=False)
    # sortFunction = lambda x, y: -1 if x.getAmtOfWrongPredictions() < y.getAmtOfWrongPredictions() else (0 if x.getAmtOfWrongPredictions() == y.getAmtOfWrongPredictions() else 1)
    # worstPredictionRate = sorted([result for result in adapResults], key=lambda res: res.getAmtOfWrongPredictions()[0], reverse=True)

    print()

    # ===================
    # Plotting of results
    # ===================

    # Plots for the 5 adaptation options with the worst scatter rate
    for i in range(5):
        pass
        # plotRegressionPredictions(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # plotLearningEvolution(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # plotEffectUncertainties(worstScatterRate[i], f'worstScatter{i+1}')
        # plotLearningEffectOneCycle(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # plotLearningEffect2Cycles(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # printProgressBar(i+1, 5, prefix="Progress worst scatter rates:", suffix='Complete', length=30)

    # Plots for the 20 adaptation options with the worst predictions
    for i in range(20):
        pass
        # print(f"Error rate configuration {i+1}: {worstPredictionRate[i].getAmtOfWrongPredictions()}")
        # plotLearningEvolution(worstPredictionRate[i], minEC, maxEC, f'worstPrediction{i+1}')
        # plotEffectUncertainties(worstPredictionRate[i], f'worstPrediction{i+1}')
        # printProgressBar(i+1, 20, prefix="Progress worst prediction graphs:", suffix='Complete', length=30)

    # Plots for adaptation options which have at most 100 samples more on either side of the cutoff line in comparison to the other side
    filteredOptions = list(filter(lambda x: x.getScatterRate() <= 100, adapResults))
    for i in range(len(filteredOptions)):
        pass
        # plotLearningEvolution(filteredOptions[i], minEC, maxEC, f'filtered{i+1}')

    # Plots for all adaptation options
    for i in range(len(adapResults)):
        plotRegressionPredictions(adapResults[i], minEC, maxEC, f'all{i+1}')
        plotLearningEvolution(adapResults[i], minEC, maxEC, f'all{i+1}')
        printProgressBar(i + 1, len(adapResults), prefix="Progress all graphs:", suffix='Complete', length=30)
def analyseUncertainties(filename = os.path.join('machine_learner', 'collected_data', 'overall_adaptation_options.json')): # pathFile = os.path.join('machine_learner', 'collected_data', 'overall_adaptation_options.json') adapResults = loadData(filename) # Get the minimum and maximum value for energy consumption over all configurations (used in graphs) ecResults = [[ao.ec for ao in adapResult] for adapResult in adapResults] # flatten the list ecResults = [item for sublist in ecResults for item in sublist] # minEC, maxEC = min(ecResults), max(ecResults) print() # =================== # Plotting of results # =================== # Plots for all adaptation options # for i in range(len(adapResults)): # plotSingleConfiguration(adapResults[i], f'all{i+1}') # # plotLearningEvolution(adapResults[i], f'all{i+1}') # printProgressBar(i+1, len(adapResults), prefix="Progress all graphs:", suffix='Complete', length=30) # indices = [1, 2, 3, 4, 5, 55, 58, 60, 61, 72, 78, 112, 164, 168, 170] # Classification DeltaIoTv1 # indices = [1, 2, 3, 37, 38, 41, 42, 44, 60, 61, 64, 65, 66, 71, 73, 83] # Regression DeltaIoTv1 # indices = [367,769,2122,2179,2206] # Classification DeltaIoTv2 # indices = [] # Regression DeltaIoTv2 # indices = [3,31,262,646,742,727] # Difficult configurations classification DeltaIoTv2 indices = [31] index = 0 printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30) for i in indices: plotSingleConfiguration(adapResults[i-1], f'configuration_{i}') # plotLearningEvolution(adapResults[i-1], f'configuration_{i}') index += 1 printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30) # indices = [173, 174, 363, 365, 366, 384, 392, 411, 741, 744, 749, 750, 751, 752, 753, 756, 759, 761, 762, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 830, 831, 832, 833, 834, 852, 854, 855, 857, 858, 860, 861, 888, 915, 933, 939, 942, 969, 1134, 1542, 1544, 1545, 1546, 1547, 1548, 2095, 2096, 2097, 2111, 2112, 2113, 2114, 2115, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2177, 2178, 2193, 2194, 2195, 2196, 2202, 2203, 2204, 2205, 2258, 2259, 2284, 2285, 2286, 2547, 2555, 2556, 3114, 3122, 3123, 3222, 3254, 3255, 3256, 3257, 3258, 3263, 3264, 3265, 3266, 3267, 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3284, 3285, 3292, 3293, 3294, 3298, 3299, 3300, 3301, 3302, 3303, 3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3342, 3343, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3365, 3366, 3474, 3482, 3489, 3490, 3491, 3492, 3506, 3507, 3509, 3510, 3513, 3514, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3546, 3550, 3551, 3552, 3553, 3554, 3555, 3560, 3561, 3562, 3563, 3564, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3577, 3578, 3579, 3580, 3581, 3582, 3586, 3587, 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3662, 3663, 3669, 3671, 3672, 3681, 3735, 3742, 3743, 3744, 3751, 3752, 3753, 3758, 3759, 3760, 3761, 3762, 3766, 3767, 3768, 3769, 3770, 3771, 3807, 3813, 3814, 3815, 3816, 3821, 3822, 3823, 3824, 3825, 3830, 3831, 3832, 3833, 3834, 3837, 3838, 3840, 3841, 3842, 3843, 3847, 3848, 3849, 3850, 3851, 3852, 3922, 3923, 3930, 3931, 3932, 3933, 3995, 4002, 4004, 4005, 4011, 4012, 4013, 4014, 4050, 4059, 
4067, 4068, 4093, 4094, 4095] # # indices = [363, 3523, 3524, 3578, 3587] # indices = [741] # index = 0 # print() # printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30) # for i in indices[::-1]: # plotEffectUncertaintiesPLLA(adapResults[i], f'configuration_{i}') # index += 1 # printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30) print()
def hiddenRepTSNE(args):
    # Setting the size and the number of channels depending on the dataset
    if args.dataset == "MNIST":
        inSize = 28
        inChan = 1
    elif args.dataset == "CIFAR10":
        inSize = 32
        inChan = 3
    else:
        raise ValueError("netMaker: Unknown Dataset")

    net = netBuilder.CNN(inSize=inSize, inChan=inChan, chan=int(args.encchan), avPool=False, nbLay=int(args.encnblay),
                         ker=int(args.encker), maxPl1=int(args.encmaxpl1), maxPl2=int(args.encmaxpl2), applyDropout2D=0, nbOut=0,
                         applyLogSoftmax=False, nbDenseLay=0, sizeDenseLay=0)

    if args.cuda:
        net.cuda()

    net.setWeights(torch.load(args.tsne[0]), cuda=args.cuda, noise_init=0)

    train_loader, test_loader = dataLoader.loadData(args.dataset, int(args.batch_size), int(args.test_batch_size), args.cuda, int(args.num_workers))
    loader = train_loader if args.train else test_loader

    # Choosing a batch of images from the dataset
    data, target = next(iter(loader))

    if args.cuda:
        data = data.cuda()
        target = target.cuda()

    # Computes the hidden representation of the batch of images
    repre, _, _ = net.convFeat(data)

    colors = cm.rainbow(np.linspace(0, 1, 10))

    # Visualisation of the transformed data
    repre = repre.view(data.size(0), -1).cpu().detach().numpy()
    repre_emb = TSNE(n_components=2, init='pca', learning_rate=20).fit_transform(repre)
    plotEmb(repre_emb, target, "../vis/{}/{}_tsne.png".format(args.exp_id, args.ind_id), colors)

    # Visualisation of the raw data
    repre = data.view(data.size(0), -1).cpu().detach().numpy()
    repre_emb = TSNE(n_components=2, init='pca', learning_rate=20).fit_transform(repre)
    plotEmb(repre_emb, target, "../vis/{}/{}_tsne_raw.png".format(args.exp_id, args.ind_id), colors)
def failuresCases(args):
    _, test_loader = dataLoader.loadData(dataset=args.dataset, batch_size=args.batch_size, test_batch_size=1, cuda=False)

    # Count the number of nets in the experiment
    netNumber = len(glob.glob("../nets/{}/*.ini".format(args.exp_id)))

    # Get and sort the experiment files
    if args.train:
        scorFiles = sortExperiFiles("../results/" + args.exp_id + "/*epoch*_train.csv", netNumber)
    else:
        scorFiles = sortExperiFiles("../results/" + args.exp_id + "/*epoch*[0-9].csv", netNumber)

    imgCounter = 0
    netId = args.failurecases[0]
    epoch = args.failurecases[1]

    for data, origTarget in test_loader:
        if imgCounter % 10 == 0:
            print("Img ", imgCounter)

        if not os.path.exists("../vis/{}/net{}".format(args.exp_id, netId)):
            os.makedirs("../vis/{}/net{}".format(args.exp_id, netId))
        if not os.path.exists("../vis/{}/net{}/epoch{}".format(args.exp_id, netId, epoch)):
            os.makedirs("../vis/{}/net{}/epoch{}".format(args.exp_id, netId, epoch))

        csv = genfromtxt(scorFiles[netId, epoch], delimiter=',')

        output = csv[imgCounter, 2:4]
        binaryTarget = csv[imgCounter, 1]
        fullTarget = csv[imgCounter, 0]
        clusters = csv[imgCounter, 4:9]

        if np.argmax(output) != binaryTarget:
            vis.writeImg("../vis/{}/net{}/epoch{}/{}.jpg".format(args.exp_id, netId, epoch, imgCounter), data[0][0].detach().numpy())

        imgCounter += 1
def compareResultsClassifiers(inputPath, outputPath):
    files = sorted(os.listdir(inputPath))
    bestSample = (100, '')

    header = [
        'Technique', 'Loss function', 'Penalty', 'Scaler', 'Overall error percentage',
        'F1 score (weighted)', 'Matthews correlation coefficient', 'F1 scores (class 0 - 3)'
    ]
    outputData = {'header': header, 'values': []}

    csvOutputFile = open(os.path.join(outputPath, CSV_GENERAL_NAME), mode='w')
    csvOutputWriter = csv.writer(csvOutputFile, delimiter=',')
    csvOutputWriter.writerow(header)

    confMatrices = {'all': {}, 'versatile': {}}

    index = 0
    printProgressBar(index, len(files), prefix='Processing of classifiers:', suffix='Complete', length=30)

    # The key is the used classifier, the value is the data associated with the classifier
    for filename in files:
        configurations = loadData(os.path.join(inputPath, filename))
        key = os.path.splitext(filename)[0]
        classifier, loss, penalty, scaler = key.split('_')

        # The overall percentage of errors in the predicted values
        errorPercentageOverall = getErrorRate(configurations)

        # Skip the classifiers with an error rate over 30%
        if errorPercentageOverall > 30:
            # NOTE: removes the file (make sure it is stored somewhere else as well)
            pass
            # os.remove(os.path.join(inputPath, filename))
            # index += 1
            # printProgressBar(index, len(files), prefix='Processing of classifiers:', suffix='Complete', length=30)
            # continue

        if errorPercentageOverall < bestSample[0]:
            bestSample = (errorPercentageOverall, filename)

        # Confusion matrix for all configurations
        overallConfMatrix = getConfusionMatrix(configurations)
        confMatrices['all'][f'{classifier}_{loss}_{penalty}_{scaler}'] = overallConfMatrix
        printConfusionMatrix(overallConfMatrix, outputPath, f'ConfusionMatrixAll_{classifier}_{loss}_{penalty}_{scaler}')

        matthewsCorrCoef = calculateMatthewsCorrCoef(configurations)

        # F1 values of all the configurations
        F1All = calculateF1Scores(configurations)
        F1AllWeighted = calculateF1Score(configurations)
        F1AllStr = '[' + ','.join([f'{i:.4f}' for i in F1All]) + ']'

        del configurations

        row = [
            classifier, loss, penalty, scaler, f'{errorPercentageOverall:.2f}%',
            f'{F1AllWeighted:.4f}', f'{matthewsCorrCoef:.4f}', f'{F1AllStr}'
        ]
        outputData['values'].append(row)
        csvOutputWriter.writerow(row)

        index += 1
        printProgressBar(index, len(files), prefix='Processing of classifiers:', suffix='Complete', length=30)

    csvOutputFile.close()

    print()
    print(f'Best sample ({bestSample[0]:.2f}%): {bestSample[1]}')
    print(f'Best Matthews correlation coefficient: {max([float(i[6]) for i in outputData["values"]]):.4f}')
    print(f'Best F1 score (weighted): {max([float(i[5]) for i in outputData["values"]]):.4f}')

    writeConfMatricesToFiles(confMatrices['all'], CSV_CONFALL_NAME, outputPath)
    printTable(outputData, outputPath)
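# Hedged usage sketch (the paths are hypothetical, not from the original source):
# each file in the input directory is expected to be named
# <technique>_<loss>_<penalty>_<scaler>.<ext> so that key.split('_') yields four parts.
compareResultsClassifiers(
    os.path.join('machine_learner', 'trained_models'),
    os.path.join('machine_learner', 'analysis_output'))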
import numpy as np
import dataLoader as loader
import urllib

# 0: romance film (爱情片), 1: action film (动作片)
dictTypes = {0: u"爱情片", 1: u"动作片"}


def classify(inX, mat, labels, k):
    # Euclidean distance between inX and every sample in mat
    mat = inX - mat
    mat = mat**2
    sum = mat.sum(axis=1)
    sum = sum**0.5
    sortIndices = sum.argsort()
    dict = {}
    # Vote among the k nearest neighbours
    for i in range(k):
        type = labels[sortIndices[i]]
        dict[type] = dict.get(type, 0) + 1
    print(dict)
    # Sort the labels by vote count (descending) and return the majority label
    sortedTypes = sorted(dict.items(), key=lambda item: item[1], reverse=True)
    return dictTypes[int(sortedTypes[0][0])]


if __name__ == "__main__":
    mat, labels = loader.loadData("./Classifier/Classifier/samples/result.txt")
    X = np.array(mat)[:, 1:]
    Y = np.array(labels)
    inX = np.array([[30, 20]])
    curType = classify(inX, X, Y, 3)
    print("The type of the film is ", curType)
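# An equivalent majority vote using collections.Counter (a sketch, not from the
# original source); most_common(1) returns the label with the most votes among
# the k nearest neighbours.
from collections import Counter

def majorityLabel(neighbourLabels):
    return Counter(neighbourLabels).most_common(1)[0][0]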
import time
import dataLoader
from itertools import combinations

positions = dataLoader.loadData("CrowdsourcingResults.csv")
dataLoader.printPositions(positions)
print("")
print("")

bold = lambda val: ("*" + str(val) + "*")


def getHighestKey(positions, pos, key, usedPlayers=[]):
    bestPlayer = None

    def doBest(pos, bestPlayer=None):
        for player in positions[pos]:
            if player in usedPlayers:
                continue
            elif bestPlayer == None:
                bestPlayer = player
            else:
                if float(player[key]) > float(bestPlayer[key]):
                    # print(bestPlayer["Player"], "->", player["Player"])
                    bestPlayer = player
        return bestPlayer

    if pos == "DH":
        # Any player can be a DH
        for position in positions:
poly6 = Polytope([(0., 1.), (-1., 0.), (-1., -1.), (0., -1.), (1., 0.)], 6, imbedDim)
poly7 = Polytope([(0., 1.), (-1., 1.), (-1., -1.), (1., -1.), (1., 0.)], 7, imbedDim)

# poly8 = polytope.Polytope([], 0, imbedDim)
# poly9 = polytope.Polytope([(-1.,2.),(-1.,-1.),(2.,-1.)], 1, imbedDim)
# poly10 = polytope.Polytope([(0.,1.),(-1.,0.),(-1.,-1.),(1.,0.)], 2, imbedDim)
# poly11 = polytope.Polytope([(-1.,2.),(-1.,-1.),(1.,-1.),(1.,0.)], 3, imbedDim)
# poly12 = polytope.Polytope([(0.,1.),(-1.,1.),(-1.,-1.),(1.,0.)], 4, imbedDim)
# poly13 = polytope.Polytope([(-1.,2.),(-1.,-1.),(-1.,0.),(1.,0.)], 5, imbedDim)
# poly14 = polytope.Polytope([(0.,1.),(-1.,0.),(-1.,-1.),(0.,-1.),(1.,0.)], 6, imbedDim)
# poly15 = polytope.Polytope([(0.,1.),(-1.,1.),(-1.,-1.),(1.,-1.),(1.,0.)], 7, imbedDim)

polytopes = [poly0, poly1, poly2, poly3, poly4, poly5, poly6, poly7]

## Note batch_size must divide evenly into len(train_data_raw)
train_data, test_data = loadData(polytopes, batch_size=12)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Running on", device)
# device = torch.device("cpu")

EPOCHS = 100
net = Net([2 * imbedDim, 10, 8], imbedDim, device).to(device)
net.train(train_data, test_data, EPOCHS)
accuracy = net.test(test_data)

if accuracy * 100 >= 80:
    torch.save(net.state_dict(), 'model4.pt')
def mainProcess(singleTopic, onlyOne): ''' _____ _ |_ _| | | | | _ __ _ __ _ _| |_ | || '_ \| '_ \| | | | __| _| || | | | |_) | |_| | |_ \___/_| |_| .__/ \__,_|\__| | | |_| ''' np.set_printoptions(threshold=np.nan) # inputs = basicInput.basic() optimize = inputs['optimize'] allTrainingData = inputs['allTrainingData'] jsonInRedis = inputs['jsonInRedis'] latestFileNumber = int(inputs['latestFileNumber']) includeSingles = inputs['includeSingles'] # (optimize, allTrainingData, jsonInRedis, latestFileNumber, includeSingles) = inputs ''' _ _ | | | | | | ___ __ _ __| | | | / _ \ / _` |/ _` | | |___| (_) | (_| | (_| | \_____/\___/ \__,_|\__,_| ''' dataToLoad = 'trainingData' if allTrainingData else 'subsetTraining' dbStore = 15 if allTrainingData else 14 load = dataLoader.loadData(dataToLoad, 'TrainingData') topicDictionary = readTopics.readTopics() red = redis.Redis(host='localhost', port=6379, db=dbStore) initialTime = datetime.now() if jsonInRedis: if (red.exists('totalKeys')): print("---> Data will be read from Redis.") else: loadTime = datetime.now() load.getAllReports(red, jsonInRedis) text = "Loaded all Reports into Redis --->> " print(text + str(datetime.now() - loadTime)) else: (justBodies, justTopics) = returnJustBodiesAndTopics(load, red, jsonInRedis) ''' ____ / ___|___ _ __ _ __ _ _ ___ | | / _ \| '__| '_ \| | | / __| | |__| (_) | | | |_) | |_| \__ \ \____\___/|_| | .__/ \__,_|___/ |_| ''' print("Loading all Reports for TrainingData --->> " + str(initialTime)) if jsonInRedis: fileStart = load.allJSONFiles[-latestFileNumber] numberOfKeys = int(red.get('totalKeys')) startingEntry = basicInput.fileThresholds(red, fileStart) output = createCorpus.createCorpusFromRedis(red, numberOfKeys, startingEntry, topicDictionary, singleTopic, singleClassify) (myLabelMatrix, corpus) = output else: output = createCorpus.createCorpusFromFile(justBodies, justTopics, topicDictionary, singleTopic, singleClassify) (myLabelMatrix, corpus) = output text = "Loading all Reports for TestData --->> " print(text + str(datetime.now() - initialTime)) predictData = dataLoader.loadData("testData", 'TestData') reportsToPredict = [] reportNames = [] topicsInResult = {} for reportToPredict in predictData.getAllReports(None, False): reportsToPredict.append(reportToPredict.bodyText) reportNames.append(reportToPredict.documentName) if optimize: t = topicDictionary.generateMultiLabelArray(reportToPredict.topics, singleTopic) topicsInResult[reportToPredict.documentName] = t ''' _____ _ _ __ / __ \ | (_)/ _| | / \/ | __ _ ___ ___ _| |_ _ _ | | | |/ _` / __/ __| | _| | | | | \__/\ | (_| \__ \__ \ | | | |_| | \_____/_|\__,_|___/___/_|_| \__, | __/ | |___/ ''' if (optimize): (bestLabels, num) = optimizer.optimizer(initialTime, myLabelMatrix, corpus, reportsToPredict, topicsInResult, optimize, classifyAndPredict, calculateF1Score, singleTopic) if (num == 0): return storeBestResults.store(redis, singleTopic, reportNames, bestLabels) return else: (minDf, maxDf, maxFeatures) = staticValues.static() classificationRes = classifyAndPredict.predict(maxDf, minDf, maxFeatures, initialTime, myLabelMatrix, corpus, reportsToPredict, topicsInResult, optimize, None) (labelsPredicted, reportsToPredict) = classificationRes ''' _____ _____ _ _ / __ \/ ___|| | | | | / \/\ `--. | | | | | | `--. 
\| | | | | \__/\/\__/ /\ \_/ / \_____/\____/ \___/ ''' header = ['id'] + topicDictionary.lookupList notInTraining = notInTrainingList.notTrained() exclude = set(string.punctuation) red = redis.Redis(host='localhost', port=6379, db=6) with open('Results/Submission.csv', 'w', newline='') as outcsv: csvWriter = csv.writer(outcsv) csvWriter.writerow(header) count = 0 count2 = 0 storeRes = True checkRes = False for reportName, labels in zip(reportNames, labelsPredicted): myBodyText = makeAGuess.reshapeBodyText(reportsToPredict[count], exclude) newLabels = makeAGuess.guess(labels, notInTraining, myBodyText) if storeRes: for item in newLabels: red.sadd(reportName, item) if checkRes: for item in newLabels: member = red.sismember(reportName, item) oldTopic = item.decode("utf-8") if member and (oldTopic not in newLabels): newLabels = newLabels + (oldTopic, ) includeSingles = False if includeSingles: (newLabels, count2) = includeSingleTopics.include(red, newLabels, reportName, count2) printToSubmissionCSV.toCSV(csvWriter, reportName, newLabels, topicDictionary.lookupList) print("Included an extra " + str(count2) + " labels.") text = "CSV written / Run complete --->> " print(text + str(datetime.now() - initialTime))
'''
import numpy as np
import tensorflow as tf
from time import time
import sys
from dataLoader import loadData
import os

seed = int(time())
np.random.seed(seed)

# load data
tr, vr = loadData('./ml-1m/ratings.dat', delimiter='::', seed=seed, transpose=True, valfrac=0.1)

tm = np.greater(tr, 1e-12).astype('float32')  # masks indicating non-zero entries
vm = np.greater(vr, 1e-12).astype('float32')

n_m = tr.shape[0]  # number of movies
n_u = tr.shape[1]  # number of users (may be switched depending on 'transpose' in loadData)

# Set hyper-parameters
n_hid = 500
lambda_2 = float(sys.argv[1]) if len(sys.argv) > 1 else 60.
lambda_s = float(sys.argv[2]) if len(sys.argv) > 2 else 0.013
    plt.text(x + 0.2, y + 0.05, '%.2f' % y, va='center', ha='center')
    plt.show()


def drawScatter(vecX, vecY, types, **labels):
    xMax = float(max(vecX))
    xMin = float(min(vecX))
    yMax = float(max(vecY))
    yMin = float(min(vecY))
    plt.xlim(xMin - 0.1, xMax + 0.1)
    plt.ylim(yMin - 0.1, yMax + 0.1)
    uniqTypes = set(types)
    dictTypes = {}
    for type in uniqTypes:
        dictTypes[type] = [[], []]
    for i in range(len(types)):
        dictTypes[types[i]][0].append(vecX[i])
        dictTypes[types[i]][1].append(vecY[i])
    for type in uniqTypes:
        plt.scatter(dictTypes[type][0], dictTypes[type][1], s=50)
    plt.show()


if __name__ == "__main__":
    mat, types = loader.loadData("./Classifier/Classifier/samples/data1.txt")
    mat = np.mat(mat)
    vecX = mat[:, 0]
    vecY = mat[:, 1]
    drawScatter(vecX, vecY, types)
import dataLoader as dl

symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
nTrainingData = 10000
nTestData = 1000
pathToData = 'C:\\Users\\t_tor\\Unsynced\\extracted_images\\'

trainingData, testData = dl.loadData(symbols, nTrainingData, nTestData, pathToData)
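# A hedged sanity check (not in the original script): assuming loadData returns one
# sample per entry, confirm the requested split sizes before training.
print("training samples:", len(trainingData))
print("test samples:", len(testData))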
def featureMapVariance(args): torch.manual_seed(args.seed) _,test_loader = dataLoader.loadData(dataset=args.dataset,batch_size=args.batch_size,test_batch_size=args.test_batch_size,cuda=False) netId=args.feat_map_var[0] layNb=args.feat_map_var[1] #Get and sort the experiment file weigFiles = sortExperiFiles("../nets/"+args.exp_id+"/clustDetectNet"+str(netId)+"_epoch*".format(args.exp_id),netNumber=1) paramDictPath = "../nets/"+str(args.exp_id)+"/clustDetectNet"+str(netId)+".ini" #Getting the dataset and the boolean parameter inweig #Assuming the all the net in the exp have the same dataset #and the same value for the boolean parameter inweig config = configparser.ConfigParser() config.read(paramDictPath) batch_nb = len(test_loader.dataset)//args.test_batch_size #Updating args with the argument in the config file argsDict = vars(args) for key in config['default']: if key in argsDict: if not argsDict[key] is None: cast_f = type(argsDict[key]) if cast_f is bool: cast_f = lambda x:True if x == "True" else False if config['default'][key][0] == "[" : values = config['default'][key].replace("[","").replace("]","").split(" ") argsDict[key] = [cast_f(value.replace(",","")) for value in values] else: argsDict[key] = cast_f(config['default'][key]) args = Bunch(argsDict) net = netBuilder.netMaker(args) net.eval() imgCounter = 0 #Getting the size of feature map at the desired layer img = test_loader.dataset[0][0] imgSize = net(img[None,:,:,:])[1][-3].size(-1) plt.figure() epoch_count = 0 colors = cm.rainbow(np.linspace(0, 1, args.clust)) for weigFile in weigFiles[0]: epoch_count +=1 print("Epoch",epoch_count) net.load_state_dict(torch.load(weigFile)) batch_count = 1 outputComputed = False for i in range(args.clust): open("feature_map_{}_pos_tmp.csv".format(i),'w') open("feature_map_{}_neg_tmp.csv".format(i),'w') clusDisSum = torch.zeros(args.clust) for data, origTarget in test_loader: target = mnist.merge(origTarget) if batch_count%(batch_nb//10) ==0: print("\tbatch",batch_count,"on",batch_nb) batch_count +=1 _,actArr = net(data) act = actArr[-3].view(args.clust,-1,imgSize,imgSize) for i in range(len(act)): mapsPos = mnist.masked_index(act[i],0,(target != 0).long()) mapsNeg = mnist.masked_index(act[i],0,((1-target) != 0).long()) if mapsPos.size(0) != 0: nonEmptyPos = mnist.masked_index(mapsPos,0,(mapsPos.sum(dim=1).sum(dim=1) != 0).long()) writeMap(nonEmptyPos,"feature_map_{}_pos_tmp.csv".format(i)) if mapsNeg.size(0) != 0: nonEmptyNeg = mnist.masked_index(mapsNeg,0,(mapsNeg.sum(dim=1).sum(dim=1) != 0).long()) writeMap(nonEmptyNeg,"feature_map_{}_neg_tmp.csv".format(i)) plotVariance("pos",args.clust,epoch_count,colors,netId,layNb,args.exp_id) plotVariance("neg",args.clust,epoch_count,colors,netId,layNb,args.exp_id)
def activationSparsity(args): #Count the number of net in the experiment netNumber = len(glob.glob("../nets/{}/*.ini".format(args.exp_id))) #Get and sort the experiment file weigFiles = sortExperiFiles("../nets/"+args.exp_id+"/clustDetectNet*_epoch*".format(args.exp_id),netNumber) paramDictPaths = sorted(glob.glob("../nets/"+str(args.exp_id)+"/*.ini")) config = configparser.ConfigParser() _,test_loader = dataLoader.loadData(args.dataset,args.batch_size,args.test_batch_size) #Assuming the all the net in the exp have the same dataset #and the same value for the boolean parameter inweig config.read(paramDictPaths[0]) dataset = config['default']["dataset"] inweig = (config['default']["inweig"] == 'True') clust = int(config['default']["clust"]) #Plotting the loss across epoch and nets plotHist = plt.figure(1,figsize=(8,5)) ax1 = plotHist.add_subplot(111) box = ax1.get_position() ax1.set_position([box.x0, box.y0, box.width * 0.7, box.height]) plt.xlabel('Epoch') plt.ylabel('Sparsity') handlesInp = [] handlesConv1 = [] handlesConv2 = [] #cmap = cm.get_cmap(name='rainbow') colors = cm.rainbow(np.linspace(0, 1, len(weigFiles))) for i in range(len(weigFiles)): print("Net",i) #Reading general parameters config.read(paramDictPaths[i]) paramDict = config['default'] #check if net parameter are in the config file #if they are not : using the default ones if not 'biasclu' in config['default']: config.read("clust.config") config['default']["runCuda"] = str(args.cuda) paramNamespace = Bunch(config['default']) net = netBuilder.netMaker(paramNamespace) net.eval() sparsInpMean = np.empty((len(weigFiles[0]))) sparsConv1Mean = np.empty((len(weigFiles[0]))) sparsConv2Mean = np.empty((len(weigFiles[0]))) for j in range(len(weigFiles[0])): net.load_state_dict(torch.load(weigFiles[i,j])) sparsInpMean[j] = 0 sparsConv1Mean[j] = 0 sparsConv2Mean[j] = 0 for data, origTarget in test_loader: output,actArr = net(data) cluDis = net.cluDis clusts = actArr[2] maps = actArr[-3] summed_maps = actArr[-2] sparsInpMean[j] += computeSparsity(actArr[3]).mean()*len(data)/len(test_loader.dataset) sparsConv1Mean[j] += computeSparsity(actArr[4]).mean()*len(data)/len(test_loader.dataset) sparsConv2Mean[j] += computeSparsity(actArr[5]).mean()*len(data)/len(test_loader.dataset) label = ''.join((str(param)+"="+str(paramDict[param]+",")) for param in args.spar) handlesInp += ax1.plot(sparsInpMean, label=label,color=colors[i]) handlesConv1 += ax1.plot(sparsConv1Mean, label=label,color=colors[i], dashes = [6,2]) handlesConv2 += ax1.plot(sparsConv2Mean, label=label,color=colors[i], dashes = [2,2]) ax1.set_ylim([0, 1]) legInp = plotHist.legend(handles=handlesInp, loc='upper right' ,title="Input") legConv1 = plotHist.legend(handles=handlesConv1, loc='center right' ,title="Conv1") legConv2 = plotHist.legend(handles=handlesConv2, loc='lower right' ,title="Conv2") plotHist.gca().add_artist(legInp) plotHist.gca().add_artist(legConv1) plotHist.gca().add_artist(legConv2) plt.grid() plt.savefig('../vis/{}/histo.pdf'.format(args.exp_id))
def main(argv=None): #Getting arguments from config file and command line #Building the arg reader argreader = ArgReader(argv) argreader.parser.add_argument( '--noise', type=float, metavar='NOISE', help= 'the amount of noise to add in the gradient of the clustNet (in percentage)(default: 0.1)' ) argreader.parser.add_argument( '--optim', type=str, default="SGD", metavar='OPTIM', help='the optimizer algorithm to use (default: \'SGD\')') argreader.parser.add_argument( '--init', type=str, default=None, metavar='N', help='the weights to use to initialize the detectNets') #Reading the comand line arg argreader.getRemainingArgs() #Getting the args from command line and config file args = argreader.args args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) train_loader, test_loader, perm = dataLoader.loadData( args.dataset, args.batch_size, args.test_batch_size, args.permutate, args.cuda, args.num_workers, args.crop_size_imagenet, args.train_prop) if args.write_img_ex: for i in range(10): tensor = test_loader.dataset[i][0] vis.writeImg( '../vis/{}/{}_img{}.jpg'.format(args.exp_id, args.dataset, i), tensor.detach().numpy()) origSize = tensor.size() tensor = tensor.view(-1) tensor = tensor[np.argsort(perm)] tensor = tensor.view(origSize) vis.writeImg( '../vis/{}/{}_img{}_noperm.jpg'.format(args.exp_id, args.dataset, i), tensor.detach().numpy()) #The folders where the experience file will be written if not (os.path.exists("../vis/{}".format(args.exp_id))): os.makedirs("../vis/{}".format(args.exp_id)) if not (os.path.exists("../results/{}".format(args.exp_id))): os.makedirs("../results/{}".format(args.exp_id)) if not (os.path.exists("../nets/{}".format(args.exp_id))): os.makedirs("../nets/{}".format(args.exp_id)) netType = "net" #Write the arguments in a config file so the experiment can be re-run argreader.writeConfigFile("../nets/{}/{}{}.ini".format( args.exp_id, netType, args.model_id)) #The writer for tensorboardX writer = SummaryWriter("../results/{}".format(args.exp_id)) print("Model :", args.model_id, "Experience :", args.exp_id) #Building the net net = netBuilder.netMaker(args) if args.cuda: net.cuda() startEpoch = initialize_Net_And_EpochNumber(net, args.start_mode, args.init_path, args.exp_id, args.model_id, args.cuda, netType) #Getting the contructor and the kwargs for the choosen optimizer optimConst, kwargs = get_OptimConstructor_And_Kwargs( args.optim, args.momentum) #If no learning rate is schedule is indicated (i.e. there's only one learning rate), #the args.lr argument will be a float and not a float list. #Converting it to a list with one element makes the rest of processing easier if type(args.lr) is float: args.lr = [args.lr] #Train and evaluate the clustering detecting network for several epochs lrCounter = 0 for epoch in range(startEpoch, args.epochs + 1): #This condition determines when the learning rate should be updated (to follow the learning rate schedule) #The optimiser have to be rebuilt every time the learning rate is updated if (epoch - 1) % ( (args.epochs + 1) // len(args.lr)) == 0 or epoch == startEpoch: kwargs['lr'] = args.lr[lrCounter] print("Learning rate : ", kwargs['lr']) optimizer = optimConst(net.parameters(), **kwargs) if lrCounter < len(args.lr) - 1: lrCounter += 1 trainDetect(net, optimizer, train_loader, epoch, writer, args) with torch.no_grad(): testDetect(net, test_loader, epoch, writer, args)
def main(argv=None): #Getting arguments from config file and command line #Building the arg reader argreader = ArgReader(argv) argreader.parser.add_argument( '--max_act', type=str, nargs='*', metavar='VAL', help= 'To visualise an image that maximise the activation of one unit in the last layer. \ The values are :\ the path to the model, \ the number of images to be created, \ the layer to optimise. Can be \'conv\' or \'dense\' \ the unit to optimise. If not indicated, the unit number i will be optimised if image has label number i.' ) argreader.parser.add_argument( '--stop_thres', type=float, default=0.000005, metavar='NOISE', help= 'If the distance travelled by parameters during activation maximisation become lesser than this parameter, the optimisation stops.' ) argreader.parser.add_argument( '--reg_weight', type=float, default=0, metavar='NOISE', help='The weight of the regularisation during activation maximisation.' ) argreader.parser.add_argument( '--plot_feat_map', type=str, nargs='*', metavar='VAL', help='To visualise the last feature map of a model. \ The values are the path to the model weights, the number of input image to be pass through \ the net and the number of final feature map to plot. \ The --exp_id, --model_id and --model must be set.') #Reading the comand line arg argreader.getRemainingArgs() #Getting the args from command line and config file args = argreader.args args.cuda = not args.no_cuda and torch.cuda.is_available() #The folders where the experience file will be written if not (os.path.exists("../vis/{}".format(args.exp_id))): os.makedirs("../vis/{}".format(args.exp_id)) if args.max_act: modelPath = args.max_act[0] nbImages = int(args.max_act[1]) layToOpti = args.max_act[2] random.seed(args.seed) #Building the net model = netBuilder.netMaker(args) model.load_state_dict(torch.load(modelPath)) _, test_loader, _ = dataLoader.loadData(args.dataset, args.batch_size, 1, False, args.cuda, args.num_workers) #Comouting image that maximises activation of the given unit in the given layer maxInd = len(test_loader.dataset) - 1 model.eval() for i, (image, label) in enumerate(test_loader): print("Image ", i) img = Variable(test_loader.dataset[i][0]).unsqueeze(0) img.requires_grad = True writeImg('../vis/{}/img_'.format(args.exp_id) + str(i) + '.jpg', image[0].detach().numpy()) if len(args.max_act) == 4: unitInd = int(args.max_act[3]) else: unitInd = label.item() opt(img,model,args.exp_id,args.model_id,i,unitInd=unitInd,lr=args.lr,momentum=args.momentum,optimType='LBFGS',layToOpti=layToOpti,\ epoch=args.epochs,nbPrint=args.log_interval,stopThre=args.stop_thres,reg_weight=args.reg_weight) if i == nbImages - 1: break if args.plot_feat_map: modelPath = args.plot_feat_map[0] nbImages = int(args.plot_feat_map[1]) nbFeatMaps = int(args.plot_feat_map[2]) margin = 2 #Building the net model = netBuilder.netMaker(args) model.load_state_dict(torch.load(modelPath)) _, test_loader, _ = dataLoader.loadData(args.dataset, args.batch_size, 1, False, args.cuda, args.num_workers) #Comouting image that maximises activation of the given unit in the given layer maxInd = len(test_loader.dataset) - 1 model.eval() bigImg = None totalW = 0 totalH = 0 sortedMapInds = getMostImportantFeatMapsInd(model, args.exp_id, args.model_id) imgLabelList = [test_loader.dataset[i] for i in range(nbImages)] imgList, _ = zip(*sorted(imgLabelList, key=lambda x: x[1])) for i in range(nbImages): img = imgList[i] inSize = img.shape[1], img.shape[2] if bigImg is None: bigImg = np.zeros((nbImages * (img.shape[1] + margin), 
(nbFeatMaps + 1) * (img.shape[2] + margin))) bigImg[i * (img.shape[1] + margin):(i + 1) * img.shape[1] + i * margin, :img.shape[2]] = img.squeeze() _, featMaps = model(img.unsqueeze(0)) #Taking only the most important feature map print(featMaps.shape) featMaps = featMaps[0, sortedMapInds] for j in range(1, min(11, len(featMaps[0] + 1))): img = featMaps[j].detach().numpy() img = resize(img, inSize, mode="constant", order=0, anti_aliasing=True) bigImg[i * (img.shape[0] + margin):(i + 1) * img.shape[0] + i * margin, j * (img.shape[1] + margin):(j + 1) * (img.shape[1]) + j * margin] = img totalW += img.shape[0] + margin writeImg('../vis/{}/model_{}.png'.format(args.exp_id, args.model_id), bigImg[np.newaxis], size=(300 * nbImages, 300 * min(11, len(featMaps[0] + 1))))
def main(argv=None): #Getting arguments from config file and command line #Building the arg reader argreader = ArgReader(argv) argreader.parser.add_argument( '--noise', type=float, metavar='NOISE', help= 'the amount of noise to add in the gradient of the clustNet (in percentage)(default: 0.1)' ) argreader.parser.add_argument( '--entweig', type=float, default=0, metavar='ENTWEI', help= 'the weight of the clusters entropy term in the cost function (default: 0)' ) argreader.parser.add_argument( '--clustdivers', type=float, default=0, metavar='ENTWEI', help= 'the weight of the clusters diversity term in the cost function (default: 0)' ) argreader.parser.add_argument( '--filter_dis', type=float, default=0, metavar='FILDIS', help= 'the weight of the filter distance term in the cost function (default: 0)' ) argreader.parser.add_argument( '--featmap_entr', type=float, default=0, metavar='FEATENT', help= 'the weight of the feature map entropy term in the cost function (default: 0)' ) argreader.parser.add_argument( '--featmap_var', type=float, default=0, metavar='FEATVAR', help= 'the weight of the feature map var term in the cost function (default: 0)' ) argreader.parser.add_argument( '--optim', type=str, default="SGD", metavar='OPTIM', help='the optimizer algorithm to use (default: \'SGD\')') argreader.parser.add_argument( '--noise_init', type=float, default="0", metavar='NOISEINIT', help= 'The percentage of noise to add (relative to the filter norm) when initializing detectNets with \ a pre-trained detectNet') argreader.parser.add_argument( '--reverse_target', type=str2bool, default="False", help= 'To inverse the positive and the negative class. Useful to train a detectNet \ which will be later used to produce negative feature map' ) argreader.parser.add_argument( '--clu_train_mode', type=str, default='joint', metavar='TRAINMODE', help= 'Determines the cluster training mode. Can be \'joint\' or \'separated\' (default: \'joint\')' ) argreader.parser.add_argument('--rand_prop_val_sched', type=float, nargs='+', default=[0.9, 0.5, 0.1], metavar='RANDPROP_VAL_SCHED', help=')') argreader.parser.add_argument('--rand_prop_epo_sched', type=int, nargs='+', default=[0, 1, 2], metavar='RANDPROP_EPO_SCHED', help=')') argreader.parser.add_argument( '--init', type=str, default=None, metavar='N', help='the weights to use to initialize the detectNets') argreader.parser.add_argument( '--init_clu', type=str, default=None, metavar='N', help='the weights to use to initialize the clustNets') argreader.parser.add_argument( '--init_enc', type=str, default=None, metavar='N', help='the weights to use to initialize the encoder net') argreader.parser.add_argument( '--init_pos', type=str, default=None, metavar='N', help= 'the weights to use to initialize the positive detectnets. Ignored when not training a full clust detect net' ) argreader.parser.add_argument( '--init_neg', type=str, default=None, metavar='N', help= 'the weights to use to initialize the negative detectNets. 
Ignored when not training a full clust detect net' ) argreader.parser.add_argument( '--encapplyDropout2D', default=True, type=str2bool, metavar='N', help='whether or not to apply 2D dropout in the preprocessing net') #Reading the comand line arg argreader.getRemainingArgs() #Getting the args from command line and config file args = argreader.args args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) if args.clust < 2: raise ValueError( "The number of cluster must be at least 2. Got {}".format( args.clust)) train_loader, test_loader = dataLoader.loadData(args.dataset, args.batch_size, args.test_batch_size, args.cuda, args.num_workers) #The group of class to detect np.random.seed(args.seed) classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] np.random.shuffle(classes) classToFind = classes[0:args.clust] #The folders where the experience file will be written if not (os.path.exists("../vis/{}".format(args.exp_id))): os.makedirs("../vis/{}".format(args.exp_id)) if not (os.path.exists("../results/{}".format(args.exp_id))): os.makedirs("../results/{}".format(args.exp_id)) if not (os.path.exists("../nets/{}".format(args.exp_id))): os.makedirs("../nets/{}".format(args.exp_id)) if args.pretrain: netType = "detectNet" elif args.pretrain_cae: netType = "cae" else: netType = "clustDetectNet" #Write the arguments in a config file so the experiment can be re-run argreader.writeConfigFile("../nets/{}/{}{}.ini".format( args.exp_id, netType, args.ind_id)) #Building the net net = netBuilder.netMaker(args) if args.cuda: net.cuda() startEpoch = initialize_Net_And_EpochNumber(net,args.pretrain,args.init,args.init_clu,args.init_enc,args.init_pos,args.init_neg,\ args.exp_id,args.ind_id,args.cuda,args.noise_init,netType) net.classToFind = classToFind #Getting the contructor and the kwargs for the choosen optimizer optimConst, kwargs = get_OptimConstructor_And_Kwargs( args.optim, args.momentum) #If no learning rate is schedule is indicated (i.e. there's only one learning rate), #the args.lr argument will be a float and not a float list. 
#Converting it to a list with one element makes the rest of processing easier if type(args.lr) is float: args.lr = [args.lr] if type(args.lr_cl) is float: args.lr_cl = [args.lr_cl] if (not args.pretrain) and (not args.pretrain_cae): #Adding a hook to add noise at every weight update if args.noise != 0: gradNoise = GradNoise(ampl=args.noise) for p in net.getClustWeights(): p.register_hook(gradNoise) #Train and evaluate the clustering detecting network for several epochs lrCounter = 0 for epoch in range(startEpoch, args.epochs + 1): #This condition determines when the learning rate should be updated (to follow the learning rate schedule) #The optimiser have to be rebuilt every time the learning rate is updated if (epoch - 1) % ( (args.epochs + 1) // len(args.lr)) == 0 or epoch == startEpoch: kwargs['lr'] = args.lr[lrCounter] print("Learning rate : ", kwargs['lr']) optimizerDe = optimConst(net.getDetectWeights(), **kwargs) kwargs['lr'] = args.lr_cl[lrCounter] print("Learning rate of clustNet: ", kwargs['lr']) optimizerCl = optimConst(net.getClustWeights(), **kwargs) if lrCounter < len(args.lr) - 1: lrCounter += 1 train(net, optimizerCl, optimizerDe, train_loader, epoch, args, classToFind) test(net, test_loader, epoch, args, classToFind) else: print("Pretraining") if args.pretrain_cae: trainFunc = trainCAE testFunc = testCAE kwargsFunc = {} else: trainFunc = trainDetect testFunc = testDetect kwargsFunc = {"classToFind": classToFind} #Train and evaluate the detecting network for several epochs lrCounter = 0 for epoch in range(startEpoch, args.epochs + 1): if (epoch - 1) % ( (args.epochs + 1) // len(args.lr)) == 0 or epoch == startEpoch: kwargs['lr'] = args.lr[lrCounter] print("Learning rate : ", kwargs['lr']) optimizerDe = optimConst(net.parameters(), **kwargs) if lrCounter < len(args.lr) - 1: lrCounter += 1 trainFunc(net, optimizerDe, train_loader, epoch, args, **kwargsFunc) testFunc(net, test_loader, epoch, args, **kwargsFunc)