Example No. 1
def getData(filename, filepath, numTimesteps):
	dataset = []
	labels = []

	# numTimesteps is an assumed parameter: the number of consecutive .pkl
	# timestep files to pair up.
	for i in range(numTimesteps):
		currFilename = filename + str(i) + ".pkl"
		nextFilename = filename + str(i+1) + ".pkl"

		currTimestep = loadData(currFilename, filepath)
		nextTimestep = loadData(nextFilename, filepath)

		dataset, labels = createTimestep(currTimestep, nextTimestep)

	return dataset, labels
Example No. 2
def getData(filename, filepath, dataset, labels, timestep=None):
    assert (timestep is not None)
    print "at timestep: ", timestep
    currFilename = filename + str(timestep) + ".pkl"
    nextFilename = filename + str(timestep + 1) + ".pkl"

    currTimestep = loadData(currFilename, filepath)
    nextTimestep = loadData(nextFilename, filepath)

    dataset, labels = createTimestep(list(dataset), list(labels), currTimestep,
                                     nextTimestep)
    return np.array(dataset), np.array(labels)
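The loadData helper called in the two snippets above is not shown. Given the ".pkl" filenames, a plausible pickle-based sketch is the following; the body is an assumption for illustration, not the project's actual implementation.

import os
import pickle

def loadData(filename, filepath):
    # Hypothetical sketch: unpickle a single timestep file from the given directory.
    with open(os.path.join(filepath, filename), "rb") as f:
        return pickle.load(f)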
Example No. 3
    def __init__(self):
        #Load the data
        self.dataSet, self.labels = dataLoader.loadData(
            '.\\data\\trainingData.mat')
        self.dataSet_valid, self.labels_valid = dataLoader.loadData(
            '.\\data\\testData.mat')

        #Initialize variables
        self.epoch = 0
        self.current_sample = 0
        self.train_size = self.dataSet.shape[0]

        #Create a list of indices and shuffle them
        self.train_rand_idx = list(range(0, self.train_size))
        random.shuffle(self.train_rand_idx)
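The constructor above only prepares the shuffled index list. A hypothetical companion method (an assumption, not part of the original class) shows how train_rand_idx, current_sample and epoch are typically consumed when drawing mini-batches:

    def nextBatch(self, batch_size):
        # Reshuffle and start a new epoch once the remaining indices cannot fill a batch.
        if self.current_sample + batch_size > self.train_size:
            self.epoch += 1
            self.current_sample = 0
            random.shuffle(self.train_rand_idx)
        idx = self.train_rand_idx[self.current_sample:self.current_sample + batch_size]
        self.current_sample += batch_size
        return self.dataSet[idx], self.labels[idx]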
Example No. 4
def score():
	time = 5
	meanAcc = 0
	names = ["CORA", "CITESEER", "PUBMED"]
	for name in names:
		for i in range(time):
			#nodes are labeled from 0 to N - 1
			#trainData{node:numpy array(N, 1),
			#          edge:numpy array(M, 2),
			#          node_attr:numpy array(N, D),
			#          ID: (N1, 1) numbers in range 0 to N - 1,
			#          label:numpy array(N1, 1)}

			#testData{node:numpy array(N, 1),
			#         edge:numpy array(M, 2),
			#         node_attr:numpy array(N, D),
			#         ID: (N2, 1) numbers in range 0 to N - 1}

			#testLabel:numpy array(N2, 1)

			#N1 + N2 = N

			#loadData randomly splits the node IDs into train and test sets with a 2:8 ratio
			trainData, testData, testLabel = loadData(name)
			trainedModel = model.train(trainData)

			#returns a numpy array of shape (N2, 1) containing the predicted labels of the test nodes
			predictedLabel = model.test(testData)
			meanAcc += accuracy(testLabel, predictedLabel)

	meanAcc = meanAcc * 1.0 / time / len(names)
	return meanAcc


if __name__ == '__main__':
	score()
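The comments in this example say that loadData splits the node IDs randomly into train and test with a 2:8 ratio. A minimal standalone sketch of such a split, assuming NumPy (splitNodeIds is a made-up name for illustration):

import numpy as np

def splitNodeIds(numNodes, trainFraction=0.2, seed=0):
    # Shuffle node IDs 0..N-1 and cut them into a 2:8 train/test split.
    rng = np.random.default_rng(seed)
    ids = rng.permutation(numNodes)
    cut = int(trainFraction * numNodes)
    return ids[:cut], ids[cut:]  # trainIDs (N1,) and testIDs (N2,), with N1 + N2 = N

trainIDs, testIDs = splitNodeIds(2708)  # e.g. the CORA citation graph has 2708 nodes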
Example No. 5
def loadDataFromFiles(path):
    data = {}
    files = os.listdir(path)
    for item in files:
        nameClassifier = os.path.splitext(item)[0]
        data[nameClassifier] = loadData(os.path.join(path, item))

    return data
Example No. 6
def analyseUncertainties():
    pathFile = os.path.join('machine_learner', 'collected_data',
                            'overall_adaptation_options.json')

    adapResults = loadData(pathFile)

    # Get the minimum and maximum value for energy consumption over all configurations (used in graphs)
    ecResults = [[ao.ec for ao in adapResult] for adapResult in adapResults]
    # flatten the list
    ecResults = [item for sublist in ecResults for item in sublist]
    minEC, maxEC = min(ecResults), max(ecResults)

    # sort the items of interest in ascending order
    # sortFunction = lambda x, y: -1 if x.getScatterRate() < y.getScatterRate() else (0 if x.getScatterRate() == y.getScatterRate() else 1)
    # worstScatterRate = sorted([result for result in adapResults], key=functools.cmp_to_key(sortFunction), reverse=False)

    # sortFunction = lambda x, y: -1 if x.getAmtOfWrongPredictions() < y.getAmtOfWrongPredictions() else (0 if x.getAmtOfWrongPredictions() == y.getAmtOfWrongPredictions() else 1)
    # worstPredictionRate = sorted([result for result in adapResults], key=lambda res: res.getAmtOfWrongPredictions()[0], reverse=True)
    print()

    # ===================
    # Plotting of results
    # ===================

    # Plots for the 5 adaptation options with the worst scatter rate
    for i in range(5):
        pass
        # plotRegressionPredictions(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # plotLearningEvolution(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # plotEffectUncertainties(worstScatterRate[i], f'worstScatter{i+1}')
        # plotLearningEffectOneCycle(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # plotLearningEffect2Cycles(worstScatterRate[i], minEC, maxEC, f'worstScatter{i+1}')
        # printProgressBar(i+1, 5, prefix="Progress worst scatter rates:", suffix='Complete', length=30)

    # Plots for the 20 adaptation options with the worst predictions
    for i in range(20):
        pass
        # print(f"Error rate configuration {i+1}: {worstPredictionRate[i].getAmtOfWrongPredictions()}")
        # plotLearningEvolution(worstPredictionRate[i], minEC, maxEC, f'worstPrediction{i+1}')
        # plotEffectUncertainties(worstPredictionRate[i], f'worstPrediction{i+1}')
        # printProgressBar(i+1, 20, prefix="Progress worst prediction graphs:", suffix='Complete', length=30)

    # Plots for adaptation options that have at most 100 more samples on one side of the cutoff line than on the other
    filteredOptions = list(
        filter(lambda x: x.getScatterRate() <= 100, adapResults))
    for i in range(len(filteredOptions)):
        pass
        # plotLearningEvolution(filteredOptions[i], minEC, maxEC, f'filtered{i+1}')

    # Plots for all adaptation options
    for i in range(len(adapResults)):
        plotRegressionPredictions(adapResults[i], minEC, maxEC, f'all{i+1}')
        plotLearningEvolution(adapResults[i], minEC, maxEC, f'all{i+1}')
        printProgressBar(i + 1,
                         len(adapResults),
                         prefix="Progress all graphs:",
                         suffix='Complete',
                         length=30)
Example No. 7
def analyseUncertainties(filename = os.path.join('machine_learner', 'collected_data', 'overall_adaptation_options.json')):
    # pathFile = os.path.join('machine_learner', 'collected_data', 'overall_adaptation_options.json')

    adapResults = loadData(filename)

    
    # Get the minimum and maximum value for energy consumption over all configurations (used in graphs)
    ecResults = [[ao.ec for ao in adapResult] for adapResult in adapResults]
    # flatten the list
    ecResults = [item for sublist in ecResults for item in sublist]
    # minEC, maxEC = min(ecResults), max(ecResults)

    print()


    # ===================
    # Plotting of results
    # ===================

    # Plots for all adaptation options
    # for i in range(len(adapResults)):
    #     plotSingleConfiguration(adapResults[i], f'all{i+1}')
    #     # plotLearningEvolution(adapResults[i], f'all{i+1}')
    #     printProgressBar(i+1, len(adapResults), prefix="Progress all graphs:", suffix='Complete', length=30)

    # indices = [1, 2, 3, 4, 5, 55, 58, 60, 61, 72, 78, 112, 164, 168, 170] # Classification DeltaIoTv1
    # indices = [1, 2, 3, 37, 38, 41, 42, 44, 60, 61, 64, 65, 66, 71, 73, 83] # Regression DeltaIoTv1
    # indices = [367,769,2122,2179,2206] # Classification DeltaIoTv2
    # indices = [] # Regression DeltaIoTv2

    # indices = [3,31,262,646,742,727] # Difficult configurations classification DeltaIoTv2
    indices = [31]
    index = 0
    printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30)
    for i in indices:
        plotSingleConfiguration(adapResults[i-1], f'configuration_{i}')
        # plotLearningEvolution(adapResults[i-1], f'configuration_{i}')
        index += 1
        printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30)


    # indices = [173, 174, 363, 365, 366, 384, 392, 411, 741, 744, 749, 750, 751, 752, 753, 756, 759, 761, 762, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 830, 831, 832, 833, 834, 852, 854, 855, 857, 858, 860, 861, 888, 915, 933, 939, 942, 969, 1134, 1542, 1544, 1545, 1546, 1547, 1548, 2095, 2096, 2097, 2111, 2112, 2113, 2114, 2115, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2177, 2178, 2193, 2194, 2195, 2196, 2202, 2203, 2204, 2205, 2258, 2259, 2284, 2285, 2286, 2547, 2555, 2556, 3114, 3122, 3123, 3222, 3254, 3255, 3256, 3257, 3258, 3263, 3264, 3265, 3266, 3267, 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3284, 3285, 3292, 3293, 3294, 3298, 3299, 3300, 3301, 3302, 3303, 3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3342, 3343, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3365, 3366, 3474, 3482, 3489, 3490, 3491, 3492, 3506, 3507, 3509, 3510, 3513, 3514, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3546, 3550, 3551, 3552, 3553, 3554, 3555, 3560, 3561, 3562, 3563, 3564, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3577, 3578, 3579, 3580, 3581, 3582, 3586, 3587, 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3662, 3663, 3669, 3671, 3672, 3681, 3735, 3742, 3743, 3744, 3751, 3752, 3753, 3758, 3759, 3760, 3761, 3762, 3766, 3767, 3768, 3769, 3770, 3771, 3807, 3813, 3814, 3815, 3816, 3821, 3822, 3823, 3824, 3825, 3830, 3831, 3832, 3833, 3834, 3837, 3838, 3840, 3841, 3842, 3843, 3847, 3848, 3849, 3850, 3851, 3852, 3922, 3923, 3930, 3931, 3932, 3933, 3995, 4002, 4004, 4005, 4011, 4012, 4013, 4014, 4050, 4059, 4067, 4068, 4093, 4094, 4095]
    # # indices = [363, 3523, 3524, 3578, 3587]
    # indices = [741]
    # index = 0
    # print()
    # printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30)
    # for i in indices[::-1]:
    #     plotEffectUncertaintiesPLLA(adapResults[i], f'configuration_{i}')
    #     index += 1
    #     printProgressBar(index, len(indices), prefix="Progress specific graphs:", suffix='Complete', length=30)
    print()
Example No. 8
def hiddenRepTSNE(args):

    #Setting the input size and the number of channels depending on the dataset
    if args.dataset == "MNIST":
        inSize = 28
        inChan = 1
    elif args.dataset == "CIFAR10":
        inSize = 32
        inChan = 3
    else:
        raise("netMaker: Unknown Dataset")

    net = netBuilder.CNN(inSize=inSize,inChan=inChan,chan=int(args.encchan),avPool=False,nbLay=int(args.encnblay),\
                      ker=int(args.encker),maxPl1=int(args.encmaxpl1),maxPl2=int(args.encmaxpl2),applyDropout2D=0,nbOut=0,\
                      applyLogSoftmax=False,nbDenseLay=0,sizeDenseLay=0)

    if args.cuda:
        net.cuda()

    net.setWeights(torch.load(args.tsne[0]),cuda=args.cuda,noise_init=0)

    train_loader,test_loader = dataLoader.loadData(args.dataset,int(args.batch_size),int(args.test_batch_size),args.cuda,int(args.num_workers))
    loader = train_loader if args.train else test_loader

    #Choosing a random batch of images among the dataset
    data,target = next(iter(loader))
    if args.cuda:
        data = data.cuda()
        target = target.cuda()

    #Computes the hidden representation of the batch of images
    repre,_,_ = net.convFeat(data)

    colors = cm.rainbow(np.linspace(0, 1, 10))

    #visualisation of the transformed data
    repre = repre.view(data.size(0),-1).cpu().detach().numpy()
    repre_emb = TSNE(n_components=2,init='pca',learning_rate=20).fit_transform(repre)
    plotEmb(repre_emb,target,"../vis/{}/{}_tsne.png".format(args.exp_id,args.ind_id),colors)

    ##Visualization of the raw data
    repre = data.view(data.size(0),-1).cpu().detach().numpy()
    repre_emb = TSNE(n_components=2,init='pca',learning_rate=20).fit_transform(repre)
    plotEmb(repre_emb,target,"../vis/{}/{}_tsne_raw.png".format(args.exp_id,args.ind_id),colors)
Example No. 9
def failuresCases(args):

    _,test_loader = dataLoader.loadData(dataset=args.dataset,batch_size=args.batch_size,test_batch_size=1,cuda=False)

    #Count the number of nets in the experiment
    netNumber = len(glob.glob("../nets/{}/*.ini".format(args.exp_id)))

    #Get and sort the experiment file
    if args.train:
        scorFiles = sortExperiFiles("../results/"+args.exp_id+"/*epoch*_train.csv",netNumber)
    else:
        scorFiles = sortExperiFiles("../results/"+args.exp_id+"/*epoch*[0-9].csv",netNumber)

    imgCounter = 0

    netId=args.failurecases[0]
    epoch=args.failurecases[1]

    for data, origTarget in test_loader:

        if imgCounter%10 == 0:
            print("Img ",imgCounter)

        if not (os.path.exists("../vis/{}/net{}".format(args.exp_id,netId))):
            os.makedirs("../vis/{}/net{}".format(args.exp_id,netId))

        if not os.path.exists("../vis/{}/net{}/epoch{}".format(args.exp_id,netId,epoch)):
            os.makedirs("../vis/{}/net{}/epoch{}".format(args.exp_id,netId,epoch))

        csv = genfromtxt(scorFiles[netId,epoch], delimiter=',')
        output = csv[imgCounter,2:4]
        binaryTarget = csv[imgCounter,1]
        fullTarget = csv[imgCounter,0]
        clusters = csv[imgCounter,4:9]

        if np.argmax(output) != binaryTarget:

            vis.writeImg("../vis/{}/net{}/epoch{}/{}.jpg".format(args.exp_id,netId,epoch,imgCounter),data[0][0].detach().numpy())

        imgCounter += 1
Example No. 10
def compareResultsClassifiers(inputPath, outputPath):
    files = sorted(os.listdir(inputPath))
    bestSample = (100, '')

    header = [
        'Technique', 'Loss function', 'Penalty', 'Scaler',
        'Overall error percentage', 'F1 score (weighted)',
        'Matthews correlation coefficient', 'F1 scores (class 0 - 3)'
    ]

    outputData = {'header': header, 'values': []}

    csvOutputFile = open(os.path.join(outputPath, CSV_GENERAL_NAME), mode='w')
    csvOutputWriter = csv.writer(csvOutputFile, delimiter=',')
    csvOutputWriter.writerow(header)

    confMatrices = {'all': {}, 'versatile': {}}

    index = 0
    printProgressBar(index,
                     len(files),
                     prefix='Processing of classifiers:',
                     suffix='Complete',
                     length=30)
    # The key is the used classifier, value is the data associated with the classifier
    for filename in files:
        configurations = loadData(os.path.join(inputPath, filename))
        key = os.path.splitext(filename)[0]
        classifier, loss, penalty, scaler = key.split('_')

        # The overall percentage of errors in the predicted values
        errorPercentageOverall = getErrorRate(configurations)

        # Skip the classifiers with an error rate over 30%
        if errorPercentageOverall > 30:
            # NOTE: removes the file (make sure it is stored somewhere else as well)
            pass
            # os.remove(os.path.join(inputPath,filename))
            # index += 1
            # printProgressBar(index, len(files), prefix='Processing of classifiers:', suffix='Complete', length=30)
            # continue

        if errorPercentageOverall < bestSample[0]:
            bestSample = (errorPercentageOverall, filename)

        # Confusion matrix for all configurations
        overallConfMatrix = getConfusionMatrix(configurations)
        confMatrices['all'][f'{classifier}_{loss}_{penalty}_{scaler}'] = (
            overallConfMatrix)
        printConfusionMatrix(
            overallConfMatrix, outputPath,
            f'ConfusionMatrixAll_{classifier}_{loss}_{penalty}_{scaler}')

        matthewsCorrCoef = calculateMatthewsCorrCoef(configurations)

        # F1 values of all the configurations
        F1All = calculateF1Scores(configurations)
        F1AllWeighted = calculateF1Score(configurations)
        F1AllStr = '[' + ','.join([f'{i:.4f}' for i in F1All]) + ']'
        del configurations

        row = [
            classifier, loss, penalty, scaler,
            f'{errorPercentageOverall:.2f}%', f'{F1AllWeighted:.4f}',
            f'{matthewsCorrCoef:.4f}', f'{F1AllStr}'
        ]
        outputData['values'].append(row)
        csvOutputWriter.writerow(row)

        index += 1
        printProgressBar(index,
                         len(files),
                         prefix='Processing of classifiers:',
                         suffix='Complete',
                         length=30)

    csvOutputFile.close()

    print()
    print(f'Best sample ({bestSample[0]:.2f}%): {bestSample[1]}')
    print(
        f'Best Matthews correlation coefficient: {max([float(i[6]) for i in outputData["values"]]):.4f}'
    )
    print(
        f'Best F1 score (weighted): {max([float(i[5]) for i in outputData["values"]]):.4f}'
    )

    writeConfMatricesToFiles(confMatrices['all'], CSV_CONFALL_NAME, outputPath)
    printTable(outputData, outputPath)
Example No. 11
import numpy as np
import dataLoader as loader
import urllib

dictTypes = {0: u"爱情片", 1: u"动作片"}


def classify(inX, mat, labels, k):
    mat = inX - mat
    mat = mat**2
    sum = mat.sum(axis=1)
    sum = sum**0.5
    sortIndices = sum.argsort()
    dict = {}
    for i in range(k):
        type = labels[sortIndices[i]]
        dict[type] = dict.get(type, 0) + 1
    print(dict)
    # Pick the label with the most votes among the k nearest neighbours
    sortedTypes = sorted(dict.items(), key=lambda item: item[1], reverse=True)
    return dictTypes[int(sortedTypes[0][0])]


if __name__ == "__main__":
    mat, labels = loader.loadData("./Classifier/Classifier/samples/result.txt")
    X = np.array(mat)[:, 1:]
    Y = np.array(labels)
    inX = np.array([[30, 20]])
    curType = classify(inX, X, Y, 3)
    print("The type of the film is ", curType)
Example No. 12
import time
import dataLoader
from itertools import combinations

positions = dataLoader.loadData("CrowdsourcingResults.csv")

dataLoader.printPositions(positions)
print ""
print ""

bold = lambda val: ("*" + str(val) + "*")


def getHighestKey(positions, pos, key, usedPlayers=[]):
    bestPlayer = None

    def doBest(pos, bestPlayer=None):
        for player in positions[pos]:
            if player in usedPlayers:
                continue
            elif bestPlayer == None:
                bestPlayer = player
            else:
                if float(player[key]) > float(bestPlayer[key]):
                    # print bestPlayer["Player"], "->", player["Player"]
                    bestPlayer = player
        return bestPlayer

    if pos == "DH":
        # Any player can be a DH
        for position in positions:
Example No. 13
poly6 = Polytope([(0.,1.),(-1.,0.),(-1.,-1.),(0.,-1.),(1.,0.)], 6, imbedDim)
poly7 = Polytope([(0.,1.),(-1.,1.),(-1.,-1.),(1.,-1.),(1.,0.)], 7, imbedDim)

# poly8 = polytope.Polytope([], 0, imbedDim)
# poly9 = polytope.Polytope([(-1.,2.),(-1.,-1.),(2.,-1.)], 1, imbedDim)
# poly10 = polytope.Polytope([(0.,1.),(-1.,0.),(-1.,-1.),(1.,0.)], 2, imbedDim)
# poly11 = polytope.Polytope([(-1.,2.),(-1.,-1.),(1.,-1.),(1.,0.)], 3, imbedDim)
# poly12 = polytope.Polytope([(0.,1.),(-1.,1.),(-1.,-1.),(1.,0.)], 4, imbedDim)
# poly13 = polytope.Polytope([(-1.,2.),(-1.,-1.),(-1.,0.),(1.,0.)], 5, imbedDim)
# poly14 = polytope.Polytope([(0.,1.),(-1.,0.),(-1.,-1.),(0.,-1.),(1.,0.)], 6, imbedDim)
# poly15 = polytope.Polytope([(0.,1.),(-1.,1.),(-1.,-1.),(1.,-1.),(1.,0.)], 7, imbedDim)

polytopes = [poly0, poly1, poly2, poly3, poly4, poly5, poly6, poly7]

## Note batch_size must divide evenly into len(train_data_raw)
train_data, test_data = loadData(polytopes, batch_size=12)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Running on", device)

# device = torch.device("cpu")
EPOCHS = 100

net = Net([2 * imbedDim, 10, 8], imbedDim, device).to(device)
net.train(train_data, test_data, EPOCHS)
accuracy = net.test(test_data)

if accuracy * 100 >= 80:
    torch.save(net.state_dict(),'model4.pt')

Example No. 14
def mainProcess(singleTopic, onlyOne):
    '''
         _____                  _
        |_   _|                | |
          | | _ __  _ __  _   _| |_
          | || '_ \| '_ \| | | | __|
         _| || | | | |_) | |_| | |_
         \___/_| |_| .__/ \__,_|\__|
                   | |
                   |_|
    '''

    np.set_printoptions(threshold=np.nan)
    inputs = basicInput.basic()
 
    optimize = inputs['optimize']
    allTrainingData = inputs['allTrainingData']
    jsonInRedis = inputs['jsonInRedis']
    latestFileNumber = int(inputs['latestFileNumber'])
    includeSingles = inputs['includeSingles']

    # (optimize, allTrainingData, jsonInRedis, latestFileNumber, includeSingles) = inputs

    '''
          _                     _
         | |                   | |
         | |     ___   __ _  __| |
         | |    / _ \ / _` |/ _` |
         | |___| (_) | (_| | (_| |
         \_____/\___/ \__,_|\__,_|
    '''
    dataToLoad = 'trainingData' if allTrainingData else 'subsetTraining'
    dbStore = 15 if allTrainingData else 14
    load = dataLoader.loadData(dataToLoad, 'TrainingData')
    topicDictionary = readTopics.readTopics()
    red = redis.Redis(host='localhost', port=6379, db=dbStore)

    initialTime = datetime.now()
    if jsonInRedis:
        if (red.exists('totalKeys')):
            print("---> Data will be read from Redis.")
        else:
            loadTime = datetime.now()
            load.getAllReports(red, jsonInRedis)
            text = "Loaded all Reports into Redis --->> "
            print(text + str(datetime.now() - loadTime))

    else:
        (justBodies, justTopics) = returnJustBodiesAndTopics(load, red, jsonInRedis)

    '''
          ____
         / ___|___  _ __ _ __  _   _ ___
        | |   / _ \| '__| '_ \| | | / __|
        | |__| (_) | |  | |_) | |_| \__ \
         \____\___/|_|  | .__/ \__,_|___/
                        |_|
    '''

    print("Loading all Reports for TrainingData --->> " + str(initialTime))
    if jsonInRedis:
        fileStart = load.allJSONFiles[-latestFileNumber]
        numberOfKeys = int(red.get('totalKeys'))
        startingEntry = basicInput.fileThresholds(red, fileStart)
        output = createCorpus.createCorpusFromRedis(red, numberOfKeys,
                                                    startingEntry,
                                                    topicDictionary,
                                                    singleTopic,
                                                    singleClassify)
        (myLabelMatrix, corpus) = output
    else:
        output = createCorpus.createCorpusFromFile(justBodies, justTopics,
                                                   topicDictionary,
                                                   singleTopic,
                                                   singleClassify)
    (myLabelMatrix, corpus) = output

    text = "Loading all Reports for TestData --->> "
    print(text + str(datetime.now() - initialTime))
    predictData = dataLoader.loadData("testData", 'TestData')
    reportsToPredict = []
    reportNames = []
    topicsInResult = {}
    for reportToPredict in predictData.getAllReports(None, False):
        reportsToPredict.append(reportToPredict.bodyText)
        reportNames.append(reportToPredict.documentName)
        if optimize:
            t = topicDictionary.generateMultiLabelArray(reportToPredict.topics, singleTopic)
            topicsInResult[reportToPredict.documentName] = t

    '''
         _____ _               _  __
        /  __ \ |             (_)/ _|
        | /  \/ | __ _ ___ ___ _| |_ _   _
        | |   | |/ _` / __/ __| |  _| | | |
        | \__/\ | (_| \__ \__ \ | | | |_| |
        \_____/_|\__,_|___/___/_|_|  \__, |
                                      __/ |
                                     |___/
    '''

    if (optimize):
        (bestLabels, num) = optimizer.optimizer(initialTime, myLabelMatrix,
                                                corpus, reportsToPredict,
                                                topicsInResult, optimize,
                                                classifyAndPredict,
                                                calculateF1Score,
                                                singleTopic)
        if (num == 0):
            return
        storeBestResults.store(redis, singleTopic, reportNames, bestLabels)
        return
    else:
        (minDf, maxDf, maxFeatures) = staticValues.static()
        classificationRes = classifyAndPredict.predict(maxDf, minDf,
                                                       maxFeatures,
                                                       initialTime,
                                                       myLabelMatrix,
                                                       corpus,
                                                       reportsToPredict,
                                                       topicsInResult,
                                                       optimize, None)
        (labelsPredicted, reportsToPredict) = classificationRes

    '''
         _____  _____  _   _
        /  __ \/  ___|| | | |
        | /  \/\ `--. | | | |
        | |     `--. \| | | |
        | \__/\/\__/ /\ \_/ /
        \_____/\____/  \___/
    '''

    header = ['id'] + topicDictionary.lookupList
    notInTraining = notInTrainingList.notTrained()
    exclude = set(string.punctuation)
    red = redis.Redis(host='localhost', port=6379, db=6)
    with open('Results/Submission.csv', 'w', newline='') as outcsv:
        csvWriter = csv.writer(outcsv)
        csvWriter.writerow(header)

        count = 0
        count2 = 0
        storeRes = True
        checkRes = False
        for reportName, labels in zip(reportNames, labelsPredicted):
            myBodyText = makeAGuess.reshapeBodyText(reportsToPredict[count],
                                                    exclude)
            newLabels = makeAGuess.guess(labels, notInTraining, myBodyText)

            if storeRes:
                for item in newLabels:
                    red.sadd(reportName, item)

            if checkRes:
                for item in newLabels:
                    member = red.sismember(reportName, item)
                    oldTopic = item.decode("utf-8")
                    if member and (oldTopic not in newLabels):
                        newLabels = newLabels + (oldTopic, )

            includeSingles = False
            if includeSingles:
                (newLabels, count2) = includeSingleTopics.include(red,
                                                                  newLabels,
                                                                  reportName,
                                                                  count2)

            printToSubmissionCSV.toCSV(csvWriter, reportName, newLabels,
                                       topicDictionary.lookupList)


        print("Included an extra " + str(count2) + " labels.")
        text = "CSV written / Run complete --->> "
        print(text + str(datetime.now() - initialTime))
Example No. 15
'''

import numpy as np
import tensorflow as tf
from time import time
import sys
from dataLoader import loadData
import os

seed = int(time())
np.random.seed(seed)

# load data
tr, vr = loadData('./ml-1m/ratings.dat',
                  delimiter='::',
                  seed=seed,
                  transpose=True,
                  valfrac=0.1)

tm = np.greater(tr,
                1e-12).astype('float32')  # masks indicating non-zero entries
vm = np.greater(vr, 1e-12).astype('float32')

n_m = tr.shape[0]  # number of movies
n_u = tr.shape[
    1]  # number of users (may be switched depending on 'transpose' in loadData)

# Set hyper-parameters
n_hid = 500
lambda_2 = float(sys.argv[1]) if len(sys.argv) > 1 else 60.
lambda_s = float(sys.argv[2]) if len(sys.argv) > 2 else 0.013
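The two np.greater calls above build binary masks that mark which ratings are actually observed. A toy illustration of that masking step (values made up):

import numpy as np

ratings = np.array([[5., 0., 3.],
                    [0., 4., 0.]])
mask = np.greater(ratings, 1e-12).astype('float32')
# mask == [[1., 0., 1.],
#          [0., 1., 0.]] : 1 where a rating exists, 0 where the entry is missing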
Example No. 16
        plt.text(x + 0.2, y + 0.05, '%.2f' % y, va='center', ha='center')
    plt.show()


def drawScatter(vecX, vecY, types, **labels):
    xMax = float(max(vecX))
    xMin = float(min(vecX))
    yMax = float(max(vecY))
    yMin = float(min(vecY))
    plt.xlim(xMin - 0.1, xMax + 0.1)
    plt.ylim(yMin - 0.1, yMax + 0.1)

    uniqTypes = set(types)
    dictTypes = {}
    for type in uniqTypes:
        dictTypes[type] = [[], []]
    for i in range(len(types)):
        dictTypes[types[i]][0].append(vecX[i])
        dictTypes[types[i]][1].append(vecY[i])
    for type in uniqTypes:
        plt.scatter(dictTypes[type][0], dictTypes[type][1], s=50)
    plt.show()


if __name__ == "__main__":
    mat, types = loader.loadData("./Classifier/Classifier/samples/data1.txt")
    mat = np.mat(mat)
    vecX = mat[:, 0]
    vecY = mat[:, 1]
    drawScatter(vecX, vecY, types)
Example No. 17
import dataLoader as dl

symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
nTrainingData = 10000
nTestData = 1000
pathToData = 'C:\\Users\\t_tor\\Unsynced\\extracted_images\\'

trainingData, testData = dl.loadData(symbols, nTrainingData, nTestData,
                                     pathToData)
Example No. 18
def featureMapVariance(args):

    torch.manual_seed(args.seed)

    _,test_loader = dataLoader.loadData(dataset=args.dataset,batch_size=args.batch_size,test_batch_size=args.test_batch_size,cuda=False)

    netId=args.feat_map_var[0]
    layNb=args.feat_map_var[1]

    #Get and sort the experiment file
    weigFiles = sortExperiFiles("../nets/"+args.exp_id+"/clustDetectNet"+str(netId)+"_epoch*".format(args.exp_id),netNumber=1)
    paramDictPath = "../nets/"+str(args.exp_id)+"/clustDetectNet"+str(netId)+".ini"

    #Getting the dataset and the boolean parameter inweig
    #Assuming all the nets in the experiment have the same dataset
    #and the same value for the boolean parameter inweig
    config = configparser.ConfigParser()
    config.read(paramDictPath)

    batch_nb = len(test_loader.dataset)//args.test_batch_size

    #Updating args with the argument in the config file
    argsDict = vars(args)
    for key in config['default']:

        if key in argsDict:
            if not argsDict[key] is None:
                cast_f = type(argsDict[key])

                if cast_f is bool:
                    cast_f = lambda x:True if x == "True" else False

                if config['default'][key][0] == "[" :
                    values = config['default'][key].replace("[","").replace("]","").split(" ")
                    argsDict[key] = [cast_f(value.replace(",","")) for value in values]
                else:
                    argsDict[key] = cast_f(config['default'][key])

    args = Bunch(argsDict)

    net = netBuilder.netMaker(args)
    net.eval()

    imgCounter = 0

    #Getting the size of feature map at the desired layer
    img = test_loader.dataset[0][0]
    imgSize = net(img[None,:,:,:])[1][-3].size(-1)

    plt.figure()
    epoch_count = 0
    colors = cm.rainbow(np.linspace(0, 1, args.clust))

    for weigFile in weigFiles[0]:
        epoch_count +=1
        print("Epoch",epoch_count)

        net.load_state_dict(torch.load(weigFile))

        batch_count = 1
        outputComputed = False

        for i in range(args.clust):
            open("feature_map_{}_pos_tmp.csv".format(i),'w')
            open("feature_map_{}_neg_tmp.csv".format(i),'w')

        clusDisSum = torch.zeros(args.clust)

        for data, origTarget in test_loader:

            target = mnist.merge(origTarget)

            if batch_count%(batch_nb//10) ==0:
                print("\tbatch",batch_count,"on",batch_nb)
            batch_count +=1

            _,actArr = net(data)

            act = actArr[-3].view(args.clust,-1,imgSize,imgSize)

            for i in range(len(act)):

                mapsPos = mnist.masked_index(act[i],0,(target != 0).long())
                mapsNeg = mnist.masked_index(act[i],0,((1-target) != 0).long())

                if mapsPos.size(0) != 0:
                    nonEmptyPos = mnist.masked_index(mapsPos,0,(mapsPos.sum(dim=1).sum(dim=1) != 0).long())
                    writeMap(nonEmptyPos,"feature_map_{}_pos_tmp.csv".format(i))

                if mapsNeg.size(0) != 0:
                    nonEmptyNeg = mnist.masked_index(mapsNeg,0,(mapsNeg.sum(dim=1).sum(dim=1) != 0).long())
                    writeMap(nonEmptyNeg,"feature_map_{}_neg_tmp.csv".format(i))

        plotVariance("pos",args.clust,epoch_count,colors,netId,layNb,args.exp_id)
        plotVariance("neg",args.clust,epoch_count,colors,netId,layNb,args.exp_id)
Example No. 19
def activationSparsity(args):

    #Count the number of nets in the experiment
    netNumber = len(glob.glob("../nets/{}/*.ini".format(args.exp_id)))

    #Get and sort the experiment file
    weigFiles = sortExperiFiles("../nets/"+args.exp_id+"/clustDetectNet*_epoch*".format(args.exp_id),netNumber)
    paramDictPaths = sorted(glob.glob("../nets/"+str(args.exp_id)+"/*.ini"))

    config = configparser.ConfigParser()

    _,test_loader = dataLoader.loadData(args.dataset,args.batch_size,args.test_batch_size)

    #Assuming all the nets in the experiment have the same dataset
    #and the same value for the boolean parameter inweig
    config.read(paramDictPaths[0])
    dataset = config['default']["dataset"]
    inweig = (config['default']["inweig"] == 'True')
    clust = int(config['default']["clust"])

    #Plotting the loss across epoch and nets
    plotHist = plt.figure(1,figsize=(8,5))
    ax1 = plotHist.add_subplot(111)
    box = ax1.get_position()
    ax1.set_position([box.x0, box.y0, box.width * 0.7, box.height])

    plt.xlabel('Epoch')
    plt.ylabel('Sparsity')
    handlesInp = []
    handlesConv1 = []
    handlesConv2 = []

    #cmap = cm.get_cmap(name='rainbow')
    colors = cm.rainbow(np.linspace(0, 1, len(weigFiles)))

    for i in range(len(weigFiles)):

        print("Net",i)
        #Reading general parameters
        config.read(paramDictPaths[i])
        paramDict = config['default']

        #Check whether the net parameters are in the config file;
        #if they are not, use the default ones
        if not 'biasclu' in config['default']:
            config.read("clust.config")

        config['default']["runCuda"] = str(args.cuda)

        paramNamespace = Bunch(config['default'])

        net = netBuilder.netMaker(paramNamespace)
        net.eval()

        sparsInpMean = np.empty((len(weigFiles[0])))
        sparsConv1Mean = np.empty((len(weigFiles[0])))
        sparsConv2Mean = np.empty((len(weigFiles[0])))

        for j in range(len(weigFiles[0])):

            net.load_state_dict(torch.load(weigFiles[i,j]))

            sparsInpMean[j] = 0
            sparsConv1Mean[j] = 0
            sparsConv2Mean[j] = 0

            for data, origTarget in test_loader:

                output,actArr = net(data)
                cluDis = net.cluDis
                clusts = actArr[2]
                maps = actArr[-3]
                summed_maps = actArr[-2]

                sparsInpMean[j] += computeSparsity(actArr[3]).mean()*len(data)/len(test_loader.dataset)
                sparsConv1Mean[j] += computeSparsity(actArr[4]).mean()*len(data)/len(test_loader.dataset)
                sparsConv2Mean[j] += computeSparsity(actArr[5]).mean()*len(data)/len(test_loader.dataset)

        label = ''.join(str(param) + "=" + str(paramDict[param]) + "," for param in args.spar)

        handlesInp += ax1.plot(sparsInpMean, label=label,color=colors[i])
        handlesConv1 += ax1.plot(sparsConv1Mean, label=label,color=colors[i], dashes = [6,2])
        handlesConv2 += ax1.plot(sparsConv2Mean, label=label,color=colors[i], dashes = [2,2])

        ax1.set_ylim([0, 1])

        legInp = plotHist.legend(handles=handlesInp, loc='upper right' ,title="Input")
        legConv1 = plotHist.legend(handles=handlesConv1, loc='center right' ,title="Conv1")
        legConv2 = plotHist.legend(handles=handlesConv2, loc='lower right' ,title="Conv2")

        plotHist.gca().add_artist(legInp)
        plotHist.gca().add_artist(legConv1)
        plotHist.gca().add_artist(legConv2)

        plt.grid()
        plt.savefig('../vis/{}/histo.pdf'.format(args.exp_id))
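computeSparsity is not defined in this excerpt. A plausible sketch of such a helper is given below (an assumption): the fraction of near-zero activations per sample, so that .mean() over the result matches the per-batch averaging done above.

import torch

def computeSparsity(act, eps=1e-6):
    # Fraction of activations with absolute value below eps, one value per sample.
    act = act.reshape(act.size(0), -1)
    return (act.abs() < eps).float().mean(dim=1)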
Example No. 20
def main(argv=None):

    #Getting arguments from config file and command line
    #Building the arg reader
    argreader = ArgReader(argv)

    argreader.parser.add_argument(
        '--noise',
        type=float,
        metavar='NOISE',
        help=
        'the amount of noise to add in the gradient of the clustNet (in percentage)(default: 0.1)'
    )

    argreader.parser.add_argument(
        '--optim',
        type=str,
        default="SGD",
        metavar='OPTIM',
        help='the optimizer algorithm to use (default: \'SGD\')')
    argreader.parser.add_argument(
        '--init',
        type=str,
        default=None,
        metavar='N',
        help='the weights to use to initialize the detectNets')

    #Reading the command line args
    argreader.getRemainingArgs()

    #Getting the args from command line and config file
    args = argreader.args

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    train_loader, test_loader, perm = dataLoader.loadData(
        args.dataset, args.batch_size, args.test_batch_size, args.permutate,
        args.cuda, args.num_workers, args.crop_size_imagenet, args.train_prop)

    if args.write_img_ex:

        for i in range(10):
            tensor = test_loader.dataset[i][0]
            vis.writeImg(
                '../vis/{}/{}_img{}.jpg'.format(args.exp_id, args.dataset, i),
                tensor.detach().numpy())

            origSize = tensor.size()
            tensor = tensor.view(-1)
            tensor = tensor[np.argsort(perm)]
            tensor = tensor.view(origSize)

            vis.writeImg(
                '../vis/{}/{}_img{}_noperm.jpg'.format(args.exp_id,
                                                       args.dataset, i),
                tensor.detach().numpy())

    #The folders where the experiment files will be written
    if not (os.path.exists("../vis/{}".format(args.exp_id))):
        os.makedirs("../vis/{}".format(args.exp_id))
    if not (os.path.exists("../results/{}".format(args.exp_id))):
        os.makedirs("../results/{}".format(args.exp_id))
    if not (os.path.exists("../nets/{}".format(args.exp_id))):
        os.makedirs("../nets/{}".format(args.exp_id))

    netType = "net"

    #Write the arguments in a config file so the experiment can be re-run
    argreader.writeConfigFile("../nets/{}/{}{}.ini".format(
        args.exp_id, netType, args.model_id))

    #The writer for tensorboardX
    writer = SummaryWriter("../results/{}".format(args.exp_id))

    print("Model :", args.model_id, "Experience :", args.exp_id)

    #Building the net
    net = netBuilder.netMaker(args)

    if args.cuda:
        net.cuda()

    startEpoch = initialize_Net_And_EpochNumber(net, args.start_mode,
                                                args.init_path, args.exp_id,
                                                args.model_id, args.cuda,
                                                netType)

    #Getting the constructor and the kwargs for the chosen optimizer
    optimConst, kwargs = get_OptimConstructor_And_Kwargs(
        args.optim, args.momentum)

    #If no learning rate schedule is indicated (i.e. there's only one learning rate),
    #the args.lr argument will be a float and not a float list.
    #Converting it to a list with one element makes the rest of processing easier
    if type(args.lr) is float:
        args.lr = [args.lr]

    #Train and evaluate the clustering detecting network for several epochs
    lrCounter = 0

    for epoch in range(startEpoch, args.epochs + 1):

        #This condition determines when the learning rate should be updated (to follow the learning rate schedule)
        #The optimiser has to be rebuilt every time the learning rate is updated
        if (epoch - 1) % (
            (args.epochs + 1) // len(args.lr)) == 0 or epoch == startEpoch:

            kwargs['lr'] = args.lr[lrCounter]
            print("Learning rate : ", kwargs['lr'])
            optimizer = optimConst(net.parameters(), **kwargs)

            if lrCounter < len(args.lr) - 1:
                lrCounter += 1

        trainDetect(net, optimizer, train_loader, epoch, writer, args)
        with torch.no_grad():
            testDetect(net, test_loader, epoch, writer, args)
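The (epoch - 1) % ((args.epochs + 1) // len(args.lr)) == 0 condition above decides at which epochs the optimizer is rebuilt with the next learning rate. A small standalone illustration with assumed values:

epochs = 9
lrs = [0.1, 0.01, 0.001]
period = (epochs + 1) // len(lrs)  # 10 // 3 == 3
updates = [e for e in range(1, epochs + 1) if (e - 1) % period == 0]
print(updates)  # [1, 4, 7] -> one rebuild per learning rate step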
Example No. 21
def main(argv=None):

    #Getting arguments from config file and command line
    #Building the arg reader
    argreader = ArgReader(argv)

    argreader.parser.add_argument(
        '--max_act',
        type=str,
        nargs='*',
        metavar='VAL',
        help=
        'To visualise an image that maximise the activation of one unit in the last layer. \
                        The values are :\
                            the path to the model, \
                            the number of images to be created, \
                            the layer to optimise. Can be \'conv\' or \'dense\' \
                            the unit to optimise. If not indicated, the unit number i will be optimised if image has label number i.'
    )

    argreader.parser.add_argument(
        '--stop_thres',
        type=float,
        default=0.000005,
        metavar='NOISE',
        help=
        'If the distance travelled by parameters during activation maximisation become lesser than this parameter, the optimisation stops.'
    )

    argreader.parser.add_argument(
        '--reg_weight',
        type=float,
        default=0,
        metavar='NOISE',
        help='The weight of the regularisation during activation maximisation.'
    )

    argreader.parser.add_argument(
        '--plot_feat_map',
        type=str,
        nargs='*',
        metavar='VAL',
        help='To visualise the last feature map of a model. \
                        The values are the path to the model weights,  the number of input image to be pass through \
                        the net and the number of final feature map to plot. \
                        The --exp_id, --model_id and --model must be set.')

    #Reading the command line args
    argreader.getRemainingArgs()

    #Getting the args from command line and config file
    args = argreader.args
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    #The folders where the experiment files will be written
    if not (os.path.exists("../vis/{}".format(args.exp_id))):
        os.makedirs("../vis/{}".format(args.exp_id))

    if args.max_act:

        modelPath = args.max_act[0]
        nbImages = int(args.max_act[1])
        layToOpti = args.max_act[2]

        random.seed(args.seed)

        #Building the net
        model = netBuilder.netMaker(args)
        model.load_state_dict(torch.load(modelPath))

        _, test_loader, _ = dataLoader.loadData(args.dataset, args.batch_size,
                                                1, False, args.cuda,
                                                args.num_workers)

        #Computing an image that maximises the activation of the given unit in the given layer
        maxInd = len(test_loader.dataset) - 1

        model.eval()

        for i, (image, label) in enumerate(test_loader):

            print("Image ", i)

            img = Variable(test_loader.dataset[i][0]).unsqueeze(0)
            img.requires_grad = True

            writeImg('../vis/{}/img_'.format(args.exp_id) + str(i) + '.jpg',
                     image[0].detach().numpy())

            if len(args.max_act) == 4:
                unitInd = int(args.max_act[3])
            else:
                unitInd = label.item()

            opt(img,model,args.exp_id,args.model_id,i,unitInd=unitInd,lr=args.lr,momentum=args.momentum,optimType='LBFGS',layToOpti=layToOpti,\
                epoch=args.epochs,nbPrint=args.log_interval,stopThre=args.stop_thres,reg_weight=args.reg_weight)

            if i == nbImages - 1:
                break

    if args.plot_feat_map:

        modelPath = args.plot_feat_map[0]
        nbImages = int(args.plot_feat_map[1])
        nbFeatMaps = int(args.plot_feat_map[2])
        margin = 2

        #Building the net
        model = netBuilder.netMaker(args)
        model.load_state_dict(torch.load(modelPath))

        _, test_loader, _ = dataLoader.loadData(args.dataset, args.batch_size,
                                                1, False, args.cuda,
                                                args.num_workers)

        #Computing an image that maximises the activation of the given unit in the given layer
        maxInd = len(test_loader.dataset) - 1

        model.eval()

        bigImg = None

        totalW = 0
        totalH = 0

        sortedMapInds = getMostImportantFeatMapsInd(model, args.exp_id,
                                                    args.model_id)

        imgLabelList = [test_loader.dataset[i] for i in range(nbImages)]
        imgList, _ = zip(*sorted(imgLabelList, key=lambda x: x[1]))

        for i in range(nbImages):

            img = imgList[i]
            inSize = img.shape[1], img.shape[2]
            if bigImg is None:
                bigImg = np.zeros((nbImages * (img.shape[1] + margin),
                                   (nbFeatMaps + 1) * (img.shape[2] + margin)))

            bigImg[i * (img.shape[1] + margin):(i + 1) * img.shape[1] +
                   i * margin, :img.shape[2]] = img.squeeze()

            _, featMaps = model(img.unsqueeze(0))

            #Taking only the most important feature map
            print(featMaps.shape)
            featMaps = featMaps[0, sortedMapInds]

            for j in range(1, min(11, len(featMaps[0] + 1))):

                img = featMaps[j].detach().numpy()
                img = resize(img,
                             inSize,
                             mode="constant",
                             order=0,
                             anti_aliasing=True)

                bigImg[i * (img.shape[0] + margin):(i + 1) * img.shape[0] +
                       i * margin, j * (img.shape[1] + margin):(j + 1) *
                       (img.shape[1]) + j * margin] = img

                totalW += img.shape[0] + margin

        writeImg('../vis/{}/model_{}.png'.format(args.exp_id, args.model_id),
                 bigImg[np.newaxis],
                 size=(300 * nbImages, 300 * min(11, len(featMaps[0] + 1))))
Example No. 22
def main(argv=None):

    #Getting arguments from config file and command line
    #Building the arg reader
    argreader = ArgReader(argv)

    argreader.parser.add_argument(
        '--noise',
        type=float,
        metavar='NOISE',
        help=
        'the amount of noise to add in the gradient of the clustNet (in percentage)(default: 0.1)'
    )
    argreader.parser.add_argument(
        '--entweig',
        type=float,
        default=0,
        metavar='ENTWEI',
        help=
        'the weight of the clusters entropy term in the cost function (default: 0)'
    )
    argreader.parser.add_argument(
        '--clustdivers',
        type=float,
        default=0,
        metavar='ENTWEI',
        help=
        'the weight of the clusters diversity term in the cost function (default: 0)'
    )
    argreader.parser.add_argument(
        '--filter_dis',
        type=float,
        default=0,
        metavar='FILDIS',
        help=
        'the weight of the filter distance term in the cost function (default: 0)'
    )

    argreader.parser.add_argument(
        '--featmap_entr',
        type=float,
        default=0,
        metavar='FEATENT',
        help=
        'the weight of the feature map entropy term in the cost function (default: 0)'
    )
    argreader.parser.add_argument(
        '--featmap_var',
        type=float,
        default=0,
        metavar='FEATVAR',
        help=
        'the weight of the feature map var term in the cost function (default: 0)'
    )

    argreader.parser.add_argument(
        '--optim',
        type=str,
        default="SGD",
        metavar='OPTIM',
        help='the optimizer algorithm to use (default: \'SGD\')')
    argreader.parser.add_argument(
        '--noise_init',
        type=float,
        default="0",
        metavar='NOISEINIT',
        help=
        'The percentage of noise to add (relative to the filter norm) when initializing detectNets with \
                        a pre-trained detectNet')

    argreader.parser.add_argument(
        '--reverse_target',
        type=str2bool,
        default="False",
        help=
        'To inverse the positive and the negative class. Useful to train a detectNet \
                        which will be later used to produce negative feature map'
    )

    argreader.parser.add_argument(
        '--clu_train_mode',
        type=str,
        default='joint',
        metavar='TRAINMODE',
        help=
        'Determines the cluster training mode. Can be \'joint\' or \'separated\' (default: \'joint\')'
    )

    argreader.parser.add_argument('--rand_prop_val_sched',
                                  type=float,
                                  nargs='+',
                                  default=[0.9, 0.5, 0.1],
                                  metavar='RANDPROP_VAL_SCHED',
                                  help=')')
    argreader.parser.add_argument('--rand_prop_epo_sched',
                                  type=int,
                                  nargs='+',
                                  default=[0, 1, 2],
                                  metavar='RANDPROP_EPO_SCHED',
                                  help=')')

    argreader.parser.add_argument(
        '--init',
        type=str,
        default=None,
        metavar='N',
        help='the weights to use to initialize the detectNets')
    argreader.parser.add_argument(
        '--init_clu',
        type=str,
        default=None,
        metavar='N',
        help='the weights to use to initialize the clustNets')
    argreader.parser.add_argument(
        '--init_enc',
        type=str,
        default=None,
        metavar='N',
        help='the weights to use to initialize the encoder net')
    argreader.parser.add_argument(
        '--init_pos',
        type=str,
        default=None,
        metavar='N',
        help=
        'the weights to use to initialize the positive detectnets. Ignored when not training a full clust detect net'
    )
    argreader.parser.add_argument(
        '--init_neg',
        type=str,
        default=None,
        metavar='N',
        help=
        'the weights to use to initialize the negative detectNets. Ignored when not training a full clust detect net'
    )

    argreader.parser.add_argument(
        '--encapplyDropout2D',
        default=True,
        type=str2bool,
        metavar='N',
        help='whether or not to apply 2D dropout in the preprocessing net')

    #Reading the command line args
    argreader.getRemainingArgs()

    #Getting the args from command line and config file
    args = argreader.args

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    if args.clust < 2:
        raise ValueError(
            "The number of cluster must be at least 2. Got {}".format(
                args.clust))
    train_loader, test_loader = dataLoader.loadData(args.dataset,
                                                    args.batch_size,
                                                    args.test_batch_size,
                                                    args.cuda,
                                                    args.num_workers)

    #The group of class to detect
    np.random.seed(args.seed)
    classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    np.random.shuffle(classes)
    classToFind = classes[0:args.clust]

    #The folders where the experiment files will be written
    if not (os.path.exists("../vis/{}".format(args.exp_id))):
        os.makedirs("../vis/{}".format(args.exp_id))
    if not (os.path.exists("../results/{}".format(args.exp_id))):
        os.makedirs("../results/{}".format(args.exp_id))
    if not (os.path.exists("../nets/{}".format(args.exp_id))):
        os.makedirs("../nets/{}".format(args.exp_id))

    if args.pretrain:
        netType = "detectNet"
    elif args.pretrain_cae:
        netType = "cae"
    else:
        netType = "clustDetectNet"

    #Write the arguments in a config file so the experiment can be re-run
    argreader.writeConfigFile("../nets/{}/{}{}.ini".format(
        args.exp_id, netType, args.ind_id))

    #Building the net
    net = netBuilder.netMaker(args)

    if args.cuda:
        net.cuda()

    startEpoch = initialize_Net_And_EpochNumber(net,args.pretrain,args.init,args.init_clu,args.init_enc,args.init_pos,args.init_neg,\
                                                args.exp_id,args.ind_id,args.cuda,args.noise_init,netType)

    net.classToFind = classToFind

    #Getting the constructor and the kwargs for the chosen optimizer
    optimConst, kwargs = get_OptimConstructor_And_Kwargs(
        args.optim, args.momentum)

    #If no learning rate schedule is indicated (i.e. there's only one learning rate),
    #the args.lr argument will be a float and not a float list.
    #Converting it to a list with one element makes the rest of processing easier
    if type(args.lr) is float:
        args.lr = [args.lr]

    if type(args.lr_cl) is float:
        args.lr_cl = [args.lr_cl]

    if (not args.pretrain) and (not args.pretrain_cae):

        #Adding a hook to add noise at every weight update
        if args.noise != 0:
            gradNoise = GradNoise(ampl=args.noise)
            for p in net.getClustWeights():
                p.register_hook(gradNoise)

        #Train and evaluate the clustering detecting network for several epochs
        lrCounter = 0

        for epoch in range(startEpoch, args.epochs + 1):

            #This condition determines when the learning rate should be updated (to follow the learning rate schedule)
            #The optimiser has to be rebuilt every time the learning rate is updated
            if (epoch - 1) % (
                (args.epochs + 1) // len(args.lr)) == 0 or epoch == startEpoch:

                kwargs['lr'] = args.lr[lrCounter]
                print("Learning rate : ", kwargs['lr'])
                optimizerDe = optimConst(net.getDetectWeights(), **kwargs)

                kwargs['lr'] = args.lr_cl[lrCounter]
                print("Learning rate of clustNet: ", kwargs['lr'])
                optimizerCl = optimConst(net.getClustWeights(), **kwargs)

                if lrCounter < len(args.lr) - 1:
                    lrCounter += 1

            train(net, optimizerCl, optimizerDe, train_loader, epoch, args,
                  classToFind)
            test(net, test_loader, epoch, args, classToFind)

    else:
        print("Pretraining")

        if args.pretrain_cae:
            trainFunc = trainCAE
            testFunc = testCAE
            kwargsFunc = {}
        else:
            trainFunc = trainDetect
            testFunc = testDetect
            kwargsFunc = {"classToFind": classToFind}

        #Train and evaluate the detecting network for several epochs
        lrCounter = 0
        for epoch in range(startEpoch, args.epochs + 1):

            if (epoch - 1) % (
                (args.epochs + 1) // len(args.lr)) == 0 or epoch == startEpoch:

                kwargs['lr'] = args.lr[lrCounter]
                print("Learning rate : ", kwargs['lr'])
                optimizerDe = optimConst(net.parameters(), **kwargs)

                if lrCounter < len(args.lr) - 1:
                    lrCounter += 1

            trainFunc(net, optimizerDe, train_loader, epoch, args,
                      **kwargsFunc)
            testFunc(net, test_loader, epoch, args, **kwargsFunc)