Example #1
def trainDBN(unsupervisedLearningRate, supervisedLearningRate, visibleDropout,
             hiddenDropout, miniBatchSize, momentumMax, maxEpochs):
    print 'in trainDBN'
    trainVectors, trainLabels =\
      readmnist.read(0, TRAIN, digits=None, bTrain=True, path=PATH)

    testVectors, testLabels =\
        readmnist.read(TRAIN, TRAIN + TEST,
                       digits=None, bTrain=True, path=PATH)

    trainVectors, trainLabels = shuffle(trainVectors, trainLabels)
    print 'done reading'
    trainVectors = np.array(trainVectors, dtype='float')
    trainingScaledVectors = scale(trainVectors)

    testVectors = np.array(testVectors, dtype='float')
    testingScaledVectors = scale(testVectors)

    trainVectorLabels = labelsToVectors(trainLabels, 10)
    print 'done scaling data'
    print 'creating DBN'
    net = db.DBN(
        5,
        [784, 1000, 1000, 1000, 10],
        binary=False,
        unsupervisedLearningRate=unsupervisedLearningRate,
        supervisedLearningRate=supervisedLearningRate,
        momentumMax=momentumMax,
        nesterovMomentum=True,
        rbmNesterovMomentum=True,
        activationFunction=Rectified(),
        rbmActivationFunctionVisible=Identity(),
        rbmActivationFunctionHidden=RectifiedNoisy(),
        rmsprop=True,
        visibleDropout=visibleDropout,
        hiddenDropout=hiddenDropout,
        weightDecayL1=0,
        weightDecayL2=0,
        rbmHiddenDropout=1.0,
        rbmVisibleDropout=1.0,
        miniBatchSize=miniBatchSize,
        adversarial_training=True,
        # TODO: make this a learned param
        preTrainEpochs=100,
        sparsityConstraintRbm=False,
        sparsityTragetRbm=0.01,
        sparsityRegularizationRbm=None)

    net.train(trainingScaledVectors,
              trainVectorLabels,
              maxEpochs=maxEpochs,
              validation=False)

    probabilities, predicted = net.classify(testingScaledVectors)
    error = getClassificationError(predicted, testLabels)
    print "error", error
    return error
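# getClassificationError is not shown in these examples; a minimal sketch of
# what it presumably computes -- the fraction of misclassified examples, so
# that 1.0 - error is the accuracy -- assuming `predicted` and `actual` are
# 1-D sequences of integer class labels (an assumption, not the source code):
def getClassificationError(predicted, actual):
    predicted = np.array(predicted)
    actual = np.array(actual)
    return np.sum(predicted != actual) * 1.0 / len(actual)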
Example #2
def trainNetWithAllData():
    unsupervisedData, data, labels = createTrainingSet()

    print "data.shape"
    print data.shape
    print "labels.shape"
    print labels.shape

    data = common.scale(data)
    unsupervisedData = None

    activationFunction = activationfunctions.Rectified()
    rbmActivationFunctionVisible = activationfunctions.Identity()
    rbmActivationFunctionHidden = activationfunctions.RectifiedNoisy()

    unsupervisedLearningRate = 0.0001
    supervisedLearningRate = 0.001
    momentumMax = 0.99

    print "This is input data shape", data.shape
    print labels.shape

    net = db.DBN(4, [1200, 1500, 1000, len(args.emotions)],
                 binary=False,
                 activationFunction=activationFunction,
                 rbmActivationFunctionVisible=rbmActivationFunctionVisible,
                 rbmActivationFunctionHidden=rbmActivationFunctionHidden,
                 unsupervisedLearningRate=unsupervisedLearningRate,
                 supervisedLearningRate=supervisedLearningRate,
                 momentumMax=momentumMax,
                 nesterovMomentum=True,
                 rbmNesterovMomentum=True,
                 rmsprop=True,
                 miniBatchSize=20,
                 hiddenDropout=0.5,
                 visibleDropout=0.8,
                 momentumFactorForLearningRateRBM=False,
                 firstRBMheuristic=False,
                 rbmVisibleDropout=1.0,
                 rbmHiddenDropout=1.0,
                 preTrainEpochs=10,
                 sparsityConstraintRbm=False,
                 sparsityRegularizationRbm=0.001,
                 sparsityTragetRbm=0.01)

    net.train(data,
              labels,
              maxEpochs=200,
              validation=False,
              unsupervisedData=unsupervisedData)

    with open(args.net_file, "wb") as f:
        pickle.dump(net, f)
    return net
Example #3
def testPicklingDBN():
  data, labels = readKanade(False, None, equalize=False)

  print "data.shape"
  print data.shape
  print "labels.shape"
  print labels.shape

  # Keep only the first fold of a 5-fold split for training and testing
  kf = cross_validation.KFold(n=len(data), n_folds=5)
  for train, test in kf:
    break

  if args.relu:
    activationFunction = Rectified()
    unsupervisedLearningRate = 0.05
    supervisedLearningRate = 0.01
    momentumMax = 0.95
    data = scale(data)
    rbmActivationFunctionVisible = Identity()
    rbmActivationFunctionHidden = RectifiedNoisy()

  else:
    activationFunction = Sigmoid()
    rbmActivationFunctionVisible = Sigmoid()
    rbmActivationFunctionHidden = Sigmoid()

    unsupervisedLearningRate = 0.5
    supervisedLearningRate = 0.1
    momentumMax = 0.9

  trainData = data[train]
  trainLabels = labels[train]

  # TODO: this might require more thought
  net = db.DBN(5, [1200, 1500, 1500, 1500, 7],
             binary=not args.relu,
             activationFunction=activationFunction,
             rbmActivationFunctionVisible=rbmActivationFunctionVisible,
             rbmActivationFunctionHidden=rbmActivationFunctionHidden,
             unsupervisedLearningRate=unsupervisedLearningRate,
             supervisedLearningRate=supervisedLearningRate,
             momentumMax=momentumMax,
             nesterovMomentum=True,
             rbmNesterovMomentum=True,
             rmsprop=True,
             miniBatchSize=20,
             hiddenDropout=0.5,
             visibleDropout=0.8,
             rbmVisibleDropout=1.0,
             rbmHiddenDropout=1.0,
             preTrainEpochs=1)

  net.train(trainData, trainLabels, maxEpochs=10,
            validation=False,
            unsupervisedData=None,
            trainingIndices=train)

  initialDict = net.__dict__


  with open(args.netFile, "wb") as f:
    pickle.dump(net, f)

  with open(args.netFile, "rb") as f:
    net = pickle.load(f)

  afterDict = net.__dict__

  del initialDict['rbmActivationFunctionHidden']
  del initialDict['rbmActivationFunctionVisible']

  del afterDict['rbmActivationFunctionHidden']
  del afterDict['rbmActivationFunctionVisible']


  for key in initialDict:
    assert key in afterDict
    if isinstance(initialDict[key], (np.ndarray, np.generic)):
      assert np.array_equal(initialDict[key], afterDict[key])
    else:
      assert initialDict[key] == afterDict[key]
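# A hedged refactoring sketch (not in the source): the comparison loop above
# could be factored into a reusable helper, assuming dict values are either
# numpy arrays or plainly comparable objects.
def assertDictsEqual(before, after, ignoredKeys=()):
  for key in before:
    if key in ignoredKeys:
      continue
    assert key in after
    if isinstance(before[key], (np.ndarray, np.generic)):
      assert np.array_equal(before[key], after[key])
    else:
      assert before[key] == after[key]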
Example #4
def deepbeliefKaggleCompetitionSmallDataset(big=False):
    print "you are using the net file", args.netFile
    print "after nefile"
    trainData, trainLabels = readKaggleCompetitionSmallDataset(
        args.equalize, True)
    print trainData.shape
    print trainLabels.shape

    print "after train"
    testData, testLabels = readKaggleCompetitionSmallDataset(
        args.equalize, False)
    print testData.shape
    print testLabels.shape

    if args.relu:
        activationFunction = Rectified()
        unsupervisedLearningRate = 0.5
        supervisedLearningRate = 0.01
        momentumMax = 0.95
        trainData = scale(trainData)
        rbmActivationFunctionVisible = Identity()
        rbmActivationFunctionHidden = RectifiedNoisy()
    else:
        print "in else"
        activationFunction = Sigmoid()
        rbmActivationFunctionVisible = Sigmoid()
        rbmActivationFunctionHidden = Sigmoid()

        unsupervisedLearningRate = 0.5
        supervisedLearningRate = 0.1
        momentumMax = 0.9

    if args.train:
        print "In training"

        net = db.DBN(5, [2304, 1500, 1500, 1500, 7],
                     binary=not args.relu,
                     activationFunction=activationFunction,
                     rbmActivationFunctionVisible=rbmActivationFunctionVisible,
                     rbmActivationFunctionHidden=rbmActivationFunctionHidden,
                     unsupervisedLearningRate=unsupervisedLearningRate,
                     supervisedLearningRate=supervisedLearningRate,
                     momentumMax=momentumMax,
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     rmsprop=args.rmsprop,
                     miniBatchSize=args.miniBatchSize,
                     save_best_weights=args.save_best_weights,
                     firstRBMheuristic=False,
                     hiddenDropout=0.5,
                     visibleDropout=0.8,
                     rbmVisibleDropout=1.0,
                     rbmHiddenDropout=1.0,
                     initialInputShape=(48, 48),
                     preTrainEpochs=args.preTrainEpochs)

        unsupervisedData = readKaggleCompetitionUnlabelled()
        #unsupervisedData = None

        print "net print", net
        # Changed April 12: the keyword-argument call below was replaced by
        # the positional call that follows.
        # net.train(trainData, trainLabels, maxEpochs=args.maxEpochs,
        #           validation=args.validation,
        #           unsupervisedData=unsupervisedData)

        net.train(trainData, trainLabels, args.maxEpochs, args.validation,
                  unsupervisedData)
    else:
        # Take the saved network and use that for reconstructions
        with open(args.netFile, "rb") as f:
            net = pickle.load(f)

    print "nr layers: ", net.layerSizes

    probs, predicted = net.classify(testData)

    correct = 0
    errorCases = []

    for i in xrange(len(testData)):
        print "predicted"
        print "probs"
        print probs[i]
        print "predicted"
        print predicted[i]
        print "actual"
        actual = testLabels[i]
        print np.argmax(actual)
        if predicted[i] == np.argmax(actual):
            correct += 1
        else:
            errorCases.append(i)

    print "correct"
    print correct

    print "percentage correct"
    print correct * 1.0 / len(testData)

    confMatrix = confusion_matrix(np.argmax(testLabels, axis=1), predicted)
    print "confusion matrix"
    print confMatrix

    if args.save:
        with open(args.netFile, "wb") as f:
            print "you are saving in file", args.netFile
            pickle.dump(net, f)
Example #5
def cvadversarialMNIST():
    training = args.trainSize
    testing = args.testSize

    trainVectors, trainLabels =\
        readmnist.read(0, training, bTrain=True, path=args.path)
    testVectors, testLabels =\
        readmnist.read(0, testing, bTrain=False, path=args.path)
    print trainVectors[0].shape

    trainVectors, trainLabels = shuffle(trainVectors, trainLabels)

    activationFunction = Sigmoid()

    trainingScaledVectors = trainVectors / 255.0

    vectorLabels = labelsToVectors(trainLabels, 10)

    bestFold = -1
    bestError = np.inf

    params = [(5e-02, 1e-02), (1e-02, 5e-02), (5e-02, 5e-02), (1e-02, 5e-03),
              (5e-02, 5e-03)]

    correctness = []

    nrFolds = len(params)

    kf = cross_validation.KFold(n=training, n_folds=nrFolds)

    i = 0
    for train, test in kf:
        print "cv fold", i
        print "params", params[i]

        net = db.DBN(5, [784, 1500, 1500, 1500, 10],
                     binary=False,
                     unsupervisedLearningRate=params[i][0],
                     supervisedLearningRate=params[i][1],
                     momentumMax=0.95,
                     activationFunction=activationFunction,
                     rbmActivationFunctionVisible=activationFunction,
                     rbmActivationFunctionHidden=activationFunction,
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     rmsprop=args.rmsprop,
                     save_best_weights=args.save_best_weights,
                     hiddenDropout=0.5,
                     visibleDropout=0.8,
                     rbmHiddenDropout=1.0,
                     rbmVisibleDropout=1.0,
                     adversarial_training=args.adversarial_training,
                     adversarial_coefficient=0.5,
                     adversarial_epsilon=1.0 / 255,
                     weightDecayL1=0,
                     weightDecayL2=0,
                     preTrainEpochs=args.preTrainEpochs)

        net.train(trainingScaledVectors[train],
                  vectorLabels[train],
                  maxEpochs=args.maxEpochs,
                  validation=args.validation)

        probabilities, predicted = net.classify(trainingScaledVectors[test])
        # Measure the misclassification error on this fold's held-out data
        error = getClassificationError(predicted, trainLabels[test])

        print "error for " + str(params[i])
        print error

        correct = 1.0 - error

        if error < bestError:
            bestError = error
            bestFold = i

        i += 1

        correctness += [correct]

    print "best fold was " + str(bestFold)
    print "bestParameter " + str(params[bestFold])
    print "bestError " + str(bestError)

    for i in xrange(len(params)):
        print "parameter tuple " + str(
            params[i]) + " achieved correctness of " + str(correctness[i])
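# labelsToVectors is imported from elsewhere in the project; a plausible
# sketch that one-hot encodes integer labels into rows of length nrClasses
# (behaviour inferred from how the result is used, not the source definition):
def labelsToVectors(labels, nrClasses):
    result = np.zeros((len(labels), nrClasses), dtype='float')
    for i, label in enumerate(labels):
        result[i, label] = 1.0
    return result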
Example #6
def cvMNISTGaussian():
    training = args.trainSize

    trainVectors, trainLabels =\
        readmnist.read(0, training, bTrain=True, path=args.path)

    trainVectors, trainLabels = shuffle(trainVectors, trainLabels)

    trainVectors = np.array(trainVectors, dtype='float')

    # Ensure the data has zero mean and 1 variance
    trainingScaledVectors = scale(trainVectors)
    vectorLabels = labelsToVectors(trainLabels, 10)

    bestFold = -1
    bestError = np.inf

    params = [(5e-03, 1e-02), (1e-02, 5e-02), (5e-03, 5e-02), (1e-02, 5e-03),
              (5e-03, 5e-03), (1e-02, 1e-02)]

    correctness = []

    nrFolds = len(params)

    kf = cross_validation.KFold(n=training, n_folds=nrFolds)

    i = 0
    for train, test in kf:
        # Train the net
        # Try 1200, 1200, 1200
        net = db.DBN(5, [784, 1000, 1000, 1000, 10],
                     binary=False,
                     unsupervisedLearningRate=params[i][0],
                     supervisedLearningRate=params[i][1],
                     momentumMax=0.95,
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     activationFunction=Rectified(),
                     rbmActivationFunctionVisible=Identity(),
                     rbmActivationFunctionHidden=RectifiedNoisy(),
                     rmsprop=args.rmsprop,
                     save_best_weights=args.save_best_weights,
                     visibleDropout=0.8,
                     hiddenDropout=0.5,
                     weightDecayL1=0,
                     weightDecayL2=0,
                     rbmHiddenDropout=1.0,
                     rbmVisibleDropout=1.0,
                     miniBatchSize=args.miniBatchSize,
                     preTrainEpochs=args.preTrainEpochs,
                     sparsityConstraintRbm=False,
                     sparsityTragetRbm=0.01,
                     sparsityRegularizationRbm=None)

        net.train(trainingScaledVectors[train],
                  vectorLabels[train],
                  maxEpochs=args.maxEpochs,
                  validation=args.validation)

        probabilities, predicted = net.classify(trainingScaledVectors[test])
        # Measure the misclassification error on this fold's held-out data
        error = getClassificationError(predicted, trainLabels[test])

        print "error for " + str(params[i])
        print error

        correct = 1.0 - error

        if error < bestError:
            bestError = error
            bestFold = i

        i += 1

        correctness += [correct]

    print "best fold was " + str(bestFold)
    print "bestParameter " + str(params[bestFold])
    print "bestError " + str(bestError)

    for i in xrange(len(params)):
        print "parameter tuple " + str(
            params[i]) + " achieved correctness of " + str(correctness[i])
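# `scale` is imported from elsewhere; given the comment in cvMNISTGaussian
# ("zero mean and 1 variance"), a minimal per-feature standardization sketch
# would be the following (the epsilon guarding against constant features is
# an assumption):
def scale(data):
    means = data.mean(axis=0)
    stds = data.std(axis=0)
    return (data - means) / (stds + 1e-8)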
Example #7
def deepbeliefMNISTGaussian():
    training = args.trainSize
    testing = args.testSize

    trainVectors, trainLabels =\
        readmnist.read(0, training, bTrain=True, path=args.path)
    testVectors, testLabels =\
        readmnist.read(0, testing, bTrain=False, path=args.path)
    print trainVectors[0].shape

    trainVectors, trainLabels = shuffle(trainVectors, trainLabels)

    trainVectors = np.array(trainVectors, dtype='float')
    trainingScaledVectors = scale(trainVectors)

    testVectors = np.array(testVectors, dtype='float')
    testingScaledVectors = scale(testVectors)

    vectorLabels = labelsToVectors(trainLabels, 10)

    unsupervisedLearningRate = 0.005
    supervisedLearningRate = 0.005
    momentumMax = 0.97
    sparsityTragetRbm = 0.01
    sparsityConstraintRbm = False
    sparsityRegularizationRbm = 0.005

    if args.train:
        net = db.DBN(5, [784, 1200, 1200, 1200, 10],
                     binary=False,
                     unsupervisedLearningRate=unsupervisedLearningRate,
                     supervisedLearningRate=supervisedLearningRate,
                     momentumMax=momentumMax,
                     activationFunction=Rectified(),
                     rbmActivationFunctionVisible=Identity(),
                     rbmActivationFunctionHidden=RectifiedNoisy(),
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     save_best_weights=args.save_best_weights,
                     rmsprop=args.rmsprop,
                     hiddenDropout=0.5,
                     visibleDropout=0.8,
                     rbmHiddenDropout=1.0,
                     rbmVisibleDropout=1.0,
                     weightDecayL1=0,
                     weightDecayL2=0,
                     sparsityTragetRbm=sparsityTragetRbm,
                     sparsityConstraintRbm=sparsityConstraintRbm,
                     sparsityRegularizationRbm=sparsityRegularizationRbm,
                     preTrainEpochs=args.preTrainEpochs)

        net.train(trainingScaledVectors,
                  vectorLabels,
                  maxEpochs=args.maxEpochs,
                  validation=args.validation)
    else:
        # Take the saved network and use that for reconstructions
        f = open(args.netFile, "rb")
        net = pickle.load(f)
        f.close()

    probs, predicted = net.classify(testingScaledVectors)
    print type(predicted)
    correct = 0
    errorCases = []
    for i in xrange(testing):
        print "predicted"
        print "probs"
        print probs[i]
        print predicted[i]
        print "actual"
        actual = testLabels[i]
        print actual
        if predicted[i] == actual:
            correct += 1
        else:
            errorCases.append(i)

    print "correct"
    print correct

    if args.save:
        f = open(args.netFile, "wb")
        pickle.dump(net, f)
        f.close()
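# `shuffle` keeps the data vectors and their labels aligned; a hedged sketch
# that applies one random permutation to both arrays (assumed behaviour, not
# the source definition):
def shuffle(vectors, labels):
    permutation = np.random.permutation(len(vectors))
    return np.array(vectors)[permutation], np.array(labels)[permutation]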
Example #8
def deepbeliefMNIST():
    assert not args.relu, "do not run this method for rectified linear units"

    training = args.trainSize
    testing = args.testSize

    trainVectors, trainLabels =\
        readmnist.read(0, training, bTrain=True, path=args.path)
    testVectors, testLabels =\
        readmnist.read(0, testing, bTrain=False, path=args.path)
    print trainVectors[0].shape

    trainVectors, trainLabels = shuffle(trainVectors, trainLabels)

    activationFunction = Sigmoid()

    trainingScaledVectors = trainVectors / 255.0
    testingScaledVectors = testVectors / 255.0

    vectorLabels = labelsToVectors(trainLabels, 10)

    unsupervisedLearningRate = 0.01
    supervisedLearningRate = 0.05
    momentumMax = 0.95

    if args.train:
        net = db.DBN(5, [784, 1000, 1000, 1000, 10],
                     binary=False,
                     unsupervisedLearningRate=unsupervisedLearningRate,
                     supervisedLearningRate=supervisedLearningRate,
                     momentumMax=momentumMax,
                     activationFunction=activationFunction,
                     rbmActivationFunctionVisible=activationFunction,
                     rbmActivationFunctionHidden=activationFunction,
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     rmsprop=args.rmsprop,
                     hiddenDropout=0.5,
                     visibleDropout=0.8,
                     rbmHiddenDropout=1.0,
                     rbmVisibleDropout=1.0,
                     save_best_weights=args.save_best_weights,
                     adversarial_training=args.adversarial_training,
                     adversarial_coefficient=0.5,
                     adversarial_epsilon=1.0 / 255,
                     weightDecayL1=0,
                     weightDecayL2=0,
                     preTrainEpochs=args.preTrainEpochs)
        net.train(trainingScaledVectors,
                  vectorLabels,
                  maxEpochs=args.maxEpochs,
                  validation=args.validation)
    else:
        # Take the saved network and use that for reconstructions
        f = open(args.netFile, "rb")
        net = pickle.load(f)
        f.close()

    probs, predicted = net.classify(testingScaledVectors)
    correct = 0
    errorCases = []
    for i in xrange(testing):
        print "predicted"
        print "probs"
        print probs[i]
        print predicted[i]
        print "actual"
        actual = testLabels[i]
        print actual
        if predicted[i] == actual:
            correct += 1
        else:
            errorCases.append(i)

    # Mistakes for digits
    # You just need to display some for the report
    # trueDigits = testLabels[errorCases]
    # predictedDigits = predicted[errorCases]

    print "correct"
    print correct

    # for w in net.weights:
    #   print w

    # for b in net.biases:
    #   print b

    # t = visualizeWeights(net.weights[0].T, (28, 28), (10, 10))
    # plt.imshow(t, cmap=plt.cm.gray)
    # plt.show()
    # print "done"

    if args.save:
        f = open(args.netFile, "wb")
        pickle.dump(net, f)
        f.close()
Example #9
def cvMNIST():
    assert not args.relu, "do not run this function for rectified linear units"
    training = args.trainSize

    data, labels =\
        readmnist.read(0, training, bTrain=True, path=args.path)

    data, labels = shuffle(data, labels)
    scaledData = data / 255.0
    vectorLabels = labelsToVectors(labels, 10)

    activationFunction = Sigmoid()

    bestFold = -1
    bestError = np.inf

    if args.relu:
        # params =[(0.01, 0.01) , (0.01, 0.05), (0.05, 0.1), (0.05, 0.05)]
        # params =[(0.0001, 0.01), (0.00001, 0.001), (0.00001, 0.0001), (0.0001, 0.1)]
        params = [(1e-05, 0.001, 0.9), (5e-06, 0.001, 0.9),
                  (5e-05, 0.001, 0.9), (1e-05, 0.001, 0.95),
                  (5e-06, 0.001, 0.95), (5e-05, 0.001, 0.95),
                  (1e-05, 0.001, 0.99), (5e-06, 0.001, 0.99),
                  (5e-05, 0.001, 0.99)]
    else:
        # params =[(0.1, 0.1) , (0.1, 0.05), (0.05, 0.1), (0.05, 0.05)]
        params = [(0.05, 0.05), (0.05, 0.075), (0.075, 0.05), (0.075, 0.075)]
        # params =[(0.05, 0.075, 0.1), (0.05, 0.1, 0.1), (0.01, 0.05, 0.1),
        #          (0.05, 0.075, 0.01), (0.05, 0.1, 0.01), (0.01, 0.05, 0.01),
        #          (0.05, 0.075, 0.001), (0.05, 0.1, 0.001), (0.01, 0.05, 0.001)]

    nrFolds = len(params)
    kf = cross_validation.KFold(n=training, n_folds=nrFolds)

    i = 0
    for training, testing in kf:
        # Train the net
        # Try 1200, 1200, 1200

        trainData = scaledData[training]
        trainLabels = vectorLabels[training]

        # net = db.DBN(5, [784, 1000, 1000, 1000, 10],
        net = db.DBN(5, [784, 500, 500, 2000, 10],
                     binary=not args.relu,
                     unsupervisedLearningRate=params[i][0],
                     supervisedLearningRate=params[i][1],
                     momentumMax=0.95,
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     activationFunction=activationFunction,
                     rbmActivationFunctionVisible=activationFunction,
                     rbmActivationFunctionHidden=activationFunction,
                     rmsprop=args.rmsprop,
                     save_best_weights=args.save_best_weights,
                     visibleDropout=0.8,
                     hiddenDropout=0.5,
                     weightDecayL1=0,
                     weightDecayL2=0,
                     rbmHiddenDropout=1.0,
                     rbmVisibleDropout=1.0,
                     miniBatchSize=args.miniBatchSize,
                     preTrainEpochs=args.preTrainEpochs,
                     sparsityTragetRbm=0.01,
                     sparsityConstraintRbm=False,
                     sparsityRegularizationRbm=None)

        net.train(trainData,
                  trainLabels,
                  maxEpochs=args.maxEpochs,
                  validation=args.validation)

        probabilities, predicted = net.classify(scaledData[testing])

        testLabels = labels[testing]
        # Measure the misclassification error on this fold's held-out data
        error = getClassificationError(predicted, testLabels)

        print "error for " + str(params[i])
        print error

        if error < bestError:
            bestError = error
            bestFold = i
        i += 1

    print "best fold was " + str(bestFold)
    print "bestParameter " + str(params[bestFold])
    print "bestError" + str(bestError)
Example #10
def adversarialMNIST():
    assert not args.relu, "do not run this method for rectified linear units"

    training = args.trainSize
    testing = args.testSize

    trainVectors, trainLabels =\
        readmnist.read(0, training, bTrain=True, path=args.path)
    testVectors, testLabels =\
        readmnist.read(0, testing, bTrain=False, path=args.path)
    print trainVectors[0].shape

    trainVectors, trainLabels = shuffle(trainVectors, trainLabels)

    activationFunction = Sigmoid()

    trainingScaledVectors = trainVectors / 255.0
    testingScaledVectors = testVectors / 255.0

    vectorLabels = labelsToVectors(trainLabels, 10)

    unsupervisedLearningRate = 0.01
    supervisedLearningRate = 0.05
    momentumMax = 0.95

    if args.train:
        net = db.DBN(5, [784, 1000, 1000, 1000, 10],
                     binary=False,
                     unsupervisedLearningRate=unsupervisedLearningRate,
                     supervisedLearningRate=supervisedLearningRate,
                     momentumMax=momentumMax,
                     activationFunction=activationFunction,
                     rbmActivationFunctionVisible=activationFunction,
                     rbmActivationFunctionHidden=activationFunction,
                     nesterovMomentum=args.nesterov,
                     rbmNesterovMomentum=args.rbmnesterov,
                     rmsprop=args.rmsprop,
                     save_best_weights=args.save_best_weights,
                     hiddenDropout=0.5,
                     visibleDropout=0.8,
                     rbmHiddenDropout=1.0,
                     rbmVisibleDropout=1.0,
                     adversarial_training=args.adversarial_training,
                     adversarial_coefficient=0.5,
                     adversarial_epsilon=1.0 / 255,
                     weightDecayL1=0,
                     weightDecayL2=0,
                     preTrainEpochs=args.preTrainEpochs)
        net.train(trainingScaledVectors,
                  vectorLabels,
                  maxEpochs=args.maxEpochs,
                  validation=args.validation)
    else:
        # Take the saved network and use that for reconstructions
        f = open(args.netFile, "rb")
        net = pickle.load(f)
        f.close()

    probs, predicted = net.classify(testingScaledVectors)
    correct = 0
    errorCases = []
    for i in xrange(testing):
        print "predicted"
        print "probs"
        print probs[i]
        print predicted[i]
        print "actual"
        actual = testLabels[i]
        print actual
        if predicted[i] == actual:
            correct += 1
        else:
            errorCases.append(i)

    print "correct"
    print correct
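# Note on the adversarial settings used above: with pixels scaled to [0, 1]
# (division by 255.0), adversarial_epsilon=1.0 / 255 corresponds to a
# one-intensity-level perturbation of the raw 8-bit images, and
# adversarial_coefficient=0.5 weighs the adversarial term against the plain
# objective. This reading is inferred from the parameter names; it is not
# stated in the source.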
Example #11
def getHyperParamsAndBestNet():
    unsupervisedData, data, labels = createTrainingSet()

    print np.unique(np.argmax(labels, axis=1))

    print "data.shape"
    print data.shape
    print "labels.shape"
    print labels.shape

    print data
    data = common.scale(data)
    unsupervisedData = None

    activationFunction = activationfunctions.Rectified()
    rbmActivationFunctionVisible = activationfunctions.Identity()
    rbmActivationFunctionHidden = activationfunctions.RectifiedNoisy()

    tried_params = []
    percentages = []
    best_index = 0
    index = 0
    best_correct = 0

    # 10-fold split; a new random hyperparameter sample is tried on each fold
    kf = cross_validation.KFold(n=len(data), n_folds=10)
    for train, test in kf:
        unsupervisedLearningRate = random.uniform(0.0001, 0.2)
        supervisedLearningRate = random.uniform(0.0001, 0.2)
        momentumMax = random.uniform(0.7, 1)

        tried_params += [{
            'unsupervisedLearningRate': unsupervisedLearningRate,
            'supervisedLearningRate': supervisedLearningRate,
            'momentumMax': momentumMax
        }]

        trainData = data[train]
        trainLabels = labels[train]

        net = db.DBN(4,
                     [1200, 1500, 1000, len(args.emotions)],
                     binary=False,
                     activationFunction=activationFunction,
                     rbmActivationFunctionVisible=rbmActivationFunctionVisible,
                     rbmActivationFunctionHidden=rbmActivationFunctionHidden,
                     unsupervisedLearningRate=unsupervisedLearningRate,
                     supervisedLearningRate=supervisedLearningRate,
                     momentumMax=momentumMax,
                     nesterovMomentum=True,
                     rbmNesterovMomentum=True,
                     rmsprop=True,
                     miniBatchSize=20,
                     hiddenDropout=0.5,
                     visibleDropout=0.8,
                     momentumFactorForLearningRateRBM=False,
                     firstRBMheuristic=False,
                     rbmVisibleDropout=1.0,
                     rbmHiddenDropout=1.0,
                     preTrainEpochs=10,
                     sparsityConstraintRbm=False,
                     sparsityRegularizationRbm=0.001,
                     sparsityTragetRbm=0.01)

        net.train(trainData,
                  trainLabels,
                  maxEpochs=200,
                  validation=False,
                  unsupervisedData=unsupervisedData)

        probs, predicted = net.classify(data[test])

        actualLabels = labels[test]
        correct = 0

        for i in xrange(len(test)):
            actual = actualLabels[i]
            print probs[i]
            if predicted[i] == np.argmax(actual):
                correct += 1

        percentage_correct = correct * 1.0 / len(test)
        print "percentage correct"
        print percentage_correct

        if percentage_correct > best_correct:
            best_index = index
            best_correct = percentage_correct
            with open(args.net_file, "wb") as f:
                pickle.dump(net, f)

        percentages += [percentage_correct]
        index += 1

    print 'best params'
    print tried_params[best_index]
    print 'best percentage correct'
    print best_correct
Example #12
def trainAndTestNet():
    unsupervisedData, data, labels = createTrainingSet()

    print np.unique(np.argmax(labels, axis=1))

    print "data.shape"
    print data.shape
    print "labels.shape"
    print labels.shape

    # Keep only the first fold of a 5-fold split for training and testing
    kf = cross_validation.KFold(n=len(data), n_folds=5)
    for train, test in kf:
        break

    print data
    data = common.scale(data)
    unsupervisedData = None

    activationFunction = activationfunctions.Rectified()
    rbmActivationFunctionVisible = activationfunctions.Identity()
    rbmActivationFunctionHidden = activationfunctions.RectifiedNoisy()

    unsupervisedLearningRate = 0.0001
    supervisedLearningRate = 0.001
    momentumMax = 0.99

    trainData = data[train]
    trainLabels = labels[train]

    net = db.DBN(4, [1200, 1500, 1000, len(args.emotions)],
                 binary=False,
                 activationFunction=activationFunction,
                 rbmActivationFunctionVisible=rbmActivationFunctionVisible,
                 rbmActivationFunctionHidden=rbmActivationFunctionHidden,
                 unsupervisedLearningRate=unsupervisedLearningRate,
                 supervisedLearningRate=supervisedLearningRate,
                 momentumMax=momentumMax,
                 nesterovMomentum=True,
                 rbmNesterovMomentum=True,
                 rmsprop=True,
                 miniBatchSize=20,
                 hiddenDropout=0.5,
                 visibleDropout=0.8,
                 momentumFactorForLearningRateRBM=False,
                 firstRBMheuristic=False,
                 rbmVisibleDropout=1.0,
                 rbmHiddenDropout=1.0,
                 preTrainEpochs=10,
                 sparsityConstraintRbm=False,
                 sparsityRegularizationRbm=0.001,
                 sparsityTragetRbm=0.01)

    net.train(trainData,
              trainLabels,
              maxEpochs=200,
              validation=False,
              unsupervisedData=unsupervisedData)

    probs, predicted = net.classify(data[test])

    actualLabels = labels[test]
    correct = 0
    errorCases = []

    for i in xrange(len(test)):
        actual = actualLabels[i]
        print probs[i]
        if predicted[i] == np.argmax(actual):
            correct += 1
        else:
            errorCases.append(i)

    print "correct"
    print correct

    print "percentage correct"
    print correct * 1.0 / len(test)

    confMatrix = confusion_matrix(np.argmax(actualLabels, axis=1), predicted)
    print "confusion matrix"
    print confMatrix

    with open(args.net_file, "wb") as f:
        pickle.dump(net, f)
    return net
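# A short usage sketch (not from the source): reload the net pickled by
# trainAndTestNet and classify further data, which must be scaled the same
# way (common.scale) before calling classify.
def classifyWithSavedNet(scaledData):
    with open(args.net_file, "rb") as f:
        net = pickle.load(f)
    probs, predicted = net.classify(scaledData)
    return probs, predicted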