Example #1
def build_dbn(argv, n_features):
    """argv: units epochs epochs_pretrain learn_rates learn_rates_pretrain"""
    from nolearn.dbn import DBN

    units = [n_features] + [int(n) for n in argv[0].split("-")] + [2]
    n_layers = len(units) - 2

    learn_rates = eval(argv[3])
    learn_rates_pretrain = eval(argv[4])

    parameters = {
        "epochs": int(argv[1]),
        "epochs_pretrain": int(argv[2]),
        "learn_rates": learn_rates,
        "learn_rates_pretrain": learn_rates_pretrain,
        "l2_costs": 0.0,
        "l2_costs_pretrain": 0.0001,
        "momentum": 0.9,
        "verbose": 0,
        "real_valued_vis": True,
        "use_re_lu": False,
        "scales": 0.01,
        "minibatch_size": 200,
        "dropouts": [0.2] + [0.5] * n_layers,
    }

    dbn = DBN(units, **parameters)

    clf = Pipeline(steps=[('scale', StandardScaler()), ('dbn', dbn)])

    return clf
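For orientation, a hedged usage sketch (not part of the original project): the hyper-parameter strings follow the order given in the docstring, and Pipeline / StandardScaler are assumed to be imported at module level from sklearn.pipeline and sklearn.preprocessing.

# Hypothetical call; argv values are illustrative only:
# units epochs epochs_pretrain learn_rates learn_rates_pretrain
clf = build_dbn(["300-100", "50", "10", "0.01", "0.001"], n_features=784)
# build_dbn returns a scikit-learn Pipeline, so it is used like any estimator:
# clf.fit(X_train, y_train); clf.predict(X_test)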
Example #2
File: clf.py  Project: osgee/redigit
    def __init__(self):
        # images_train=data_train[:,1:]
        # trainX, _trainX, trainY, _trainY = train_test_split(images_train/255.,values_train,test_size=0.5)

        # #load test.csv
        # test = pd.read_csv("data/test.csv")
        # data_test=test.as_matrix()
        # testX, _testX = train_test_split(data_test/255.,test_size=0.99)
        
        # Random Forest
        # self.clf = RandomForestClassifier()
        
        # Stochastic Gradient Descent
        # self.clf = SGDClassifier()
        
        # Support Vector Machine
        # self.clf = LinearSVC()
        
        # Nearest Neighbors
        # self.clf = KNeighborsClassifier(n_neighbors=13)
        
        
        train = pd.read_csv("data/train.csv")
        data_train=train.as_matrix()
        values_train=data_train[:,0]
        images_train=data_train[:,1:]
        trainX, _trainX, trainY, _trainY = train_test_split(images_train/255.,values_train,test_size=0.995)
        
        # Neural Network
        self.clf = DBN([trainX.shape[1], 300, 10],
                       learn_rates=0.3,
                       learn_rate_decays=0.9,
                       epochs=10,
                       verbose=1)
        
        #Training
        self.clf.fit(trainX, trainY)
        
        pass
Example #3
    def _build_clf(self):
        clf_params = self.get_params()
        del clf_params["features"]
        del clf_params["layers"]
        clf_params["layer_sizes"] = [-1] + self.layers + [-1]

        return DBN(**clf_params)
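A note on the -1 entries: nolearn's DBN treats -1 layer sizes as placeholders that are filled in from the training data when fit() is called (the number of input features and the number of classes); Example #25 and the comment in Example #28 rely on the same convention. A minimal sketch of the two equivalent spellings, with X_train and n_classes assumed to exist for illustration:

# Sketch only, not from the original class:
clf = DBN([-1, 300, -1], epochs=10, verbose=0)                        # sizes resolved inside fit()
clf = DBN([X_train.shape[1], 300, n_classes], epochs=10, verbose=0)   # sizes fixed up front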
Example #4
def generate_nnet(feats):
    """Generate a neural network.

    Parameters
    ----------
    feats : list with at least one feature vector

    Returns
    -------
    Neural network object
    """
    # Load it here to prevent crash of --help when it's not present
    from nolearn.dbn import DBN

    input_shape = (None,
                   feats[0].shape[0],
                   feats[0].shape[1],
                   feats[0].shape[2])
    logging.info("input shape: %s", input_shape)
    net1 = DBN([input_shape[1] * input_shape[2] * input_shape[3],
                300,
                2],
               learn_rates=0.3,
               learn_rate_decays=0.9,
               epochs=10,
               verbose=1)
    return net1
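A small, hypothetical call to illustrate the expected input: a list of 3-D feature arrays whose channels x height x width product becomes the size of the DBN's input layer.

import numpy as np

# Illustrative only: two fake 3-channel 8x8 feature volumes.
feats = [np.random.rand(3, 8, 8), np.random.rand(3, 8, 8)]
net = generate_nnet(feats)  # DBN with 3 * 8 * 8 = 192 inputs, 300 hidden units, 2 outputs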
Example #5
File: dbn_nolearn.py  Project: LKQer/Kellis
def main():
  data_id = 'B'
  data_path = '/broad/compbio/maxwshen/data/1-MAKETRAINTEST/complete/golf/'
  
  print 'train...', datetime.datetime.now()
  train_set = readin(data_id, 'train', data_path)
  print 'valid...', datetime.datetime.now()
  valid_set = readin(data_id, 'valid', data_path)
  print 'test...', datetime.datetime.now()
  test_set = readin(data_id, 'test', data_path)

  # Input to 300 node RBM to 2 node output
  dbn = DBN(
    [xtrain.shape[1], 300, 2],
    learn_rates=5,
    learn_rate_decays=0.9,
    epochs=31,
    verbose=1)
  dbn.fit(dat_train, y_train)

  preds = dbn.predict(dat_test)
  print classification_report(y_test, preds)

  out_fn = 'dbn.pickle'
  with open(out_fn, 'w') as f:
    pickle.dump(dbn, f)

  return
Example #6
def neuralnetwork(X_train, X_test, y_train, y_test):
    st = "NN"
    print "Neural Network"
    # labels.append(st)
    clf_nn = DBN(learn_rates=0.3,learn_rate_decays=0.9,epochs=15)
    clf_nn.fit(X_train, y_train)
    acc_nn = clf_nn.score(X_test,y_test)
    accuracies.append(acc_nn*100)
Example #7
def deep_belief_network():
    dbn = DBN([trainX.shape[1], 300, 10],
              learn_rates=0.3,
              learn_rate_decays=0.9,
              epochs=10,
              verbose=1)
    dbn.fit(trainX, trainY)
    preds = dbn.predict(testX)
    print classification_report(testY, preds)
Example #8
def getOfflineClassifiers(param):
    batch_classifiers = {
        'SVM': svm.SVC(gamma=0.001, C=10., class_weight="auto"),
        #'Random Forest': RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        'LR': LogisticRegression(class_weight="auto"),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'DT': tree.DecisionTreeClassifier(),
        'DBN': DBN([param, 300, 2],
                   learn_rates=0.3,
                   learn_rate_decays=0.9,
                   epochs=2,
                   verbose=1),
    }
    return batch_classifiers
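For context, the returned dictionary is meant to be iterated classifier by classifier, much as runOfflineML in Example #17 does. A minimal, hypothetical driver (X_train, y_train, X_test, y_test assumed to exist):

# Hypothetical driver; `param` is the feature dimensionality expected by the DBN entry.
classifiers = getOfflineClassifiers(param=X_train.shape[1])
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    print name, clf.score(X_test, y_test)  # Python 2 print, matching the rest of these examples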
Example #9
def train(args):
    print("Loading embeddings.")
    fname = "{}/labels.csv".format(args.workDir)
    labels = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    labels = map(itemgetter(1),
                 map(os.path.split,
                     map(os.path.dirname, labels)))  # Get the directory.
    fname = "{}/reps.csv".format(args.workDir)
    embeddings = pd.read_csv(fname, header=None).as_matrix()
    le = LabelEncoder().fit(labels)
    labelsNum = le.transform(labels)
    nClasses = len(le.classes_)
    print("Training for {} classes.".format(nClasses))

    if args.classifier == 'LinearSvm':
        clf = SVC(C=1, kernel='linear', probability=True)
    elif args.classifier == 'GMM':  # Doesn't work best
        clf = GMM(n_components=nClasses)

    # ref:
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py
    elif args.classifier == 'RadialSvm':  # Radial Basis Function kernel
        # works better with C = 1 and gamma = 2
        clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif args.classifier == 'DecisionTree':  # Doesn't work best
        clf = DecisionTreeClassifier(max_depth=20)
    elif args.classifier == 'GaussianNB':
        clf = GaussianNB()

    # ref: https://jessesw.com/Deep-Learning/
    elif args.classifier == 'DBN':
        from nolearn.dbn import DBN
        clf = DBN([embeddings.shape[1], 500, labelsNum[-1:][0] + 1],  # i/p nodes, hidden nodes, o/p nodes
                  learn_rates=0.3,
                  # Smaller steps mean a possibly more accurate result, but the
                  # training will take longer
                  learn_rate_decays=0.9,
                  # a factor the initial learning rate will be multiplied by
                  # after each iteration of the training
                  epochs=300,  # number of iterations
                  # dropouts = 0.25, # Express the percentage of nodes that
                  # will be randomly dropped as a decimal.
                  verbose=1)

    if args.ldaDim > 0:
        clf_final = clf
        clf = Pipeline([('lda', LDA(n_components=args.ldaDim)),
                        ('clf', clf_final)])

    clf.fit(embeddings, labelsNum)

    fName = "{}/classifier.pkl".format(args.workDir)
    print("Saving classifier to '{}'".format(fName))
    with open(fName, 'w') as f:
        pickle.dump((le, clf), f)
Example #10
File: nn.py  Project: mishadr/Dialect
def train(X, Y, alphabet):
    model = DBN(
        [13, 1000, len(alphabet)],
        learn_rates=0.3,
        learn_rate_decays=0.9,
        epochs=10,
        verbose=1,
    )

    model.fit(X, Y)
    return model
Example #11
    def train(self, dataset):
        (trainX, trainY) = dataset
        dbn = DBN(
            [trainX.shape[1], 300, len(set(trainY))],
            learn_rates=0.5,
            learn_rate_decays=0.9,
            epochs=100,
            verbose=1)
        dbn.fit(trainX, trainY)

        self._dbn = dbn
Example #12
File: classifier.py  Project: 4Dager/4DFace
def train(args):
    print("Loading embeddings.")
    fname = "{}/labels.csv".format(args.workDir)
    labels = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    labels = map(itemgetter(1), map(os.path.split, map(os.path.dirname,
                                                       labels)))
    fname = "{}/reps.csv".format(args.workDir)
    embeddings = pd.read_csv(fname, header=None).as_matrix()
    le = LabelEncoder().fit(labels)
    labelsNum = le.transform(labels)
    nClasses = len(le.classes_)
    print("Training for {} classes.".format(nClasses))

    if args.classifier == 'LinearSvm':
        clf = SVC(C=1, kernel='linear', probability=True)
    elif args.classifier == 'GridSearchSvm':
        param_grid = [{
            'C': [1, 10, 100, 1000],
            'kernel': ['linear']
        }, {
            'C': [1, 10, 100, 1000],
            'gamma': [0.001, 0.0001],
            'kernel': ['rbf']
        }]
        clf = GridSearchCV(SVC(C=1, probability=True), param_grid, cv=5)
    elif args.classifier == 'GMM':
        clf = GMM(n_components=nClasses)
    elif args.classifier == 'RadialSvm':
        clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif args.classifier == 'DecisionTree':
        clf = DecisionTreeClassifier(max_depth=20)
    elif args.classifier == 'GaussianNB':
        clf = GaussianNB()

    elif args.classifier == 'DBN':
        from nolearn.dbn import DBN
        clf = DBN([embeddings.shape[1], 500, labelsNum[-1:][0] + 1],
                  learn_rates=0.3,
                  learn_rate_decays=0.9,
                  epochs=300,
                  verbose=1)

    if args.ldaDim > 0:
        clf_final = clf
        clf = Pipeline([('lda', LDA(n_components=args.ldaDim)),
                        ('clf', clf_final)])

    clf.fit(embeddings, labelsNum)

    fName = "{}/classifier.pkl".format(args.workDir)
    print("Saving classifier to '{}'".format(fName))
    with open(fName, 'w') as f:
        pickle.dump((le, clf), f)
Example #13
def train_clf(dim, X, y, classificator):
    print("Training for {} classes".format(dim[2]))
    if classificator == "DBN":
        clf = DBN(dim,
                  learn_rates=dbn_learn_rates,
                  learn_rate_decays=dbn_learn_rate_decays,
                  epochs=dbn_epochs,
                  minibatch_size=dbn_minibatch_size,
                  verbose=dbn_verbose,
                  dropouts=dbn_dropouts)
    elif classificator == "GaussianNB":
        clf = GaussianNB()

    clf.fit(X, y)

    return clf
Example #14
File: dbn.py  Project: LKQer/Kellis
def main():
    data_fn = '/home/ec2-user/Kellis/data/bravo.formatted/dat.all.txt'
    blacklist_fn = '/home/ec2-user/Kellis/data/bravo.formatted/dat.blacklist.txt'
    y_fn = '/home/ec2-user/Kellis/data/bravo.formatted/dat.y.txt'

    data = read_delimited_txt(data_fn, '\t')
    blacklist = read_delimited_txt(blacklist_fn, '\t')
    y = read_delimited_txt(y_fn, '\t')

    # Get names and remove the first element of each row which is the row number
    names = data[0]
    data = data[1:]
    for i in range(len(data)):
        data[i] = data[i][1:]

    y = y[1:]
    for i in range(len(y)):
        y[i] = y[i][-1]
    y = convert_y_binary(y)

    # Normalizes column-wise so all values are between 0 and 1
    data = normalize_0_1(data)

    # Split into training, testing
    xtrain, xtest, ytrain, ytest = train_test_split(data,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

    # Input to 300 node RBM to 2 node output
    dbn = DBN(
      [xtrain.shape[1], 300, 2],
      learn_rates=5,
      learn_rate_decays=0.9,
      epochs=501,
      verbose=1)
    dbn.fit(xtrain, ytrain)

    preds = dbn.predict(xtest)
    print classification_report(ytest, preds)

    out_fn = 'dbn.pickle'
    with open(out_fn, 'w') as f:
        pickle.dump(dbn, f)

    return
Example #15
def train(workDir, classifier='DBN', ldaDim=-1):
    """
    Function that performs training on the image representations using the default classifier model. A different model can be specified.

    Parameters: workDir (directory of image representations), classifier, ldaDim
    Returns: a classifier pickle file written to workDir

    """

    print('Loading embeddings')
    file_name = '{}/labels.csv'.format(workDir)
    labels = pd.read_csv(file_name, header=None).as_matrix()[:, 1]
    labels = map(itemgetter(1), map(os.path.split,
                                    map(os.path.dirname,
                                        labels)))  #Gets the image directory
    file_name = "{}/reps.csv".format(workDir)
    embeddings = pd.read_csv(file_name, header=None).as_matrix()
    le = LabelEncoder().fit(labels)
    labelsNum = le.transform(labels)
    nClasses = len(le.classes_)
    print("Training for {} classes.".format(nClasses))

    if classifier == 'LinearSvm':
        clf = SVC(C=1, kernel='linear', probability=True)
    elif classifier == 'GaussianNB':
        clf = GaussianNB()
    elif classifier == 'DBN':
        from nolearn.dbn import DBN
        print(labelsNum[-1:][0] + 1, embeddings.shape)
        clf = DBN(
            [embeddings.shape[1], 500, labelsNum[-1:][0] + 1],
            learn_rates=0.3,
            learn_rate_decays=0.9,
            learn_rates_pretrain=0.005,
            epochs=300,  #No of iterations
            minibatch_size=1
        )  #for a small data size set minibatch_size to 1. Otherwise the default is 64
    if ldaDim > 0:
        clf_final = clf
        clf = Pipeline([('lda', LDA(n_components=ldaDim)), ('clf', clf_final)])
    clf.fit(embeddings, labelsNum)

    file_name = '{}/classifier.pkl'.format(workDir)
    print("Saving classifier to '{}'".format(file_name))
    with open(file_name, 'w') as f:
        pickle.dump((le, clf), f)
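A hedged usage note: workDir is expected to already contain labels.csv and reps.csv (as produced by an OpenFace-style embedding step), and the function writes classifier.pkl back into the same directory.

# Hypothetical call; "./generated-embeddings" must already hold labels.csv and reps.csv.
train("./generated-embeddings", classifier='DBN', ldaDim=-1)
# Afterwards ./generated-embeddings/classifier.pkl holds the (LabelEncoder, classifier) pair.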
Example #16
def run():
    X_train, Y_train = load_training_data()

    X_train, Y_train = rotate_dataset(X_train, Y_train, 8)
    X_train, Y_train = nudge_dataset(X_train, Y_train)

    n_features = X_train.shape[1]
    n_classes = 10
    classifier = DBN([n_features, 8000, n_classes],
                     learn_rates=0.4,
                     learn_rate_decays=0.9,
                     epochs=75,
                     verbose=1)

    classifier.fit(X_train, Y_train)

    test_data = get_test_data_set()
    predictions = classifier.predict(test_data)
    write_predictions_to_csv(predictions)
Example #17
def runOfflineML(y, X, classifiers, savemodel=False):
    X_train, X_test, y_train, y_test = train_test_split(X, y.astype("int0"), test_size=0.20, random_state=0)
    data = dict(
            x_train=X_train,
            x_test=X_test,
            y_train=y_train,
            y_test=y_test
        )
    cls_stats = initClsStats(classifiers)
    for cls_name, cls in classifiers.items():
        cls_stats[cls_name]['n_train'] = data['x_train'].shape[0]
        cls_stats[cls_name]['n_test'] = data['x_test'].shape[0]
        cls_stats[cls_name]['n_features'] = data['x_train'].shape[1]
        tick = time.time()
        if cls_name == 'DBN':
            data = dataNormalise(data)
            clf = DBN([data['x_train'].shape[1], 300, 2],
                      learn_rates=0.3,
                      learn_rate_decays=0.9,
                      epochs=10,
                      verbose=1)
            clf.fit(data['x_train'], data['y_train'])
        else:
            clf = classifiers[cls_name].fit(data['x_train'], data['y_train'])
        if savemodel:
            pickle.dump(clf, open(cls_name + '.dat', 'w'))
            clf = pickle.load(open(cls_name + '.dat', 'r'))
        cls_stats[cls_name]['training_time'] += time.time() - tick
        # check the accuracy on the training set
        tick = time.time()
        predicted = clf.predict(data['x_test'])
        cls_stats[cls_name]['testing_time'] += time.time() - tick
        acc = metrics.accuracy_score(data['y_test'], predicted)
        cls_stats[cls_name]['accuracy'] = acc
        print cls_name, "accuracy is: " + str(acc)
        #auc = metrics.roc_auc_score(data['y_test'], probs[:, 1])
        conf_matrix = metrics.confusion_matrix(data['y_test'], predicted)
        cls_stats[cls_name]['conf_matrix'] = conf_matrix
        #print conf_matrix
        precision, recall, fscore, support = metrics.precision_recall_fscore_support(data['y_test'], predicted)
        cls_stats[cls_name]['precision'] = precision
        cls_stats[cls_name]['recall'] = recall
        cls_stats[cls_name]['fscore'] = fscore
        cls_stats[cls_name]['support'] = support
    return cls_stats
Example #18
    def fit(self, X, y, X_pretrain=None):
        from nolearn.dbn import DBN

        if y.ndim == 2:
            n_outputs = y.shape[1]
        else:
            y = y[:, np.newaxis]
            n_outputs = 1

        params = dict(self.__dict__)
        from gdbn.activationFunctions import Linear
        params['output_act_funct'] = Linear()

        n_units = params.pop('n_units')
        n_hidden_layers = params.pop('n_hidden_layers')
        if isinstance(n_units, int):
            units = [n_units] * n_hidden_layers
        else:
            units = n_units
        units = [X.shape[1]] + units + [n_outputs]
        self.dbn = DBN(units, **params)
        print X.shape
        self.dbn.fit(X, y, X_pretrain=X_pretrain)
Example #19
def compute_dbn(features, labels, n_classes):
    nb_folds = 10
    nb_n_classes = len(n_classes)
    skf = StratifiedKFold(labels, nb_folds)
    avg_precision = np.zeros([
        nb_n_classes,
    ])
    avg_recall = np.zeros([
        nb_n_classes,
    ])
    avg_accuracy = 0
    for train_index, test_index in skf:
        x_train = features[train_index]
        y_train = labels[train_index]
        x_test = features[test_index]
        y_test = labels[test_index]
        clf_DBN = DBN([x_train.shape[1], 300, nb_n_classes],
                      learn_rates=0.3,
                      learn_rate_decays=0.9,
                      epochs=10,
                      verbose=0)
        clf_DBN.fit(x_train, y_train)
        y_test_pred = clf_DBN.predict(x_test)
        y_test_pred = [int(i) for i in y_test_pred]
        precision = precision_score(y_test, y_test_pred, average=None)
        recall = recall_score(y_test, y_test_pred, average=None)
        avg_accuracy += accuracy_score(y_test, y_test_pred)
        avg_precision = np.add(avg_precision, precision)
        avg_recall = np.add(avg_recall, recall)
    avg_precision /= nb_folds
    avg_recall /= nb_folds
    avg_accuracy /= nb_folds
    print "-------------Testing DBN Accuracy------------"
    print "accuracy score", np.around(avg_accuracy, decimals=3)
    print "precision score", np.around(avg_precision, decimals=3)
    print 'recall score', np.around(avg_recall, decimals=3)
Example #20
    def train(self, workDir, classifier, ldaDim):
        fname = "{}labels.csv".format(workDir)  #labels of faces
        logger.info("Loading labels " + fname + " csv size: " +
                    str(os.path.getsize("{}reps.csv".format(workDir))))
        print("Loading labels " + fname + " csv size: " +
              str(os.path.getsize("{}reps.csv".format(workDir))))
        if os.path.getsize(fname) > 0:
            logger.info(fname + " file is not empty")
            print(fname + " file is not empty")
            #labels = pd.read_csv(fname, header=None).as_matrix()[:, 1]
            pd_csv = pd.read_csv(fname, header=None).to_numpy()
            print(pd_csv)
            #labels = pd_csv.values[:, 1]
            labels = pd_csv[:, 1]
            logger.info(labels)
        else:
            logger.info(fname + " file is empty")
            print(fname + " file is empty")
            labels = "1:aligned-images/dummy/1.png"  #creating a dummy string to start the process
        #print("1>labels: {}".format(labels))
        logger.debug(map(os.path.dirname, labels))
        logger.debug(map(os.path.split, map(os.path.dirname, labels)))
        logger.debug(
            map(itemgetter(1), map(os.path.split, map(os.path.dirname,
                                                      labels))))

        labels = list(
            map(itemgetter(1), map(os.path.split, map(os.path.dirname,
                                                      labels))))
        #print("2>labels: {}".format(labels))

        fname = "{}reps.csv".format(workDir)  # Representations of faces
        fnametest = format(workDir) + "reps.csv"
        logger.info("Loading embedding " + fname + " csv size: " +
                    str(os.path.getsize(fname)))
        if os.path.getsize(fname) > 0:
            logger.info(fname + " file is not empty")
            embeddings = np.load('{}reps.npy'.format(workDir),
                                 allow_pickle=True)
        else:
            logger.info(fname + " file is empty")
            embeddings = np.zeros(
                (2, 150))  #creating an empty array since csv is empty

        #print("embeddings", embeddings)

        print("labels {}".format(labels))
        # LabelEncoder is a utility class to help normalize labels such that they contain only values between 0 and n_classes-1
        self.le = LabelEncoder().fit(labels)
        # Fits labels to model
        labelsNum = self.le.transform(labels)
        nClasses = len(self.le.classes_)
        logger.info("Training for {} classes.".format(nClasses))

        if classifier == 'LinearSvm':
            self.clf = SVC(C=1, kernel='linear', probability=True)
        elif classifier == 'GridSearchSvm':
            print("""
            Warning: In our experiences, using a grid search over SVM hyper-parameters only
            gives marginally better performance than a linear SVM with C=1 and
            is not worth the extra computations of performing a grid search.
            """)
            param_grid = [{
                'C': [1, 10, 100, 1000],
                'kernel': ['linear']
            }, {
                'C': [1, 10, 100, 1000],
                'gamma': [0.001, 0.0001],
                'kernel': ['rbf']
            }]
            self.clf = GridSearchCV(SVC(C=1, probability=True),
                                    param_grid,
                                    cv=5)
        elif classifier == 'GMM':
            self.clf = GMM(n_components=nClasses)
        elif classifier == 'RadialSvm':  # Radial Basis Function kernel
            # works better with C = 1 and gamma = 2
            self.clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
        elif classifier == 'DecisionTree':  # Doesn't work best
            self.clf = DecisionTreeClassifier(max_depth=20)
        elif classifier == 'GaussianNB':
            self.clf = GaussianNB()
        # ref: https://jessesw.com/Deep-Learning/
        elif classifier == 'DBN':
            from nolearn.dbn import DBN
            self.clf = DBN(
                [embeddings.shape[1], 500, labelsNum[-1:][0] + 1
                 ],  # i/p nodes, hidden nodes, o/p nodes
                learn_rates=0.3,
                # Smaller steps mean a possibly more accurate result, but the
                # training will take longer
                learn_rate_decays=0.9,
                # a factor the initial learning rate will be multiplied by
                # after each iteration of the training
                epochs=300,  # number of iterations
                # dropouts = 0.25, # Express the percentage of nodes that
                # will be randomly dropped as a decimal.
                verbose=1)

        if ldaDim > 0:
            clf_final = self.clf
            self.clf = Pipeline([('lda', LDA(n_components=ldaDim)),
                                 ('clf', clf_final)])

        self.clf.fit(embeddings, labelsNum)  #link embeddings to labels

        fName = "{}/classifier.pkl".format(workDir)
        logger.info("Saving classifier to '{}'".format(fName))
        print("Saving classifier to '{}'".format(fName))
        with open(fName, 'wb') as f:
            pickle.dump(
                (self.le, self.clf), f
            )  # Creates character stream and writes to file to use for recognition
        print("Training finished!")
Example #21
# dataset = sio.loadmat("/home/sujit/scikit_learn_data/mldata/mnist-original.mat")
trainX = dataset['data'].T[0:20000, 0:] / 255.0
trainY = dataset['label'][0][0:20000]

testX = dataset['data'].T[20000:, 0:] / 255.0
testY = dataset['label'][0][20000:]
# train the Deep Belief Network with 784 input units (the flattened,
# 28x28 grayscale image), 300 hidden units, 10 output units (one for
# each possible output classification, which are the digits 0-9)
print trainX.shape, trainY.shape
# trainY = np.array(range(2000))
print type(trainY), trainY

dbn = DBN([trainX.shape[1], 300, 10],
          learn_rates=0.3,
          learn_rate_decays=0.9,
          epochs=10,
          verbose=1)
dbn.fit(trainX, trainY)

# # compute the predictions for the test data and show a classification
# # report
# preds = dbn.predict(testX)
# print classification_report(testY, preds)

# randomly select a few of the test instances
for i in np.random.choice(np.arange(0, len(testY)), size=(10, )):
    # classify the digit
    # pred = dbn.predict(np.atleast_2d(testX[i]))

    # reshape the feature vector to be a 28x28 pixel image, then change
Example #22
    output[tu] = Y[i]

input = np.zeros((100, 1344), dtype=float)
labels = np.zeros((100, 1), dtype=int)
for i in features.keys():
    input[i, :] = features[i]
    labels[i] = output[i]

print "Begin DBN model"
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.1,
                                                    random_state=i)
dbn_model = DBN([X_train.shape[1], 300, 2],
                learn_rates=0.3,
                learn_rate_decays=0.9,
                epochs=100,
                verbose=1)
dbn_model.fit(X_train, Y_train)
y_true, y_pred = Y_test, dbn_model.predict(X_test)  # Get our predictions
print(classification_report(y_true, y_pred))  # Classification on each digit
print 'The accuracy is:', accuracy_score(y_true, y_pred)

print "Begin DBN V2 model"
classifier = SupervisedDBNClassification(hidden_layers_structure=[1000, 200],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='relu',
Example #23
def classification(x,y,features_independent,dependent):
	dt=DBN(verbose=True)
	dt.fit(np.array(x[features_independent]),np.array(x[dependent]))
	predict=dt.predict(np.array(y[features_independent]))
	return predict
Example #24
    elif args.classifier == 'RadialSvm':  # Radial Basis Function kernel
        # works better with C = 1 and gamma = 2
        clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif args.classifier == 'DecisionTree':  # Doesn't work best
        clf = DecisionTreeClassifier(max_depth=20)
    elif args.classifier == 'GaussianNB':
        clf = GaussianNB()

    #ref: https://jessesw.com/Deep-Learning/
    elif args.classifier == 'DBN':
        from nolearn.dbn import DBN
        clf = DBN([embeddings.shape[1], 500, labelsNum[-1:][0] + 1 ], #i/p nodes, hidden nodes, o/p nodes
                  learn_rates = 0.3, #Smaller steps mean a possibly more accurate result, but the training will take longer
                  learn_rate_decays = 0.9, #a factor the initial learning rate will be multiplied by after each iteration of the training
                  epochs = 300, # number of iterations
                  #dropouts = 0.25, # Express the percentage of nodes that will be randomly dropped as a decimal.
                  verbose = 1)
Example #25
def train(args):
    print("Loading embeddings.")
    fname = "{}/labels.csv".format(args.workDir)
    labels = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    labels = map(itemgetter(1), map(os.path.split,
                                    map(os.path.dirname,
                                        labels)))  # Get the directory.
    fname = "{}/reps.csv".format(args.workDir)
    embeddings = pd.read_csv(fname, header=None).as_matrix()
    le = LabelEncoder().fit(labels)

    labelsNum = le.transform(labels)

    nClasses = len(le.classes_)
    print("Training for {} classes.".format(nClasses))

    if args.classifier == 'LinearSvm':
        clf = SVC(C=1, kernel='linear', probability=True)
    elif args.classifier == 'GridSearchSvm':
        print("""
        Warning: In our experiences, using a grid search over SVM hyper-parameters only
        gives marginally better performance than a linear SVM with C=1 and
        is not worth the extra computations of performing a grid search.
        """)
        param_grid = [{
            'C': [1, 10, 100, 1000],
            'kernel': ['linear']
        }, {
            'C': [1, 10, 100, 1000],
            'gamma': [0.001, 0.0001],
            'kernel': ['rbf']
        }]
        clf = GridSearchCV(SVC(C=1, probability=True), param_grid, cv=5)
    elif args.classifier == 'GMM':  # Doesn't work best
        clf = GMM(n_components=nClasses)

    # ref:
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py
    elif args.classifier == 'RadialSvm':  # Radial Basis Function kernel
        print('RadialSvm Classifier')
        # works better with C = 1 and gamma = 2
        clf = SVC(C=1.5,
                  kernel='rbf',
                  degree=3,
                  probability=True,
                  tol=1e-5,
                  gamma=3,
                  decision_function_shape='ovr',
                  class_weight='balanced')
    elif args.classifier == 'DecisionTree':  # Doesn't work best
        clf = DecisionTreeClassifier(max_depth=20)
    elif args.classifier == 'GaussianNB':
        print('GNB Classifier')
        clf = GaussianNB()

    # ref: https://jessesw.com/Deep-Learning/
    elif args.classifier == 'DBN':
        print('DBN Classifier')
        from nolearn.dbn import DBN

        num_epoch = args.epoch

        # -1, 256, 256, 192, 128, -1
        clf = DBN(
            [-1, 256, 256, 192, 128, -1],  # i/p nodes, hidden nodes, o/p nodes
            learn_rates=0.05,
            learn_rates_pretrain=0.005,
            # Smaller steps mean a possibly more accurate result, but the
            # training will take longer
            learn_rate_decays=0.9,
            # a factor the initial learning rate will be multiplied by
            # after each iteration of the training
            use_re_lu=True,
            minibatch_size=32,
            epochs=num_epoch,  # number of iterations
            dropouts=0.3,  # Express the percentage of nodes that
            # will be randomly dropped as a decimal.
            loss_funct=dbn_loss_func,
            verbose=1)

    if args.classifier == 'DBN':
        clf_final = clf
        clf = Pipeline([('lda', LDA(n_components=nClasses - 1)),
                        ('clf', clf_final)])

    clf.fit(embeddings, labelsNum)

    fName = "{}/classifier.pkl".format(args.workDir)
    print("Saving classifier to '{}'".format(fName))
    with open(fName, 'w') as f:
        pickle.dump((le, clf), f)
Example #26
"""
Train the network with 3200 inputs (64x50 values in file)
6 output units (for the different defects)
Let's start with 2 hidden units
"""

# Numbers from example used here
(trainX, testX, trainY, testY) = train_test_split(
    dataset / 255.0, dataset.target.astype("int0"), test_size=0.33)

dbn = DBN(
    # [numNodes input layer, numNodes hidden layer, numNodes output layer]
    [trainX.shape[1], 2, 6],
    # Learning rate of algorithm
    learn_rates=0.3,
    # Decay of learn rate
    learn_rate_decays=0.9,
    # Iterations of training data (epochs)
    epochs=10,
    # Verbosity level
    verbose=1)
dbn.fit(trainX, trainY)

print "trained yo!"

# Evaluate network
#-----------------
preds = dbn.predict(testX)
print classification_report(testY, preds) # Table of accuracies 

Example #27
File: do.py  Project: Deep27/kaggle
from nolearn.dbn import DBN
import csv
import numpy as np

net = DBN([784, 300, 10],
          learn_rates=0.3,
          learn_rate_decays=0.9,
          epochs=10,
          verbose=1)

with open('./data/train.csv', 'rb') as f:
    data = list(csv.reader(f))

train_data = np.array(data[1:])
labels = train_data[:, 0].astype('float')
train_data = train_data[:, 1:].astype('float') / 255.0

net.fit(train_data, labels)

with open('./data/test.csv', 'rb') as f:
    data = list(csv.reader(f))

test_data = np.array(data[1:]).astype('float') / 255.0
preds = net.predict(test_data)

with open('./data/submission.csv', 'wb') as f:
    writer = csv.DictWriter(f, fieldnames=['ImageId', 'Label'])
    writer.writeheader()
    i = 1
    for e in preds:
        writer.writerow({'ImageId': i, 'Label': e})
Example #28
                                 :class:`~.gdbn.activationFunctions.Linear`,
                                 :class:`~.gdbn.activationFunctions.Softmax`
                                 from the
                                 :mod:`gdbn.activationFunctions`
                                 module.  Defaults to
                                 :class:`~.gdbn.activationFunctions.Softmax`.
"""

dbn = DBN(
    layer_sizes=[trainX.shape[1], 800,
                 10],  # trainX.shape[1] is the input layer, 10 is output layer
    # 800 is the hidden layer
    # use -1 as last layer if one does not know how many labels
    # are there
    output_act_funct="Softmax",
    dropouts=0.5,
    use_re_lu=True,
    l2_costs=0.0001,
    learn_rates=0.3,
    learn_rate_decays=0.9,
    epochs=10,
    loss_funct=None,  # if not specified, the default is the built-in percentage-of-wrong-labels score
    verbose=1)

##### Below is the trick for swapping in a custom score function to evaluate accuracy. The original
# program offers no option other than comparing the percentage of correct outputs;
# here one may supply a scoring function of one's own.
import new


def _score(self, X, y):
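The example breaks off here. As a rough sketch only (not from the original), a custom accuracy score could be bound onto an existing DBN instance in Python 2 via the `new` module imported above, assuming `dbn` is the DBN constructed earlier:

import numpy as np

def _score(self, X, y):
    # Plain accuracy: fraction of predictions that match the labels.
    return np.mean(self.predict(X) == np.asarray(y))

# Hypothetical binding; new.instancemethod(func, obj, cls) returns a bound method in Python 2.
dbn.score = new.instancemethod(_score, dbn, dbn.__class__)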
Example #29
def train():
    d = getData()
    if d is None:
        return None

    (X, y) = d
    #        numIdentities = len(set(y + [-1]))
    #        if numIdentities <= 1:
    #            return

    if classifier == 'LinearSvm':
        clf = SVC(C=1, kernel='linear', probability=True)
    elif classifier == 'GridSearchSvm':
        '''
Warning: In our experiences, using a grid search over SVM hyper-parameters only
gives marginally better performance than a linear SVM with C=1 and
is not worth the extra computations of performing a grid search.
        '''
        param_grid = [{
            'C': [1, 10, 100, 1000],
            'kernel': ['linear']
        }, {
            'C': [1, 10, 100, 1000],
            'gamma': [0.001, 0.0001],
            'kernel': ['rbf']
        }]
        clf = GridSearchCV(SVC(C=1, probability=True), param_grid, cv=5)
    elif classifier == 'GMM':  # Doesn't work best
        clf = GMM(n_components=nClasses)

    # ref:
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py
    elif classifier == 'RadialSvm':  # Radial Basis Function kernel
        # works better with C = 1 and gamma = 2
        clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif classifier == 'DecisionTree':  # Doesn't work best
        clf = DecisionTreeClassifier(max_depth=20)
    elif classifier == 'GaussianNB':
        clf = GaussianNB()

    # ref: https://jessesw.com/Deep-Learning/
    elif classifier == 'DBN':
        from nolearn.dbn import DBN
        clf = DBN(
            [embeddings.shape[1], 500, labelsNum[-1:][0] + 1
             ],  # i/p nodes, hidden nodes, o/p nodes
            learn_rates=0.3,
            # Smaller steps mean a possibly more accurate result, but the
            # training will take longer
            learn_rate_decays=0.9,
            # a factor the initial learning rate will be multiplied by
            # after each iteration of the training
            epochs=300,  # number of iterations
            # dropouts = 0.25, # Express the percentage of nodes that
            # will be randomly dropped as a decimal.
            verbose=1)

        if ldaDim > 0:
            clf_final = clf
            clf = Pipeline([('lda', LDA(n_components=ldaDim)),
                            ('clf', clf_final)])

    return clf.fit(X, y)
Example #30
def classifier(args, args_mode, dataset, sess):
    # Check that there are at least one training image per class
    for cls in dataset:
        # print(cls.name,'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
        if (len(cls.image_paths) < 1):
            print(cls.image_paths, "@@@@@@@@@@@@@@@@@@@@@@@")
        assert len(cls.image_paths) > 0, \
            'There must be at least one image for each class in the dataset'

    paths, labels, class_labels = get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))

    # Get input and output tensors
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
        "phase_train:0")
    embedding_size = embeddings.get_shape()[1]

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images /
                                           args.batch_size))
    emb_array = np.zeros((nrof_images, embedding_size))
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        images = facenet.load_data(paths_batch, False, False, args.image_size)
        feed_dict = {
            images_placeholder: images,
            phase_train_placeholder: False
        }
        emb_array[start_index:end_index, :] = sess.run(embeddings,
                                                       feed_dict=feed_dict)

    classifier_filename_exp = os.path.expanduser(args.classifier_filename)

    if (args_mode == 'TRAIN'):
        # Train classifier
        print('Training classifier+++++++++++++++++++++++++', args.classifier)
        if args.classifier == 'LinearSvm':
            # clf = SVC(C=1, kernel='linear', probability=True)
            model = SVC(kernel='linear', probability=True)
        elif args.classifier == 'GridSearchSvm':
            print("""
                            Warning: In our experiences, using a grid search over SVM hyper-parameters only
                            gives marginally better performance than a linear SVM with C=1 and
                            is not worth the extra computations of performing a grid search.
                            """)
            param_grid = [{
                'C': [1, 10, 100, 1000],
                'kernel': ['linear']
            }, {
                'C': [1, 10, 100, 1000],
                'gamma': [0.001, 0.0001],
                'kernel': ['rbf']
            }]
            model = GridSearchCV(SVC(C=1, probability=True), param_grid, cv=5)
        elif args.classifier == 'GMM':  # Doesn't work best
            model = GMM(n_components=nClasses)

        # ref:
        # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py
        elif args.classifier == 'RadialSvm':  # Radial Basis Function kernel
            # works better with C = 1 and gamma = 2
            model = SVC(C=1, kernel='rbf', probability=True, gamma=2)
        elif args.classifier == 'DecisionTree':  # Doesn't work best
            model = DecisionTreeClassifier(max_depth=20)
        elif args.classifier == 'GaussianNB':
            model = GaussianNB()

        # ref: https://jessesw.com/Deep-Learning/
        elif args.classifier == 'DBN':
            from nolearn.dbn import DBN
            model = DBN(
                [embeddings.shape[1], 500, labelsNum[-1:][0] + 1
                 ],  # i/p nodes, hidden nodes, o/p nodes
                learn_rates=0.3,
                # Smaller steps mean a possibly more accurate result, but the
                # training will take longer
                learn_rate_decays=0.9,
                # a factor the initial learning rate will be multiplied by
                # after each iteration of the training
                epochs=300,  # number of iterations
                # dropouts = 0.25, # Express the percentage of nodes that
                # will be randomly dropped as a decimal.
                verbose=1)
        elif args.classifier == 'KNN':
            model = KNeighborsClassifier(algorithm='auto',
                                         leaf_size=30,
                                         metric='minkowski',
                                         metric_params=None,
                                         n_jobs=1,
                                         n_neighbors=5,
                                         p=2,
                                         weights='uniform')

        model.fit(emb_array, labels)

        # Create a list of class names
        class_names = [cls.name.replace('_', ' ') for cls in dataset]

        # Saving classifier model
        with open(classifier_filename_exp, 'wb') as outfile:
            pickle.dump((model, class_names), outfile)
        print('Saved classifier model to file "%s"' % classifier_filename_exp)

    elif (args_mode == 'CLASSIFY'):
        # Classify images
        print('Testing classifier~~~~~~~~~~~~~~~~~~~~~~~~')
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)
        predictions = np.zeros((nrof_images, len(class_names)))
        print('Loaded classifier model from file "%s"' %
              classifier_filename_exp)
        correctPrediction = 0
        inCorrectPrediction = 0
        sumConfidence = 0.0
        correctConfidence = 0.0
        inCorrectConfidence = 0.0
        '''
         batch_size =args.batch_size
        #batch_size = 1
        for i in range(nrof_batches_per_epoch):
            start_index = i * batch_size
            end_index = min((i + 1) * batch_size, nrof_images)
            starttime = time.time()
            mini_emb_array = emb_array[start_index:end_index, :]
            predictions[start_index:end_index, :] = model.predict_proba(mini_emb_array)
            print("start_index:{} end_index:{} time:{}".format(start_index, end_index, time.time() - starttime))

        '''
        predictions = model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)
        best_class_probabilities = predictions[
            np.arange(len(best_class_indices)), best_class_indices]
        results = {'name': [], 'bestname': [], 'probabilities': []}
        for i in range(len(best_class_indices)):
            # print(len(class_names))
            # print(i,len(labels),labels[i])
            # print(i,len(best_class_indices),best_class_indices[i])
            print('%4d  %s:%s: %.3f' %
                  (i, class_labels[i], class_names[best_class_indices[i]],
                   best_class_probabilities[i]))
            results['name'].append(class_labels[i])
            results['bestname'].append(class_names[best_class_indices[i]])
            results['probabilities'].append(best_class_probabilities[i])
            sumConfidence += best_class_probabilities[i]
            if (class_labels[i] == class_names[best_class_indices[i]]):
                correctPrediction += 1
                correctConfidence += best_class_probabilities[i]
            else:
                inCorrectPrediction += 1
                inCorrectConfidence += best_class_probabilities[i]

        # accuracy = np.mean(np.equal(best_class_indices, labels))
        accuracy = float(correctPrediction) / (correctPrediction +
                                               inCorrectPrediction)
        Avg_Confidence = float(sumConfidence) / (correctPrediction +
                                                 inCorrectPrediction)
        Avg_correctConfidence = float(correctConfidence / correctPrediction)
        Avg_inCorrectConfidence = float(inCorrectConfidence /
                                        inCorrectPrediction)
        results['name'].append('Accuracy:')
        results['bestname'].append('Accuracy:')
        results['probabilities'].append(accuracy)
        dataname = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
        data_frame = pd.DataFrame(
            data={
                'name': results['name'],
                'bestname': results['bestname'],
                'probabilities': results['probabilities']
            })
        data_frame.to_csv(args.data_dir + '/results_' + dataname + '.csv')

        print("Correct Prediction :" + str(correctPrediction))
        print("In-correct Prediction: " + str(inCorrectPrediction))
        print('Accuracy: %.3f' % accuracy)
        print("Avg Confidence: " + str(Avg_Confidence))
        print("Avg CorrectConfidence: " + str(Avg_correctConfidence))
        print("Avg inCorrectConfidence: " + str(Avg_inCorrectConfidence))