def main():
    """Classify images with an SVC on sparse dictionary-learning codes.

    Steps, in order:
      1. Load and standardize the labeled training images and the public
         test images (both flattened).
      2. Fit a 500-component ``MiniBatchDictionaryLearning`` model on the
         training images and sparse-code both sets against its components.
      3. Print the 10-fold cross-validation scores of a default ``SVC`` on
         the coded training data, followed by their mean and variance.
      4. Fit the SVC on all coded training data and write its predictions
         for the coded test set to ``../svm_res.csv``.
    """
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    test = standardize(load_public_test(flatten=True))
    # NOTE: the unlabeled training set used to be loaded and standardized
    # here but nothing consumed it; only the disabled KernelPCA(kernel="rbf")
    # and SparsePCA experiments fitted on it. Reload it with
    # load_unlabeled_training(flatten=True) if those are revived.

    # Presumably shuffles both arrays in place with the same permutation
    # (the return value is ignored) -- TODO confirm against the helper.
    shuffle_in_unison(images, labels)

    dictionary = MiniBatchDictionaryLearning(
        n_components=500, n_iter=500, verbose=True).fit(images)
    coder = SparseCoder(dictionary.components_)
    train_codes = coder.transform(images)
    test_codes = coder.transform(test)

    svc = SVC()
    scores = cross_validation.cross_val_score(svc, train_codes, labels, cv=10)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))

    svc.fit(train_codes, labels)
    pred = svc.predict(test_codes)
    write_results(pred, '../svm_res.csv')
def main():
    """Train a single-hidden-layer softmax network with PyBrain.

    Builds a 7-class ``ClassificationDataSet`` from the standardized labeled
    images (labels are stored as ``label - 1``), splits it with proportion
    0.2 into a test part and a train part, and trains a network with one
    1000-unit hidden layer using ``BackpropTrainer``. Classification error
    is printed once before training and, together with the trainer's
    ``testOnData`` value, after each of 100 rounds of 10 epochs.
    """
    features, targets = load_labeled_training(flatten=True)
    features = standardize(features)
    # Alternative input that was tried: load_pca_proj(K=100)
    shuffle_in_unison(features, targets)

    dataset = ClassificationDataSet(features.shape[1], 1, nb_classes=7)
    for sample, label in zip(features, targets):
        dataset.addSample(sample, [label - 1])

    # The first element of the split is used as the test set.
    test, train = dataset.splitWithProportion(0.2)
    for subset in (test, train):
        subset._convertToOneOfMany()

    net = shortcuts.buildNetwork(
        train.indim, 1000, train.outdim, outclass=SoftmaxLayer)
    # Other options that were tried: RPropMinusTrainer(net, dataset=train),
    # and validation.CrossValidator(trainer, dataset).validate().
    trainer = BackpropTrainer(
        net,
        dataset=train,
        momentum=0.1,
        learningrate=0.01,
        weightdecay=0.05,
    )
    net.randomize()

    def class_error(subset):
        # Percent of samples in `subset` that the trainer's net misclassifies.
        return percentError(
            trainer.testOnClassData(dataset=subset), subset["class"])

    # Baseline before any training; the training error here is computed
    # directly from the network activations rather than via the trainer.
    initial_guess = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(initial_guess, train["class"])
    testres = class_error(test)
    print("Training error: %.10f, Test error: %.10f" % (trnres, testres))
    print("Iters: %d" % trainer.totalepochs)

    for _ in range(100):
        trainer.trainEpochs(10)
        trnres = class_error(train)
        testres = class_error(test)
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print("Iteration: %d, Training error: %.5f, Test error: %.5f"
              % (trainer.totalepochs, trnres, testres))
        print("Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse))
def main():
    """Grid-search k-NN over PCA dimensionality and neighbour count.

    For every PCA projection size and every ``n_neighbors`` value below,
    runs 10-fold cross-validation of a ``KNeighborsClassifier`` on the
    projected data and prints K, PK, the fold scores, their mean and their
    variance.
    """
    pca_sizes = [80, 200, 500, 1024]
    neighbor_counts = [1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30]

    # The former `pk=80` pre-assignment was a dead store: the loop rebinds
    # pk before it is ever read.
    for pk in pca_sizes:
        features, labels = load_pca_proj(K=pk)
        shuffle_in_unison(features, labels)
        for k in neighbor_counts:
            knn = KNeighborsClassifier(n_neighbors=k)
            scores = cross_validation.cross_val_score(
                knn, features, labels, cv=10)
            # Single-argument print(...) works on both Python 2 and 3.
            print("K: " + str(k))
            print("PK: " + str(pk))
            print(scores)
            print(np.mean(scores))
            print(np.var(scores))
def main():
    """Cross-validate and apply a random forest on PCA-projected data.

    Runs 10-fold cross-validation of a 1001-tree entropy-criterion
    ``RandomForestClassifier`` on the K=200 PCA projection, then fits the
    forest on all of that data, writes its predictions for the projected
    test set to ``../rfc_res.csv`` and finally prints the fold scores,
    their mean and their variance.
    """
    n_trees = 1001
    n_components = 200

    # A stray bare `RandomForestClassifier` expression statement (a no-op)
    # used to follow this line; it has been removed.
    rfc = RandomForestClassifier(
        n_estimators=n_trees, criterion="entropy", max_features="auto")

    features, labels = load_pca_proj(K=n_components)
    shuffle_in_unison(features, labels)
    scores = cross_validation.cross_val_score(rfc, features, labels, cv=10)

    test_features = load_pca_test(K=n_components)
    rfc.fit(features, labels)
    pred = rfc.predict(test_features)
    write_results(pred, "../rfc_res.csv")

    # Single-argument print(...) works on both Python 2 and 3.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))
Exemple #5
0
def main():
    """Evaluate a random forest on the PCA projection and export predictions.

    The forest (1001 trees, entropy criterion) is scored with 10-fold
    cross-validation on the K=200 PCA-projected data, refitted on the full
    projected set, and used to predict the projected test set; predictions
    go to ``../rfc_res.csv``. The fold scores, their mean and their
    variance are printed last.
    """
    tree_count = 1001
    pca_k = 200

    # The bare `RandomForestClassifier` name that stood on its own line
    # after this call was an expression statement with no effect; dropped.
    forest = RandomForestClassifier(n_estimators=tree_count,
                                    criterion='entropy',
                                    max_features="auto")

    train_proj, labels = load_pca_proj(K=pca_k)
    shuffle_in_unison(train_proj, labels)
    scores = cross_validation.cross_val_score(forest, train_proj, labels,
                                              cv=10)

    test_proj = load_pca_test(K=pca_k)
    forest.fit(train_proj, labels)
    write_results(forest.predict(test_proj), '../rfc_res.csv')

    # print(x) with one argument is valid on Python 2 and Python 3 alike.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))
Exemple #6
0
def main():
    """Sweep k-NN hyper-parameters over several PCA projection sizes.

    Each (PK, K) pair -- PCA size and ``n_neighbors`` -- is evaluated with
    10-fold cross-validation; K, PK, the fold scores, their mean and their
    variance are printed for every pair.
    """
    # `pk = 80` used to be assigned before the loop; the loop overwrote it
    # before any read, so the dead store is gone.
    for pk in (80, 200, 500, 1024):
        train_proj, labels = load_pca_proj(K=pk)
        shuffle_in_unison(train_proj, labels)
        for k in (1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30):
            knn = KNeighborsClassifier(n_neighbors=k)
            scores = cross_validation.cross_val_score(knn,
                                                      train_proj,
                                                      labels,
                                                      cv=10)
            # One-argument print(...) is portable across Python 2 and 3.
            print("K: " + str(k))
            print("PK: " + str(pk))
            print(scores)
            print(np.mean(scores))
            print(np.var(scores))
def main():
    """Fit a PyBrain softmax network on the labeled images and report errors.

    The standardized, flattened images are stored in a 7-class
    ``ClassificationDataSet`` with labels shifted to ``label - 1``. The set
    is split with proportion 0.2 (first part used as test set), both parts
    are converted to one-of-many targets, and a network with a 1000-unit
    hidden layer and softmax output is trained by backpropagation for
    100 rounds of 10 epochs, printing class error and ``testOnData`` values
    after every round.
    """
    pixels, classes = load_labeled_training(flatten=True)
    pixels = standardize(pixels)
    # A PCA-projected input (load_pca_proj(K=100)) was tried as well.
    shuffle_in_unison(pixels, classes)

    full_set = ClassificationDataSet(pixels.shape[1], 1, nb_classes=7)
    for row, cls in zip(pixels, classes):
        full_set.addSample(row, [cls - 1])

    test, train = full_set.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()

    net = shortcuts.buildNetwork(
        train.indim, 1000, train.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(
        net, dataset=train, momentum=0.1, learningrate=0.01,
        weightdecay=0.05)
    # Tried alternatives: RPropMinusTrainer(net, dataset=train) and
    # validation.CrossValidator(trainer, full_set).validate().
    net.randomize()

    def misclassified_pct(part):
        # Class error (percent) of the trainer's network on `part`.
        predicted = trainer.testOnClassData(dataset=part)
        return percentError(predicted, part['class'])

    # Before training: the train error comes straight from the network
    # activations, the test error from the trainer.
    untrained_labels = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(untrained_labels, train['class'])
    testres = misclassified_pct(test)
    print("Training error: %.10f, Test error: %.10f" % (trnres, testres))
    print("Iters: %d" % trainer.totalepochs)

    rounds_left = 100
    while rounds_left > 0:
        rounds_left -= 1
        trainer.trainEpochs(10)
        trnres = misclassified_pct(train)
        testres = misclassified_pct(test)
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        progress = (trainer.totalepochs, trnres, testres)
        print("Iteration: %d, Training error: %.5f, Test error: %.5f"
              % progress)
        print("Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse))
def main():
    """Cross-validate a default SVC on the K=500 PCA projection and predict.

    Prints the 10-fold cross-validation scores with their mean and
    variance, then fits the SVC on the full projected training data and
    writes its predictions for the data returned by ``load_pca_hidden`` to
    ``../svm_res.csv``.
    """
    k = 500
    # An earlier experiment swept k over [100, 500, 1024] and the SVC kernel
    # over ['linear', 'sigmoid', 'rbf'], printing the CV scores for each
    # combination; only the single configuration below is still run.
    proj_train, labels = load_pca_proj(K=k)
    shuffle_in_unison(proj_train, labels)

    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_train, labels, cv=10)
    # These scores used to be computed and then silently discarded; report
    # them the same way the other experiment scripts do. Single-argument
    # print(...) works on both Python 2 and 3.
    print(scores)
    print(scores.mean())
    print(scores.var())

    pt = load_pca_hidden(K=k)
    svc.fit(proj_train, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
def main():
    """Score and apply a default SVC on PCA-projected data (K=500).

    Runs 10-fold cross-validation and prints the fold scores, their mean
    and their variance; afterwards the SVC is fitted on all projected
    training data and its predictions for ``load_pca_hidden(K=500)`` are
    written to ``../svm_res.csv``.
    """
    k = 500
    # Historical note: a sweep over k in [100, 500, 1024] and kernels
    # ['linear', 'sigmoid', 'rbf'] with per-combination score printing was
    # disabled in favour of this single run.
    train_features, labels = load_pca_proj(K=k)
    shuffle_in_unison(train_features, labels)

    classifier = SVC()
    scores = cross_validation.cross_val_score(
        classifier, train_features, labels, cv=10)
    # Previously the cross-validation result was thrown away, wasting ten
    # SVC fits; it is now printed. One-argument print(...) is valid on both
    # Python 2 and 3.
    print(scores)
    print(scores.mean())
    print(scores.var())

    hidden_features = load_pca_hidden(K=k)
    classifier.fit(train_features, labels)
    write_results(classifier.predict(hidden_features), '../svm_res.csv')
Exemple #10
0
def train(episode_padded, network_type):
    """
    Train the model for one episode and network type.

    pre_train setting (episode 0): if a saved model already exists for this
    episode, nothing is trained and the function returns. Otherwise a new
    model is defined, compiled (Adam optimizer, mse loss) and trained.

    self_play setting (episode >= 1): the model saved for the previous
    episode is loaded and trained further. If no such model exists, the
    function prints a message and returns.

    During training the best model, the best weights and an exported model
    are checkpointed for the current episode, and training stops early when
    the validation loss stops improving. Afterwards the minimum validation
    loss and the test loss are written to ``min_val_loss.txt`` and
    ``test_loss.txt`` under ``base_path()``.

    :param episode_padded:  0 (pre_train), 1..n (self_play); must be
                            convertible with ``int()``
    :param network_type:    cards or score
    :return:                None
    """

    # set random seeds for reproducible experiments
    seed(1)  # numpy
    set_random_seed(2)  # tensorflow backend

    episode_number = int(episode_padded)
    if episode_number < 0:
        print("\nPlease enter an episode number >= 0!")
        return
    elif episode_number == 0:  # pre_train setting: train only if no model is saved yet
        saved_model_path = model_path(episode_padded, network_type)
        if os.path.exists(saved_model_path):
            print("\nNo need to train. Saved model found in " +
                  saved_model_path)
            return
        else:
            model = define_separate_model(network_type)
            # Adam is used for every network type. An SGD optimizer
            # (lr=1e-2, momentum=0.9, decay=1e-6, nesterov=True), intended
            # for cards estimation, used to be constructed here as well, but
            # it was unconditionally overwritten by Adam before use, so that
            # dead construction has been removed. RAdam was also tried.
            optimizer = Adam()
            # Reason for mse: big errors should be punished!
            loss = 'mse'  # Tried also: 'mae', 'mape', 'kullback_leibler_divergence', 'categorical_crossentropy', 'acc', 'hinge', 'logcosh'
            model.compile(loss=loss,
                          optimizer=optimizer,
                          metrics=['acc', 'mae'])
            print("\nCompiled new model")
    else:  # self_play setting: loading existing model of the previous episode and saving to current episode
        saved_model_path = model_path(zero_pad(episode_number - 1),
                                      network_type)
        if not os.path.exists(saved_model_path):
            print("\nNo saved model found in " + saved_model_path)
            return
        else:
            model = load_model(saved_model_path)
            print("\nLoaded existing model from " + saved_model_path)

        # Loading only the weights (weights_path) instead of the whole model
        # was tried here and is currently disabled.

    print("Loading data...")

    # TODO: possibly train with more data (load it through a generator)

    x_train = load_dataset(episode_number, network_type, "train/",
                           features_path)
    y_train = load_dataset(episode_number, network_type, "train/",
                           targets_path)
    shuffle_in_unison(x_train, y_train)

    x_test = load_dataset(episode_number, network_type, "test/", features_path)
    y_test = load_dataset(episode_number, network_type, "test/", targets_path)
    shuffle_in_unison(x_test, y_test)

    x_val = load_dataset(episode_number, network_type, "val/", features_path)
    y_val = load_dataset(episode_number, network_type, "val/", targets_path)
    shuffle_in_unison(x_val, y_val)

    print("Training...")

    h = History()
    tb = TensorBoard(log_dir='./Graph', write_images=True)
    es = EarlyStopping(monitor='val_loss',
                       min_delta=0.0001,
                       patience=5,
                       restore_best_weights=True)
    mc = ModelCheckpoint(model_path(episode_padded, network_type),
                         save_best_only=True,
                         save_weights_only=False,
                         verbose=1)
    wc = ModelCheckpoint(weights_path(episode_padded, network_type),
                         save_best_only=True,
                         save_weights_only=True,
                         verbose=1)
    emc = ExportModelCheckpoint(export_path(episode_padded, network_type),
                                save_best_only=True,
                                verbose=1)

    # epochs=999 is only an upper bound; EarlyStopping ends training once
    # val_loss has not improved by min_delta for `patience` epochs.
    history = model.fit(x_train,
                        y_train,
                        epochs=999,
                        batch_size=32,
                        validation_data=(x_val, y_val),
                        callbacks=[h, tb, es, mc, wc, emc])

    min_val_loss = min(history.history['val_loss'])
    # This file is then read in the Java code, so it is closed (and thereby
    # flushed) deterministically instead of relying on garbage collection.
    with open(base_path() + "min_val_loss.txt", "w") as val_loss_file:
        print(min_val_loss, file=val_loss_file)

    print("Performance on test set")
    test_loss = model.evaluate(x_test, y_test)
    print(model.metrics_names)
    print(test_loss)
    # The first entry is written out as the loss value. Look it up before
    # opening the file so a failure here cannot leave a truncated file.
    test_loss_value = test_loss[0]
    # This file is then read in the Java code
    with open(base_path() + "test_loss.txt", "w") as test_loss_file:
        print(test_loss_value, file=test_loss_file)

    numpy.set_printoptions(
        precision=2,
        threshold=sys.maxsize)  # so that the print output is not truncated
    # Show the prediction for the first test sample next to its target.
    prediction = model.predict(numpy.expand_dims(x_test[0], axis=0))
    print(numpy.hstack((prediction[0], y_test[0])))
Exemple #11
0
    print(
        f"Initially {len(cap_X)} cap tweets and {len(nocap_X)} no-cap tweets.")

    minl = min(len(cap_X), len(nocap_X))
    np.random.shuffle(cap_X)
    np.random.shuffle(nocap_X)
    cap_X = cap_X[:minl]
    nocap_X = nocap_X[:minl]

    X = cap_X + nocap_X
    Y = [0] * len(cap_X) + [1] * len(nocap_X)
    del cap_X
    del nocap_X
    Y = np.array(Y)

    util.shuffle_in_unison(X, Y)

    tokenizer = tf.keras.preprocessing.text.Tokenizer(
        vocab_size,
        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
        lower=True,
        split=" ",
        char_level=False,
        oov_token="<unk>",
    )
    tokenizer.fit_on_texts(X)
    thresh, count = 10, 0
    for _, c in tokenizer.word_counts.items():
        count += 1 if c > thresh else 0
    print(f"{count} words used > {thresh} times")