def main():
    """Classify sparse-coded images with an SVM and write test predictions.

    Learns a 500-component dictionary from the standardized labeled images,
    encodes the labeled and public-test images against it, prints the
    10-fold cross-validation scores (plus their mean and variance) of a
    default SVC, then fits on all labeled data and writes the predictions
    for the public test set to '../svm_res.csv'.
    """
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)

    # Only the disabled KernelPCA / SparsePCA experiments below consume this.
    unl = standardize(load_unlabeled_training(flatten=True))

    test = standardize(load_public_test(flatten=True))

    shuffle_in_unison(images, labels)

    # dictionary = DictionaryLearning().fit(images)
    dictionary = MiniBatchDictionaryLearning(
        n_components=500, n_iter=500, verbose=True
    ).fit(images)
    coder = SparseCoder(dictionary.components_)
    train_codes = coder.transform(images)
    test_codes = coder.transform(test)

    # Alternative projections that were tried instead of sparse coding:
    # kpca = KernelPCA(kernel="rbf")
    # kpca.fit(unl)
    # train_codes = kpca.transform(images)
    # test_codes = kpca.transform(test)
    #
    # spca = SparsePCA().fit(unl)
    # train_codes = spca.transform(images)
    # test_codes = spca.transform(test)

    classifier = SVC()
    cv_scores = cross_validation.cross_val_score(
        classifier, train_codes, labels, cv=10
    )
    print(cv_scores)
    print(np.mean(cv_scores))
    print(np.var(cv_scores))

    # Refit on every labeled sample before predicting the public test set.
    classifier.fit(train_codes, labels)
    predictions = classifier.predict(test_codes)
    write_results(predictions, '../svm_res.csv')
def main():
    """Train a softmax network with backprop and log its error over time.

    Builds a PyBrain classification data set from the standardized labeled
    images, splits it with proportion 0.2 into a held-out part and a
    training part, and trains a network with one 1000-unit hidden layer.
    Classification error is printed once for the freshly randomized
    network and then, together with the MSE, after every 10 epochs for
    100 rounds.
    """
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)

    dataset = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for pixels, label in zip(images, labels):
        # Targets are stored as label - 1.
        dataset.addSample(pixels, [label - 1])
    # dataset._convertToOneOfMany()

    # The first part returned by the 0.2 split is the held-out set.
    test, train = dataset.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()

    net = shortcuts.buildNetwork(
        train.indim, 1000, train.outdim, outclass=SoftmaxLayer
    )
    trainer = BackpropTrainer(
        net,
        dataset=train,
        momentum=0.1,
        learningrate=0.01,
        weightdecay=0.05,
    )
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, dataset)
    # print cv.validate()

    def class_error(data):
        # Percentage of samples in `data` the trainer's network misclassifies.
        return percentError(trainer.testOnClassData(dataset=data), data["class"])

    net.randomize()

    # Baseline before any training; the training error is taken directly
    # from the network activations here rather than through the trainer.
    initial_predictions = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(initial_predictions, train["class"])
    # trnres = class_error(train)
    testres = class_error(test)
    print("Training error: %.10f, Test error: %.10f" % (trnres, testres))
    print("Iters: %d" % trainer.totalepochs)

    for _ in range(100):
        trainer.trainEpochs(10)
        trnres = class_error(train)
        testres = class_error(test)
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print(
            "Iteration: %d, Training error: %.5f, Test error: %.5f"
            % (trainer.totalepochs, trnres, testres)
        )
        print("Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse))
def main():
    """Grid-search k-NN over PCA projection size and neighbour count.

    For every PCA projection size and every neighbour count, runs 10-fold
    cross-validation of a KNeighborsClassifier and prints the neighbour
    count, the projection size, the per-fold scores, and their mean and
    variance.
    """
    pca_sizes = [80, 200, 500, 1024]
    neighbour_counts = [1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30]

    # The former initial `pk=80` assignment was dead: the loop rebinds `pk`
    # before it is ever read.
    for pk in pca_sizes:
        proj_test, labels = load_pca_proj(K=pk)
        shuffle_in_unison(proj_test, labels)
        for k in neighbour_counts:
            knn = KNeighborsClassifier(n_neighbors=k)
            scores = cross_validation.cross_val_score(
                knn, proj_test, labels, cv=10
            )
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print("K: " + str(k))
            print("PK: " + str(pk))
            print(scores)
            print(np.mean(scores))
            print(np.var(scores))
def main():
    """Cross-validate a random forest on PCA features and write predictions.

    Runs 10-fold cross-validation of a 1001-tree entropy-criterion forest on
    the K=200 PCA projection, fits the forest on all projected training
    data, writes its predictions for the projected test set to
    "../rfc_res.csv", and finally prints the cross-validation scores with
    their mean and variance.
    """
    N_TREE = 1001
    k = 200

    rfc = RandomForestClassifier(
        n_estimators=N_TREE, criterion="entropy", max_features="auto"
    )
    # A stray bare `RandomForestClassifier` expression statement used to
    # follow here; it had no effect and was removed.

    proj_test, labels = load_pca_proj(K=k)
    shuffle_in_unison(proj_test, labels)
    scores = cross_validation.cross_val_score(rfc, proj_test, labels, cv=10)

    # Fit on the full training projection before predicting the test set.
    pt = load_pca_test(K=k)
    rfc.fit(proj_test, labels)
    pred = rfc.predict(pt)
    write_results(pred, "../rfc_res.csv")

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))
def main():
    """Cross-validate a random forest on PCA features and write predictions.

    Runs 10-fold cross-validation of a 1001-tree entropy-criterion forest on
    the K=200 PCA projection, fits the forest on all projected training
    data, writes its predictions for the projected test set to
    '../rfc_res.csv', and finally prints the cross-validation scores with
    their mean and variance.
    """
    N_TREE = 1001
    k = 200

    rfc = RandomForestClassifier(
        n_estimators=N_TREE, criterion='entropy', max_features="auto"
    )
    # A stray bare `RandomForestClassifier` expression statement used to
    # follow here; it had no effect and was removed.

    proj_test, labels = load_pca_proj(K=k)
    shuffle_in_unison(proj_test, labels)
    scores = cross_validation.cross_val_score(rfc, proj_test, labels, cv=10)

    # Fit on the full training projection before predicting the test set.
    pt = load_pca_test(K=k)
    rfc.fit(proj_test, labels)
    pred = rfc.predict(pt)
    write_results(pred, '../rfc_res.csv')

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))
def main():
    """Grid-search k-NN over PCA projection size and neighbour count.

    For every PCA projection size and every neighbour count, runs 10-fold
    cross-validation of a KNeighborsClassifier and prints the neighbour
    count, the projection size, the per-fold scores, and their mean and
    variance.
    """
    pca_sizes = [80, 200, 500, 1024]
    neighbour_counts = [1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30]

    # The former initial `pk = 80` assignment was dead: the loop rebinds
    # `pk` before it is ever read.
    for pk in pca_sizes:
        proj_test, labels = load_pca_proj(K=pk)
        shuffle_in_unison(proj_test, labels)
        for k in neighbour_counts:
            knn = KNeighborsClassifier(n_neighbors=k)
            scores = cross_validation.cross_val_score(
                knn, proj_test, labels, cv=10
            )
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print("K: " + str(k))
            print("PK: " + str(pk))
            print(scores)
            print(np.mean(scores))
            print(np.var(scores))
def main():
    """Train a softmax network with backprop and log its error over time.

    Builds a PyBrain classification data set from the standardized labeled
    images, splits it with proportion 0.2 into a held-out part and a
    training part, and trains a network with one 1000-unit hidden layer.
    Classification error is printed once for the freshly randomized
    network and then, together with the MSE, after every 10 epochs for
    100 rounds.
    """
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)

    dataset = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for pixels, label in zip(images, labels):
        # Targets are stored as label - 1.
        dataset.addSample(pixels, [label - 1])
    # dataset._convertToOneOfMany()

    # The first part returned by the 0.2 split is the held-out set.
    test, train = dataset.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()

    net = shortcuts.buildNetwork(
        train.indim, 1000, train.outdim, outclass=SoftmaxLayer
    )
    trainer = BackpropTrainer(
        net,
        dataset=train,
        momentum=0.1,
        learningrate=0.01,
        weightdecay=0.05,
    )
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, dataset)
    # print cv.validate()

    def class_error(data):
        # Percentage of samples in `data` the trainer's network misclassifies.
        return percentError(trainer.testOnClassData(dataset=data), data['class'])

    net.randomize()

    # Baseline before any training; the training error is taken directly
    # from the network activations here rather than through the trainer.
    initial_predictions = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(initial_predictions, train['class'])
    # trnres = class_error(train)
    testres = class_error(test)
    print("Training error: %.10f, Test error: %.10f" % (trnres, testres))
    print("Iters: %d" % trainer.totalepochs)

    for _ in range(100):
        trainer.trainEpochs(10)
        trnres = class_error(train)
        testres = class_error(test)
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print(
            "Iteration: %d, Training error: %.5f, Test error: %.5f"
            % (trainer.totalepochs, trnres, testres)
        )
        print("Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse))
def main():
    """Cross-validate a default SVC on PCA features and write predictions.

    Runs 10-fold cross-validation on the K=500 PCA projection and prints
    the per-fold scores with their mean and variance, then fits the SVC on
    all projected training data and writes its predictions for the hidden
    set to '../svm_res.csv'.
    """
    k = 500

    # An earlier experiment (disabled in the original script) looped over
    # K in [100, 500, 1024] and the 'linear', 'sigmoid' and 'rbf' kernels,
    # printing the cross-validation scores for each combination.

    proj_test, labels = load_pca_proj(K=k)
    shuffle_in_unison(proj_test, labels)

    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_test, labels, cv=10)
    # The scores used to be computed and then dropped; report them so the
    # cross-validation run is not wasted.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))

    # Fit on the full training projection before predicting the hidden set.
    pt = load_pca_hidden(K=k)
    svc.fit(proj_test, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
def main():
    """Cross-validate a default SVC on PCA features and write predictions.

    Runs 10-fold cross-validation on the K=500 PCA projection and prints
    the per-fold scores with their mean and variance, then fits the SVC on
    all projected training data and writes its predictions for the hidden
    set to '../svm_res.csv'.
    """
    k = 500

    # An earlier experiment (disabled in the original script) looped over
    # K in [100, 500, 1024] and the 'linear', 'sigmoid' and 'rbf' kernels,
    # printing the cross-validation scores for each combination.

    proj_test, labels = load_pca_proj(K=k)
    shuffle_in_unison(proj_test, labels)

    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_test, labels, cv=10)
    # The scores used to be computed and then dropped; report them so the
    # cross-validation run is not wasted.
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))

    # Fit on the full training projection before predicting the hidden set.
    pt = load_pca_hidden(K=k)
    svc.fit(proj_test, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
def train(episode_padded, network_type):
    """
    Train the network of the given type for one episode and save the best model.

    pre_train setting (episode 0): if a saved model already exists for this episode, nothing is trained.
    Otherwise a new model is defined, compiled and trained.
    self_play setting (episode >= 1): the model saved for the previous episode is loaded and trained further.
    If that model does not exist, the function returns without training.

    Saves the best model, weights and export during training based on the validation loss.
    Afterwards the minimum validation loss and the test loss are written to "min_val_loss.txt" and
    "test_loss.txt" under base_path(), and the prediction for the first test sample is printed next to its target.

    :param episode_padded: 0 (pre_train), 1..n (self_play); must be convertible with int()
    :param network_type: cards or score
    :return: None
    """
    # set random seeds for reproducible experiments
    seed(1)  # numpy
    set_random_seed(2)  # tensorflow backend

    episode_number = int(episode_padded)

    if episode_number < 0:
        print("\nPlease enter an episode number >= 0!")
        return

    if episode_number == 0:
        # pre_train setting: an already saved model is kept, not overwritten
        saved_model_path = model_path(episode_padded, network_type)
        if os.path.exists(saved_model_path):
            print("\nNo need to train. Saved model found in " + saved_model_path)
            return

        model = define_separate_model(network_type)
        # NOTE(review): an SGD(lr=1e-2, momentum=0.9, decay=1e-6, nesterov=True) optimizer, commented as
        # "use sgd for cards estimation (tried also RAdam, Adam)", used to be created here and was immediately
        # overwritten by Adam(). Adam was therefore always the effective optimizer; that behaviour is kept.
        # Confirm whether the optimizer was meant to depend on network_type.
        optimizer = Adam()  # use adam for score estimation
        # Reason for mse: big errors should be punished!
        # Tried also: 'mae', 'mape', 'kullback_leibler_divergence', 'categorical_crossentropy', 'acc', 'hinge',
        # 'logcosh'
        loss = 'mse'
        model.compile(loss=loss, optimizer=optimizer, metrics=['acc', 'mae'])
        print("\nCompiled new model")
    else:
        # self_play setting: loading existing model of the previous episode and saving to current episode
        saved_model_path = model_path(zero_pad(episode_number - 1), network_type)
        if not os.path.exists(saved_model_path):
            print("\nNo saved model found in " + saved_model_path)
            return

        model = load_model(saved_model_path)
        print("\nLoaded existing model from " + saved_model_path)

    # if os.path.exists(weights_path(episode_number, network_type)):
    #     model = model.load_weights(weights_path(episode_number, network_type))
    #     print("\nLoaded existing weights from " + weights_path(episode_number, network_type))

    print("Loading data...")
    # TODO: possibly train with more data (load it with a generator)
    x_train = load_dataset(episode_number, network_type, "train/", features_path)
    y_train = load_dataset(episode_number, network_type, "train/", targets_path)
    shuffle_in_unison(x_train, y_train)

    x_test = load_dataset(episode_number, network_type, "test/", features_path)
    y_test = load_dataset(episode_number, network_type, "test/", targets_path)
    shuffle_in_unison(x_test, y_test)

    x_val = load_dataset(episode_number, network_type, "val/", features_path)
    y_val = load_dataset(episode_number, network_type, "val/", targets_path)
    shuffle_in_unison(x_val, y_val)

    print("Training...")
    h = History()
    tb = TensorBoard(log_dir='./Graph', write_images=True)
    es = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5, restore_best_weights=True)
    mc = ModelCheckpoint(model_path(episode_padded, network_type),
                         save_best_only=True, save_weights_only=False, verbose=1)
    wc = ModelCheckpoint(weights_path(episode_padded, network_type),
                         save_best_only=True, save_weights_only=True, verbose=1)
    emc = ExportModelCheckpoint(export_path(episode_padded, network_type), save_best_only=True, verbose=1)

    # epochs=999 is only an upper bound; the EarlyStopping callback ends training once val_loss stops improving
    history = model.fit(x_train, y_train, epochs=999, batch_size=32, validation_data=(x_val, y_val),
                        callbacks=[h, tb, es, mc, wc, emc])

    min_val_loss = min(history.history['val_loss'])
    # This file is then read in the Java code, so close it explicitly to make sure it is flushed to disk
    with open(base_path() + "min_val_loss.txt", "w") as min_val_loss_file:
        print(min_val_loss, file=min_val_loss_file)

    print("Performance on test set")
    test_loss = model.evaluate(x_test, y_test)
    print(model.metrics_names)
    print(test_loss)
    # This file is then read in the Java code, so close it explicitly to make sure it is flushed to disk
    with open(base_path() + "test_loss.txt", "w") as test_loss_file:
        print(test_loss[0], file=test_loss_file)

    numpy.set_printoptions(precision=2, threshold=sys.maxsize)  # so that the print output is not truncated
    prediction = model.predict(numpy.expand_dims(x_test[0], axis=0))
    print(numpy.hstack((prediction[0], y_test[0])))
# Report the raw class sizes before balancing.
print(f"Initially {len(cap_X)} cap tweets and {len(nocap_X)} no-cap tweets.")

# Balance the classes: shuffle each one, then truncate both to the size of
# the smaller class.
minl = min(len(cap_X), len(nocap_X))
np.random.shuffle(cap_X)
np.random.shuffle(nocap_X)
cap_X = cap_X[:minl]
nocap_X = nocap_X[:minl]

# Cap tweets come first and are labelled 0; no-cap tweets follow with label 1.
X = cap_X + nocap_X
Y = np.array([0] * len(cap_X) + [1] * len(nocap_X))
del cap_X, nocap_X

util.shuffle_in_unison(X, Y)

tokenizer = tf.keras.preprocessing.text.Tokenizer(
    vocab_size,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=" ",
    char_level=False,
    oov_token="<unk>",
)
tokenizer.fit_on_texts(X)

# Count how many distinct words occur more than `thresh` times.
thresh = 10
count = sum(1 for occurrences in tokenizer.word_counts.values() if occurrences > thresh)
print(f"{count} words used > {thresh} times")