def main():
    # create data feeder and get features and target
    dt = DataFeeder()
    features, target = dt.get_data()

    # perform PCA with a variety of components
    # features = dt.pca(2)
    features = dt.pca(10)

    # get best hyperparameters
    scorer = make_scorer(f1_score, pos_label=0)
    params = find_parameters(features, target, scorer=scorer)

    # run train test split without penalty
    print('#################################################')
    print('Train test split without penalty')
    run_train_test_split(features, target, C=params['C'], penalty='none',
                         solver='saga')

    # run train test split with L2 penalty
    print('#################################################')
    print('Train test split with L2 penalty')
    run_train_test_split(features, target, C=params['C'])

    # run cross validation without penalty
    print('#################################################')
    print('Cross Validation without penalty')
    run_cross_validation(features, target, C=params['C'], penalty='none',
                         solver='saga', title='Cross validation with no penalty')

    # run cross validation with L2 penalty
    print('#################################################')
    print('Cross Validation with L2 penalty')
    run_cross_validation(features, target, C=params['C'],
                         title='Cross validation with l2 penalty')

    # plot decision boundaries
    plt.show()
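# --- Hypothetical sketch (not part of the original script) ---
# The main() above calls a find_parameters helper that is not defined in this
# snippet. A minimal sketch of what such a helper might look like, assuming
# scikit-learn's GridSearchCV and LogisticRegression; the parameter grid is
# illustrative only, not the author's actual search space.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV


def find_parameters(features, target, scorer=None):
    """Return the best hyperparameters found by a cross-validated grid search."""
    grid = GridSearchCV(LogisticRegression(max_iter=1000),
                        param_grid={'C': [0.01, 0.1, 1, 10, 100]},
                        scoring=scorer,
                        cv=5)
    grid.fit(features, target)
    # best_params_ is a dict such as {'C': 1}, matching the params['C'] usage above
    return grid.best_params_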
def train_network(model, epochs=5):
    """Main script for training the Behavioral Cloning Network model."""
    modelname = "model"
    print(model.summary())

    data = DataFeeder()
    callbacks = [ModelCheckpoint('model{epoch:02d}.h5')]
    # callbacks.append(EarlyStopping(monitor='val_loss', min_delta=1e-6, verbose=1))

    model.compile(optimizer="adam", loss="mse")
    history = model.fit_generator(data.fetch_train(),
                                  epochs=epochs,
                                  steps_per_epoch=data.steps_per_epoch,
                                  validation_data=data.fetch_valid(),
                                  validation_steps=data.validation_steps,
                                  verbose=1,
                                  callbacks=callbacks)

    model.save(modelname + ".h5")
    print("Model saved to {}.h5".format(modelname))

    # plot and save the training history
    fig = plt.figure()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper right')
    plt.savefig(modelname + '_training_history.png')
    plt.close(fig)
    return "Finish"
def run_4d_model():
    """4D example."""
    print('\nLinear Discriminant Analysis - 4 dimensions\n')

    # get features of the data and the target
    dt = DataFeeder()
    X, y = dt.get_data()

    # reduce our features to 4 dimensions
    X = run_pca(X, n_components=4, columns=['pc_1', 'pc_2', 'pc_3', 'pc_4'])

    # split data into 70% training & 30% testing
    X_train_std, X_test_std, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1)

    # create linear discriminant analysis model
    model = LinearDiscriminantAnalysis()
    # train
    model.fit(X_train_std, y_train)
    # test
    y_pred = model.predict(X_test_std)

    # calculate model accuracy score
    score = accuracy_score(y_test, y_pred) * 100
    print('# Accuracy score: %.2f' % score)
    calculate_f1_score(y_test, y_pred)

    # plot confusion matrix
    plot_confusion_matrix(y_test, y_pred, normalize=True,
                          title='Confusion Matrix')
    plt.show()
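# --- Hypothetical sketch (not part of the original script) ---
# run_4d_model() and run_2d_model() below both call a run_pca helper whose
# definition is not included in this snippet. A minimal sketch under the
# assumption that it standardises the features and returns the leading
# principal components as a DataFrame; the default column names are guesses
# based on how the helper is called above.
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


def run_pca(X, n_components=2, columns=('pc_1', 'pc_2')):
    """Return a DataFrame of the first n_components principal components."""
    X_std = StandardScaler().fit_transform(X)
    components = PCA(n_components=n_components).fit_transform(X_std)
    return pd.DataFrame(components, columns=list(columns))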
class MyTensorBoard(TensorBoard):
    def __init__(self, smt, log_dir='./logs', histogram_freq=0,
                 write_graph=True, write_images=False, flags=None):
        if not flags:
            raise Exception("flags cannot be None!")
        super(MyTensorBoard, self).__init__(log_dir, histogram_freq,
                                            write_graph, write_images)
        self.smt = smt
        self.FLAGS = flags
        self.test_feeder = DataFeeder(data_dir=self.FLAGS.data_dir,
                                      prefix="newstest2013",
                                      vocab_size=self.FLAGS.vocab_size)

    def test(self):
        # save the current weights, reload them into the search model and run
        # a quick translation test
        saved_weights = "temp.ckpt"
        self.model.save(saved_weights, overwrite=True)
        self.smt.load_weights(saved_weights)
        _ = test_translation(self.smt, self.test_feeder, self.FLAGS,
                             nbatches=2, search_method=1)
        try:
            os.remove(saved_weights)
        except OSError:
            pass

    def on_batch_end(self, batch, logs={}):
        if (batch + 1) % 100 == 0:
            import tensorflow as tf
            for name, value in logs.items():
                if name in ['batch', 'size']:
                    continue
                summary = tf.Summary()
                summary_value = summary.value.add()
                summary_value.simple_value = logs.get(name).item()
                summary_value.tag = name
                self.writer.add_summary(summary, batch)
            self.writer.flush()

        if (batch + 1) % self.FLAGS.save_frequency == 0:
            save_path = "model-%d.ckpt" % (batch + 1)
            self.model.save(os.path.join(self.log_dir, save_path))

        if (batch + 1) % self.FLAGS.validation_frequency == 0:
            self.test()
            val_loss = self.model.evaluate_generator(
                generator=self.test_feeder.produce(self.FLAGS.batch_size),
                val_samples=self.test_feeder.get_size())
            print("||| Validation Loss: %.3f" % val_loss)
            print("-------------------------------------------------------")

            import tensorflow as tf
            summary = tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = val_loss
            summary_value.tag = "val_loss"
            self.writer.add_summary(summary, batch)
            self.writer.flush()
def test(self, reader, feeding=None): """ Testing method. Will test input data. :param reader: A batch reader that reads and yeilds data items, it should be a paddle.v2.batch. :type reader: collections.Iterable :param feeding: Feeding is a map of neural network input name and array index that reader returns. :type feeding: dict :return: """ import py_paddle.swig_paddle as api from data_feeder import DataFeeder feeder = DataFeeder(self.__data_types__, feeding) evaluator = self.__gradient_machine__.makeEvaluator() out_args = api.Arguments.createArguments(0) evaluator.start() total_cost = 0 num_samples = 0.0 for data_batch in reader(): num_samples += len(data_batch) in_args = feeder(data_batch) self.__prepare_parameter__(in_args) self.__gradient_machine__.forward(in_args, out_args, api.PASS_TEST) total_cost += out_args.sum() self.__gradient_machine__.eval(evaluator) evaluator.finish() return v2_event.TestResult(evaluator=evaluator, cost=total_cost / num_samples)
def train(self, reader, num_passes=1, event_handler=None, feeding=None):
    """
    Training method. Will train num_passes of input data.

    :param reader: A reader that reads and yields data items.
    :param num_passes: The total train passes.
    :param event_handler: Event handler. A method will be invoked when event
                          occurred.
    :type event_handler: (BaseEvent) => None
    :param feeding: Feeding is a map of neural network input name and array
                    index that reader returns.
    :type feeding: dict
    :return:
    """
    if event_handler is None:
        event_handler = default_event_handler
    __check_train_args__(**locals())

    updater = self.__optimizer__.create_local_updater()
    updater.init(self.__gradient_machine__)

    self.__gradient_machine__.start()
    batch_evaluator = self.__gradient_machine__.makeEvaluator()
    assert isinstance(batch_evaluator, api.Evaluator)
    pass_evaluator = self.__gradient_machine__.makeEvaluator()
    assert isinstance(pass_evaluator, api.Evaluator)
    out_args = api.Arguments.createArguments(0)
    feeder = DataFeeder(self.__data_types__, feeding)

    for pass_id in xrange(num_passes):
        event_handler(v2_event.BeginPass(pass_id))
        pass_evaluator.start()
        updater.startPass()
        for batch_id, data_batch in enumerate(reader()):
            batch_evaluator.start()
            event_handler(
                v2_event.BeginIteration(pass_id=pass_id, batch_id=batch_id))
            pass_type = updater.startBatch(len(data_batch))
            self.__gradient_machine__.forwardBackward(
                feeder(data_batch), out_args, pass_type)
            self.__gradient_machine__.eval(pass_evaluator)
            self.__gradient_machine__.eval(batch_evaluator)
            for each_param in self.__gradient_machine__.getNonStaticParameters():
                updater.update(each_param)
            cost_sum = out_args.sum()
            cost = cost_sum / len(data_batch)
            updater.finishBatch(cost)
            batch_evaluator.finish()
            event_handler(
                v2_event.EndIteration(pass_id=pass_id,
                                      batch_id=batch_id,
                                      cost=cost,
                                      evaluator=batch_evaluator))
        updater.finishPass()
        pass_evaluator.finish()
        event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
    self.__gradient_machine__.finish()
def main(): """ Initialise DataFrame and pull the features and targets """ df = DataFeeder() features, target = df.get_data() """ Use only 1 component """ features = df.pca(n_components=1) """ Split features and target into 70% train and 30% test """ features_train, features_test, target_train, target_test = train_test_split( features, target, test_size=0.3, stratify=target, random_state=100) """ Initialise Gaussian Naive Bayes into variable clf """ clf = GaussianNB() """ Fit the training data into the classifier and predict using test data """ y_pred = clf.fit(features_train, target_train).predict(features_test) """ Calculate and print accuracy score """ acc = accuracy_score(target_test, y_pred) * 100 print("Accuracy Score: %.2f" % acc) print("F1 score: %.2f" % (f1_score(target_test, y_pred) * 100)) print("Recall score: %.2f" % (recall_score(target_test, y_pred) * 100)) print("Precision score: %.2f" % (precision_score(target_test, y_pred) * 100))
def run_2d_model():
    """2D example."""
    print('\nLinear Discriminant Analysis - 2 dimensions with decision regions\n')

    # get features of the data and the target
    dt = DataFeeder()
    X, y = dt.get_data()

    # reduce our features to only 2 dimensions
    X = run_pca(X)

    # split data into 70% training & 30% testing
    X_train_std, X_test_std, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1)

    # create linear discriminant analysis model
    model = LinearDiscriminantAnalysis()
    # train
    model.fit(X_train_std, y_train)
    # test
    y_pred = model.predict(X_test_std)

    # calculate model accuracy score
    score = accuracy_score(y_test, y_pred) * 100
    print('# Accuracy score: %.2f' % score)
    calculate_f1_score(y_test, y_pred)

    # prepare data for visualization
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined_std = np.hstack((y_train, y_test))

    # plot decision boundaries
    plt.figure()
    plot_decision_regions(X_combined_std, y_combined_std, model)

    # plot confusion matrix
    plot_confusion_matrix(y_test, y_pred, normalize=True,
                          title='Confusion Matrix')
    plt.show()
def main():
    # init data feeder
    df = DataFeeder()
    # get pre-processed features and target
    features, target = df.get_data()

    plot_hist(target,
              xlabel='Diagnosis',
              ylabel='Patient Records',
              title='Patient Diagnosis Distribution',
              xlim=['M', 'B'])

    # run PCA to reduce data dimensionality
    # features = df.pca(n_components=2)
    # features = df.pca(n_components=4)
    features = df.pca(n_components=10)

    # find best hyperparameter
    n_neighbors = find_best_params(features, target)['n_neighbors']
    print("Best number of neighbors: %d" % n_neighbors)

    # run train/test split
    std_test_train_split(features, target, n_neighbors=n_neighbors)

    # run cross validation
    cross_validation(features, target, n_neighbors=n_neighbors)

    # show all graphs
    plt.show()
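# --- Hypothetical sketch (not part of the original script) ---
# The main() above relies on a find_best_params helper that is not shown in
# this snippet. A minimal sketch of one plausible implementation, assuming
# scikit-learn's GridSearchCV and KNeighborsClassifier; the search range of
# 1-30 neighbours is an illustrative assumption.
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier


def find_best_params(features, target):
    """Return the best KNN hyperparameters found by a 10-fold grid search."""
    grid = GridSearchCV(KNeighborsClassifier(),
                        param_grid={'n_neighbors': list(range(1, 31))},
                        cv=10)
    grid.fit(features, target)
    # best_params_ is a dict such as {'n_neighbors': 5}, matching the
    # ['n_neighbors'] lookup above
    return grid.best_params_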
def iter_infer(self, input, feeding=None):
    # wrap the input samples in a single-batch reader and yield the forward
    # test results batch by batch
    feeder = DataFeeder(self.__data_types__, feeding)
    batch_size = len(input)

    def __reader_impl__():
        for each_sample in input:
            yield each_sample

    reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
    self.__gradient_machine__.start()
    for data_batch in reader():
        yield self.__gradient_machine__.forwardTest(feeder(data_batch))
    self.__gradient_machine__.finish()
def main():
    # initialize the data feeder
    df = DataFeeder()
    # get feature and target data sets from the cancer data
    features, target = df.get_data()

    # perform PCA with the option of 4 or 2 components
    # features = df.pca(n_components=4)
    features = df.pca(n_components=2)

    # find best hyperparameters (max depth for decision tree)
    scorer = make_scorer(f1_score, pos_label=0)
    params = find_best_params(features, target, scorer=scorer)

    features_train, features_test, target_train, target_test = train_test_split(
        features, target, stratify=target, random_state=1)

    # run training and testing data split
    std_train_test_split(features_train, features_test, target_train,
                         target_test, max_depth=int(params['max_depth']))

    # run cross validation
    cross_validation(features, target, max_depth=int(params['max_depth']))

    plt.show()
def main():
    batch_size = 100
    mnist_train = datasets.MNIST("/hdd/Data/MNIST/", train=True,
                                 transform=transforms.ToTensor(), download=True)
    mnist_test = datasets.MNIST("/hdd/Data/MNIST/", train=False,
                                transform=transforms.ToTensor(), download=True)
    data_feeder = DataFeeder(mnist_train, preprocess_workers=1, cuda_workers=1,
                             cpu_size=10, cuda_size=10, batch_size=batch_size,
                             use_cuda=True, volatile=False)
    data_feeder.start_queue_threads()

    cnn = ModelStruct(CNN().cuda(), 0.001)
    fcc = ModelStruct(FCC().cuda(), 0.001)
    test_data = make_batch(len(mnist_test), 0, mnist_test, use_cuda=True)

    for i in range(100001):
        images, labels = data_feeder.get_batch()
        train(cnn, images, labels, i)
        train(fcc, images, labels, i)
        if i % 100 == 0:
            evaluate_acc(cnn, test_data, i)
            evaluate_acc(fcc, test_data, i)
        if i in [33333, 66666]:
            decrease_lr(cnn)
            decrease_lr(fcc)
        if i % 20000 == 0:
            torch.save(cnn.model, "savedir/cnn_it" + str(i // 1000) + "k.pt")
            torch.save(fcc.model, "savedir/fcc_it" + str(i // 1000) + "k.pt")

    print(max(cnn.acc))
    print(max(fcc.acc))
    graph(fcc, cnn)

    # smooth the logged curves with an exponentially weighted moving average
    cnn.losses = losses_to_ewma(cnn.losses)
    cnn.val_losses = losses_to_ewma(cnn.val_losses, alpha=0.3)
    cnn.acc = losses_to_ewma(cnn.acc)
    fcc.losses = losses_to_ewma(fcc.losses)
    fcc.val_losses = losses_to_ewma(fcc.val_losses, alpha=0.3)
    fcc.acc = losses_to_ewma(fcc.acc)
    graph(fcc, cnn)

    data_feeder.kill_queue_threads()
def test(self, reader, feeding=None):
    feeder = DataFeeder(self.__data_types__, feeding)
    evaluator = self.__gradient_machine__.makeEvaluator()
    out_args = api.Arguments.createArguments(0)
    evaluator.start()
    total_cost = 0
    num_samples = 0.0
    for data_batch in reader():
        num_samples += len(data_batch)
        self.__gradient_machine__.forward(feeder(data_batch), out_args,
                                          api.PASS_TEST)
        total_cost += out_args.sum()
        self.__gradient_machine__.eval(evaluator)
    evaluator.finish()
    return v2_event.TestResult(evaluator=evaluator,
                               cost=total_cost / num_samples)
num_training_images = 200
img_height = 288
img_width = 160
frames_to_mix = 3
input_shape = (3, img_height, img_width)
remake_images = False

ImageMaker = VideoToBlurredImgConverter(images_per_video, frames_to_mix,
                                        img_height, img_width,
                                        vid_dir='./data/',
                                        rebuild_target_dir=False)
if remake_images:
    ImageMaker.make_and_save_images()

train_fnames, val_fnames = train_val_split('./data', num_training_images,
                                           images_per_video)
data_feeder = DataFeeder(batch_size=20, gen_only_batch_size=20,
                         fnames=train_fnames)

gen_model, disc_model, gen_disc_model = make_models(
    input_shape,
    n_filters_in_res_blocks=[64 for _ in range(3)],
    gen_filter_size=3,
    layers_in_res_blocks=2,
    res_block_subsample=(2, 2),
    filters_in_deconv=[32 for _ in range(3)],
    deconv_filter_size=3,
    n_disc_filters=[64, 32, 32])

trainer = Trainer(gen_model, disc_model, gen_disc_model, data_feeder,
                  report_freq=10)
trainer.train(n_steps=1000)
gen_model, disc_model, gen_disc_model = trainer.get_models()
save_predicted_images(gen_model, val_fnames)
def main(nn_config, data_config):
    df = DataFeeder(data_config)
    cl = SuperPrototypicalNet(nn_config, df)
    cl.classifier()
def train(self, reader, num_passes=1, event_handler=None, feeding=None):
    """
    Training method. Will train num_passes of input data.

    :param reader: A reader that reads and yields data items. Usually we use
                   a batched reader to do mini-batch training.
    :type reader: collections.Iterable
    :param num_passes: The total train passes.
    :param event_handler: Event handler. A method will be invoked when event
                          occurred.
    :type event_handler: (BaseEvent) => None
    :param feeding: Feeding is a map of neural network input name and array
                    index that reader returns.
    :type feeding: dict|list
    :return:
    """
    import py_paddle.swig_paddle as api
    from data_feeder import DataFeeder
    if event_handler is None:
        event_handler = default_event_handler
    __check_train_args__(**locals())

    self.__parameter_updater__ = self.__optimizer__.create_updater(
        self.__is_local__, num_passes, self.__use_sparse_updater__,
        self.__pserver_spec__, self.__use_etcd__)
    self.__parameter_updater__.init(self.__gradient_machine__)

    self.__gradient_machine__.start()
    batch_evaluator = self.__gradient_machine__.makeEvaluator()
    assert isinstance(batch_evaluator, api.Evaluator)
    pass_evaluator = self.__gradient_machine__.makeEvaluator()
    assert isinstance(pass_evaluator, api.Evaluator)
    out_args = api.Arguments.createArguments(0)
    feeder = DataFeeder(self.__data_types__, feeding)

    for pass_id in xrange(num_passes):
        event_handler(v2_event.BeginPass(pass_id))
        pass_evaluator.start()
        self.__parameter_updater__.startPass()
        for batch_id, data_batch in enumerate(reader()):
            batch_evaluator.start()
            event_handler(
                v2_event.BeginIteration(pass_id=pass_id, batch_id=batch_id))
            pass_type = self.__parameter_updater__.startBatch(len(data_batch))
            in_args = feeder(data_batch)
            self.__prepare_parameter__(in_args)
            self.__gradient_machine__.forwardBackward(in_args, out_args,
                                                      pass_type)
            self.__gradient_machine__.eval(pass_evaluator)
            self.__gradient_machine__.eval(batch_evaluator)
            event_handler(
                v2_event.EndForwardBackward(pass_id=pass_id,
                                            batch_id=batch_id,
                                            gm=self.__gradient_machine__))
            for each_param in self.__gradient_machine__.getNonStaticParameters():
                self.__parameter_updater__.update(each_param)
            cost_sum = out_args.sum()
            cost = cost_sum / len(data_batch)
            self.__parameter_updater__.finishBatch(cost)
            batch_evaluator.finish()
            event_handler(
                v2_event.EndIteration(pass_id=pass_id,
                                      batch_id=batch_id,
                                      cost=cost,
                                      evaluator=batch_evaluator,
                                      gm=self.__gradient_machine__))
        self.__parameter_updater__.finishPass()
        pass_evaluator.finish()
        event_handler(
            v2_event.EndPass(pass_id,
                             evaluator=pass_evaluator,
                             gm=self.__gradient_machine__))
    self.__gradient_machine__.finish()
def main(data_config, nn_config):
    df = DataFeeder(data_config)
    cl = Cascading(nn_config, df)
    cl.classifier()
import numpy as np
import matplotlib.pyplot as plt

from network import ImageTransformerNetwork
from loss import Loss, VGG
from utils import show_image, graph_losses
from data_feeder import DataFeeder
from PIL import Image

coco_path = "/hdd/Data/MSCOCO2017/images"
annFile = "/hdd/Data/MSCOCO2017/annotations"

train_data_feeder = DataFeeder(coco_path + "/train2017/",
                               annFile + "/captions_train2017.json",
                               preprocess_workers=1, cuda_workers=1,
                               numpy_size=20, cuda_size=2, batch_size=1)
train_data_feeder.start_queue_threads()

image_transformer_network = ImageTransformerNetwork().cuda()
# image_transformer_network = torch.load("savedir/model_2_acidcrop_it90k.pt")
vgg = VGG().cuda()
vgg.eval()
loss = Loss().cuda()
for param in vgg.parameters():
    param.requires_grad = False

learning_rate = 0.0001
def main():
    batch_size = 100
    train_data, val_data, test_data = create_train_val_test_split(batch_size)
    data_feeder = DataFeeder(train_data, preprocess_workers=1, cuda_workers=1,
                             cpu_size=10, cuda_size=10, batch_size=batch_size,
                             use_cuda=True, volatile=False)
    data_feeder.start_queue_threads()
    val_data = make_batch(len(val_data), 0, val_data, use_cuda=True,
                          volatile=True)
    test_data = make_batch(len(test_data), 0, test_data, use_cuda=True,
                           volatile=True)

    cnn = CNN().cuda()
    fcc = FCC().cuda()
    optimizer_cnn = optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9,
                              weight_decay=0.00001)
    optimizer_fcc = optim.SGD(fcc.parameters(), lr=0.001, momentum=0.9,
                              weight_decay=0.00001)

    cnn_train_loss = Logger("cnn_train_losses.txt")
    cnn_val_loss = Logger("cnn_val_losses.txt")
    cnn_val_acc = Logger("cnn_val_acc.txt")
    fcc_train_loss = Logger("fcc_train_losses.txt")
    fcc_val_loss = Logger("fcc_val_losses.txt")
    fcc_val_acc = Logger("fcc_val_acc.txt")

    # permute = Variable(torch.from_numpy(np.random.permutation(28*28)).long().cuda(), requires_grad=False)
    permute = None

    for i in range(100001):
        images, labels = data_feeder.get_batch()
        train(cnn, optimizer_cnn, images, labels, i, cnn_train_loss, permute)
        train(fcc, optimizer_fcc, images, labels, i, fcc_train_loss, permute)
        if i % 100 == 0:
            print(i)
            evaluate_acc(batch_size, cnn, val_data, i, cnn_val_loss,
                         cnn_val_acc, permute)
            evaluate_acc(batch_size, fcc, val_data, i, fcc_val_loss,
                         fcc_val_acc, permute)
        if i in [70000, 90000]:
            decrease_lr(optimizer_cnn)
            decrease_lr(optimizer_fcc)
        if i % 1000 == 0:
            torch.save(cnn.state_dict(),
                       "savedir/cnn_it" + str(i // 1000) + "k.pth")
            torch.save(fcc.state_dict(),
                       "savedir/fcc_it" + str(i // 1000) + "k.pth")

    data_feeder.kill_queue_threads()

    import evaluate
    evaluate.main(permute)
def main(): """ Main function containing object initialization and method triggering order """ # data feeding object df = DataFeeder() # evaluation object ev = Evaluator() # get features and target data sets features, target = df.get_data(normalize=False) Plotter.plot_distribution(target, ["M", "B"], bins=2, title="Diagnosis Distribution", xlabel="Diagnosis", ylabel="Records") Plotter.plot_distribution(features.iloc[:, 1], bins=50, title="Texture Mean Distribution", xlabel="Texture Mean", ylabel="Records") Plotter.plot_distribution(features.iloc[:, 2], bins=50, title="Perimeter Mean Distribution", xlabel="Perimeter Mean", ylabel="Records") # get features and target data sets features, target = df.get_data() # run PCA # features = df.pca(n_components=2) # features = df.pca(n_components=4) features = df.pca(n_components=10) # split data features_train, features_test, target_train, target_test = Evaluator.split( features, target, stratify=target) # find best parameters based on F1-score scorer = make_scorer(f1_score, pos_label=0) linear_params, rbf_params = Evaluator.find_best_params(features_train, target_train, n_folds=10, scoring=scorer) # train and test model trained on K-fold cross validation ev.k_fold_cv(features, target, n_splits=10, linear_params=linear_params, rbf_params=rbf_params) # train and test linear SVM model with best parameter ev.run_linear_svm(features_train, features_test, target_train, target_test, params=linear_params) # train and test rbf SVM model with best parameter ev.run_rbf_svm(features_train, features_test, target_train, target_test, params=rbf_params) # show all plot figures plt.show()
import sys

from PyQt5.QtWidgets import QApplication
from PyQt5.QtWidgets import QMainWindow
from PyQt5.QtWidgets import QPushButton
from PyQt5.QtWidgets import QLineEdit
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

from data_feeder import DataFeeder
from SHIndi import SHIndi

if __name__ == "__main__":
    app = QApplication(sys.argv)
    IndiApp = SHIndi()
    IndiApp.show()
    IndiApp.connect()

    feeder = DataFeeder(IndiApp)

    # futures instrument master
    data = feeder.request(queryname="FRF_MST")

    # futures order
    print("-------------------------------------\r\n")
    print(data)

    app.exec_()