def integrate_model():
    """Merge the two stored models, evaluate the result and save it.

    Loads ``model_A.pickle`` and ``model_B.pickle``, combines them with the
    integration routine selected by the module-level ``mode`` (0 -> SVM,
    1 -> CART, anything else -> LR), scores the merged model on the matching
    test CSV, saves it as ``inte_model.pickle`` and shows the accuracy in
    ``label_1``.

    The test CSV is split as: every column except the last (with the
    ``'Unnamed: 0'`` column dropped) is a feature, the last column is the label.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load model files that come from a trusted source.
    with open("model_A.pickle", "rb") as f:
        model_A = pickle.load(f)
    with open("model_B.pickle", "rb") as f:
        model_B = pickle.load(f)

    if mode.get() == 0:
        test_data = pd.read_csv('test_svm.csv')
        inte_model = fed_integrate_model_svm(model_A, model_B)
        # Call form of print: identical output on Python 2, valid on Python 3.
        print(test_data.head(6))
    elif mode.get() == 1:
        test_data = pd.read_csv('test_cart.csv')
        inte_model = fed_integrate_model_cart(model_A, model_B)
    else:
        test_data = pd.read_csv('test_lr.csv')
        inte_model = fed_integrate_model_lr(model_A, model_B)

    x_test = test_data.iloc[:, :-1].drop('Unnamed: 0', axis=1)
    y_test = test_data.iloc[:, -1]
    y_pre = inte_model.predict(x_test)
    accuracy = accuracy_score(y_pre, y_test)

    # Save before updating the label so the message never claims a save that
    # did not actually happen.
    save_model(inte_model, 'inte_model.pickle')
    label_1.config(text='准确率为: ' + str(accuracy) + '\n' +
                   '模型以保存为inte_model.pickle')
def train_mixup(nbEpochs=1):
    """Train the module-level ``cnn`` with mixup and evaluate it after each epoch.

    For every epoch the function runs one pass over ``training_generator``
    with mixed inputs (``mixup_data`` / ``mixup_criterion``), then one pass
    over ``eval_generator`` without gradients, prints both results, appends
    ``[epoch, lr, train_loss, train_acc, test_loss, test_acc]`` to
    ``save_result`` and, after epoch 30, saves the model whenever the test
    accuracy improves.

    Args:
        nbEpochs: number of epochs to run.
    """
    best_test_acc = 0
    # Sample counts (not batch counts) used to normalise the epoch sums.
    train_size = len(training_generator.dataset)
    test_size = len(eval_generator.dataset)
    # Batch counts only drive the progress bar; compute them once.
    num_train_batches = len(training_generator)
    num_eval_batches = len(eval_generator)

    for epoch in range(nbEpochs):
        train_loss = 0.0
        train_acc = 0.0
        cnn.train()
        for index_batch, (inputs, labels) in enumerate(training_generator):
            inputs, labels = inputs.to(device), labels.long().to(device)
            inputs, lbl_a, lbl_b, lam = mixup_data(inputs, labels, alpha)

            optimizer.zero_grad()
            outputs = cnn(inputs)
            loss = mixup_criterion(criterion, outputs, lbl_a, lbl_b, lam)

            _, predicted = torch.max(outputs.data, 1)
            # Accuracy against mixed targets: weight the matches against each
            # label set by its mixing coefficient.
            train_acc += (lam * (predicted == lbl_a).sum().item() +
                          (1 - lam) * (predicted == lbl_b).sum().item())

            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            progress_bar(index_batch, num_train_batches)

        # NOTE(review): the loss is summed once per batch but divided by the
        # number of samples; if `criterion` already averages over the batch
        # this under-reports the loss -- confirm the intended normalisation.
        train_loss /= train_size
        train_acc /= train_size
        print("Train at Epoch:", epoch, " loss:", train_loss, " accuracy:",
              100.0 * train_acc)

        test_loss = 0.0
        test_acc = 0.0
        cnn.eval()
        with torch.no_grad():
            for index_batch, (inputs, labels) in enumerate(eval_generator):
                inputs, labels = inputs.to(device), labels.long().to(device)
                outputs = cnn(inputs)
                loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs.data, 1)
                test_acc += (predicted == labels).sum().item()
                test_loss += loss.item()
                progress_bar(index_batch, num_eval_batches)

        test_loss /= test_size
        test_acc /= test_size
        print("Test at Epoch:", epoch, " loss:", test_loss, " accuracy:",
              100.0 * test_acc)

        lr = linear_adjust_learning_rate(epoch)
        save_result.append(
            [epoch, lr, train_loss, train_acc, test_loss, test_acc])

        # Only keep checkpoints that improve on the best test accuracy, and
        # only once the first 30 epochs are over.
        if epoch > 30 and test_acc > best_test_acc:
            best_test_acc = test_acc
            save_model("Dorfer2", str(epoch))
def train_model():
    """Train the model chosen in the UI on the selected file and save it.

    Reads the data file named by ``loc``, trains with the routine selected
    by ``mode`` (0 -> SVM, 1 -> CART, anything else -> LR), shows the first
    element of the training result in ``label_1``, stores the whole result
    under ``para_dict['result']`` and saves its second element as
    ``model_A.pickle``.
    """
    data = load_file(loc.get())

    # Pick the training routine first, then run it once.
    if mode.get() == 0:
        trainer = train_svm
    elif mode.get() == 1:
        trainer = train_cart
    else:
        trainer = train_lr
    result_coef = trainer(data)

    accuracy_text = '准确率为: ' + str(result_coef[0])
    label_1.config(text=accuracy_text)
    para_dict['result'] = result_coef
    save_model(result_coef[1], 'model_A.pickle')
def after_run(self, run_context, run_values):
    """
    Called after each call to run().

    Logs the loss and throughput of the step when `should_log()` is true and
    freezes the model when `should_freeze()` is true.

    Args:
      run_context: a `tf.train.SessionRunContext` as the context to execute ops
        and tensors.
      run_values: results of requested ops/tensors by `before_run()`.

    """
    # A negative step means the real step is not known yet; adopt the global
    # step reported by the session.
    if self._step < 0:
        self._step = run_values.results["global_step"]

    duration = time.time() - self._start_time
    loss_value = run_values.results["loss"]
    num_examples_per_step = FLAGS.batch_size

    if self.should_log():
        # A step can finish within the timer resolution and the epoch
        # counter can be exactly zero; report inf / nan for the affected
        # rate instead of raising ZeroDivisionError inside the hook.
        if duration > 0:
            examples_per_sec = num_examples_per_step / duration
        else:
            examples_per_sec = float("inf")
        sec_per_batch = float(duration)
        if self._epoch != 0:
            sec_per_epoch = (time.time() - self._tic) / self._epoch
        else:
            sec_per_epoch = float("nan")

        if not self._atomic_forces or FLAGS.forces_only:
            format_str = "step %6d, epoch=%7.2f, loss=%10.6f " \
                         "(%6.1f examples/sec; %8.3f sec/batch, %8.3f sec/epoch)"
            tf.logging.info(
                format_str % (self._step, self._epoch, loss_value,
                              examples_per_sec, sec_per_batch, sec_per_epoch)
            )
        else:
            y_val = run_values.results['y_loss']
            f_val = run_values.results['f_loss']
            format_str = "step %6d, epoch=%7.2f, loss=%10.6f, y_loss=%10.6f, " \
                         "f_loss = %10.6f (%6.1f examples/sec; %7.3f sec/batch)"
            tf.logging.info(
                format_str % (self._step, self._epoch, loss_value, y_val, f_val,
                              examples_per_sec, sec_per_batch)
            )

    if self.should_freeze():
        save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)
def train(task_id, method, file_dir):
    """Train a model for one task, export it and build the Android app.

    Args:
        task_id: identifier used in log messages and passed to the trainer.
        method: key into ``train_func`` (matched case-insensitively).
        file_dir: working directory holding the images, the label file and
            the trained-model folder.

    Returns:
        The outputs of ``build_android_app`` on success. ``None`` when the
        method is unknown, when the trainer returns no model file, or when
        the app build reports failure.
    """
    # support multi thread, each thread create a new session with new graph
    with tf.Session(graph=tf.Graph()) as sess:
        tf.keras.backend.set_session(sess)

        train_fc = train_func.get(method.lower())
        if train_fc is None:
            message = "[task{}]invalid method. not classification or segmentation"
            TRAIN_LOGGER.info(message.format(task_id))
            return None

        model_file = train_fc(task_id, file_dir)
        gc.collect()
        if model_file is None:
            return model_file

        # save as .pb file
        saved_models_path = os.path.join(file_dir, TRAINED_MODEL_FOLDER)
        save_model(saved_models_path, SAVE_FINAL_MODEL_PB_FILE, model_file)

        # build android app
        images_dir = os.path.join(file_dir, IMAGE_FOLDER)
        label_file = os.path.join(file_dir, LABEL_INFO_FILE)
        image_dp = ImageDataPipeline(images_dir, label_file,
                                     image_size=IMAGE_SIZE)

        # Invert the name -> index mapping into a list ordered by index.
        labels = [''] * image_dp.labels_classes
        for label_name, label_index in image_dp.label_name_val_dict.items():
            labels[label_index] = label_name

        is_success, outputs = build_android_app(labels, saved_models_path,
                                                model_file)
        return outputs if is_success else None
def train_with_multiple_gpus():
    """
    Train the KCNN model with multiple GPUs.

    Builds one model tower per GPU in
    `[FLAGS.first_gpu_id, FLAGS.first_gpu_id + FLAGS.num_gpus)`, averages the
    tower gradients on the CPU, applies them together with the moving-average
    update (and the batch-norm updates when `FLAGS.normalizer` is
    'batch_norm'), then runs the training loop with periodic logging,
    summaries, checkpoints and model freezing.
    """
    set_logging_configs(debug=FLAGS.debug,
                        logfile=join(FLAGS.train_dir, FLAGS.logfile))

    # Output the process id
    tf.logging.info("~pid={}".format(getpid()))

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Get or create the global step variable to count the number of train()
        # calls. This equals the number of batches processed * FLAGS.num_gpus.
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Create an optimizer that performs gradient descent.
        with tf.name_scope("Optimizer"):
            learning_rate = kcnn.get_learning_rate(global_step)
            opt = kcnn.get_optimizer(learning_rate)

        # Initialize the input pipeline. One fetched batch feeds all towers,
        # so it is `num_gpus` times the per-tower batch size.
        total_batch_size = FLAGS.batch_size * FLAGS.num_gpus
        num_examples = pipeline.get_dataset_size(FLAGS.dataset,
                                                 for_training=True)
        batch = pipeline.next_batch(for_training=True,
                                    shuffle=True,
                                    dataset_name=FLAGS.dataset,
                                    num_epochs=FLAGS.num_epochs,
                                    batch_size=total_batch_size)
        configs = pipeline.get_configs(for_training=True)
        params = extract_configs(configs, for_training=True)

        # Split the batch for each tower
        tensors_splits = get_splits(batch, num_splits=FLAGS.num_gpus)

        # Retain all non-tower summaries
        non_tower_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)

        # Calculate the gradients for each model tower.
        tower_grads = []
        summaries = []
        loss = None
        batchnorm_updates = []
        reuse_variables = False

        for i in range(FLAGS.first_gpu_id,
                       FLAGS.first_gpu_id + FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope(
                        '%s%d' % (constants.TOWER_NAME, i)) as scope:
                    # Calculate the loss for one tower of the KCNN model.
                    # This function constructs the entire model but shares the variables
                    # across all towers.
                    # NOTE(review): the split is indexed by the GPU id `i`,
                    # not by the tower ordinal. If `get_splits` returns a
                    # sequence of length `num_gpus`, this is presumably out
                    # of range whenever `first_gpu_id` > 0 -- confirm.
                    loss = tower_loss(tensors_splits[i], params, scope,
                                      reuse_variables)

                    # Reuse variables for the next tower.
                    reuse_variables = True

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Retain the Batch Normalization updates operations only from the
                    # final tower. Ideally, we should grab the updates from all towers
                    # but these stats accumulate extremely fast so we can ignore the
                    # other stats from the other towers without significant detriment.
                    if FLAGS.normalizer and FLAGS.normalizer == 'batch_norm':
                        batchnorm_updates = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, scope)

                    # Calculate the gradients for the batch of data on this tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        summaries.extend(
            add_total_norm_summaries(grads, "yf", only_summary_total=False))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for gradients and for trainable variables.
        with tf.name_scope("Summary"):
            for grad, var in grads:
                if grad is not None:
                    summaries.append(
                        tf.summary.histogram(var.op.name + '/gradients', grad))
            for var in tf.trainable_variables():
                summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        with tf.name_scope("average"):
            variable_averages = tf.train.ExponentialMovingAverage(
                constants.VARIABLE_MOVING_AVERAGE_DECAY, global_step)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())

        # Group all updates into a single train op.
        if FLAGS.normalizer and FLAGS.normalizer == 'batch_norm':
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            train_op = tf.group(batchnorm_updates_op, apply_gradient_op,
                                variables_averages_op)
        else:
            train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Save the training flags
        save_training_flags(FLAGS.train_dir, FLAGS.flag_values_dict())

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.max_to_keep)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries + non_tower_summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        # NOTE(review): neither this session nor the summary writer created
        # below is closed anywhere in this function.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Restore the previous checkpoint
        start_step = 1
        if FLAGS.restore_training or FLAGS.restore_weights_from:
            start_step = restore_previous_checkpoint(sess, global_step)
        max_steps = int(FLAGS.num_epochs * num_examples / total_batch_size) + 1

        # Create the summary writer
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        tic = time.time()

        for step in range(start_step, max_steps):

            start_time = time.time()

            try:
                _, loss_value = sess.run([train_op, loss])
            except tf.errors.OutOfRangeError:
                # The input pipeline is exhausted: write a last checkpoint
                # and leave the loop (this skips the `else` clause below).
                tf.logging.info("Stop this training after {} epochs.".format(
                    FLAGS.num_epochs))
                checkpoint_path = join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                tf.logging.info("{}-{} saved".format(checkpoint_path, step))
                break

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % FLAGS.log_frequency == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus
                epoch = step * total_batch_size / num_examples
                sec_per_epoch = (time.time() - tic) / epoch
                format_str = "%s: step %6d, epoch=%7.2f, loss = %10.6f " \
                             "(%8.1f examples/sec; %8.3f sec/batch, %8.3f sec/epoch)"
                tf.logging.info(
                    format_str % (datetime.now(), step, epoch, loss_value,
                                  examples_per_sec, sec_per_batch,
                                  sec_per_epoch))

            if step % FLAGS.save_frequency == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % (20 // FLAGS.num_gpus * FLAGS.save_frequency) == 0 or \
                    (step + 1) == max_steps:
                checkpoint_path = join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                tf.logging.info("{}-{} saved".format(checkpoint_path, step))

            if FLAGS.freeze_frequency > 0 and step > 0:
                if step % FLAGS.freeze_frequency == 0 or (step + 1) == max_steps:
                    save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)

        else:
            # `for ... else`: runs whenever the loop ends without `break`,
            # i.e. both when the range was empty and after a normal full run.
            tf.logging.info('The maximum number of epochs already reached!')

        # Save the final model
        if FLAGS.freeze_frequency > 0:
            save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)
# -*- coding: utf-8 -*-
"""
Created on 2018/5/1
@author: susmote
"""
from load_data import load_data
from train_model import lr_train_bgd
from save_model import save_model


def _banner(title):
    """Print *title* centred in a 30-column line padded with dashes."""
    print(title.center(30, '-'))


if __name__ == "__main__":
    # Load the data
    _banner("load data")
    feature, label = load_data("data.txt")

    # Train the LR model
    _banner("training")
    w = lr_train_bgd(feature, label, 1000, 0.01)

    # Save the model
    _banner("save model")
    save_model("weights", w)
def build_model(device,
                img_size,
                channels,
                test_split,
                batch_size,
                workers,
                model_arch,
                epochs,
                learning_rate,
                swa,
                enable_scheduler,
                loss='BCEDiceLoss',
                all_data=False,
                tta=False):
    """Build, train, evaluate and save a segmentation model.

    Creates the data loaders, instantiates the network named by
    ``model_arch`` and the criterion named by ``loss``, trains with Adam,
    picks a threshold with ``determine_threshold``, writes a submission CSV
    and saves the model together with its training history.

    Args:
        device: torch device, or ``None`` to use ``cuda:0`` when available
            and the CPU otherwise.
        img_size: edge length of the square input images.
        channels: number of image channels.
        test_split, batch_size, workers, all_data: forwarded to
            ``build_dataloaders``.
        model_arch: name of the network architecture (see the table below).
        epochs, learning_rate, swa, enable_scheduler: forwarded to ``train``.
        loss: name of the criterion (see the table below).
        tta: forwarded to ``make_submission``.

    Raises:
        ValueError: if ``model_arch`` or ``loss`` is not a known name.
    """
    # Lambdas keep construction lazy: only the selected class is looked up
    # and instantiated.
    model_factories = {
        'UNet': lambda: UNet(num_classes=1, depth=6, start_filts=8,
                             merge_mode='concat'),
        'UNet11': lambda: UNet11(pretrained=True),
        'UNet16': lambda: UNet16(num_classes=1, pretrained=True),
        'AlbuNet': lambda: AlbuNet(num_classes=1, pretrained=True),
        'NestedUNet': lambda: NestedUNet(),
        'Unet_2D': lambda: Unet_2D(n_channels=channels, n_classes=1),
        'Res34Unetv4': lambda: Res34Unetv4(),
        'Res34Unetv3': lambda: Res34Unetv3(),
        'Res34Unetv5': lambda: Res34Unetv5(),
        'BrainUNet': lambda: brain_unet(pretrained=True),
        'R2U_Net': lambda: R2U_Net(),
        'AttU_Net': lambda: AttU_Net(),
        'R2AttU_Net': lambda: R2AttU_Net(),
    }
    criterion_factories = {
        'BCEDiceLoss': lambda: BCEDiceLoss(),
        'LovaszSoftmaxLoss': lambda: LovaszSoftmaxLoss(),
        'JaccardLoss': lambda: JaccardLoss(device=device),
        'mIoULoss': lambda: mIoULoss(n_classes=1),
        'WeightedBCEDiceLoss': lambda: WeightedBCEDiceLoss(),
    }

    # Fail fast with a clear message; previously an unknown name only
    # surfaced as an UnboundLocalError after the data loaders were built.
    if model_arch not in model_factories:
        raise ValueError('Unknown model_arch %r; expected one of %s' %
                         (model_arch, sorted(model_factories)))
    if loss not in criterion_factories:
        raise ValueError('Unknown loss %r; expected one of %s' %
                         (loss, sorted(criterion_factories)))

    # create data loaders
    trainloader, testloader, validloader = build_dataloaders(
        image_size=(img_size, img_size),
        channels=channels,
        test_split=test_split,
        batch_size=batch_size,
        num_workers=workers,
        all_data=all_data,
        data_filepath='../siim-train-test/')

    # setup the device
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    # initialize model
    model = model_factories[model_arch]()

    # setup criterion, optimizer and metrics
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = criterion_factories[loss]()
    metric = iou_score

    # train model
    model, train_losses, test_losses, train_metrics, test_metrics = train(
        model,
        device,
        trainloader,
        testloader,
        optimizer,
        criterion,
        metric,
        epochs,
        learning_rate,
        swa=swa,
        enable_scheduler=enable_scheduler,
        model_arch=model_arch)

    # create submission
    filename = 'submission_' + model_arch + '_lr' + str(
        learning_rate) + '_' + str(epochs) + '.csv'
    print('Generating submission to ' + filename + '\n')
    thresholds, ious, index_max, threshold_max = determine_threshold(
        model,
        device,
        testloader,
        image_size=(img_size, img_size),
        channels=channels)
    make_submission(filename,
                    device,
                    model,
                    validloader,
                    image_size=(img_size, img_size),
                    channels=channels,
                    threshold=threshold_max,
                    original_size=1024,
                    tta=tta)

    # save the model
    save_model(model,
               model_arch,
               learning_rate,
               epochs,
               train_losses,
               test_losses,
               train_metrics,
               test_metrics,
               filepath='models_checkpoints')
def build_from_checkpoint(filename,
                          device,
                          img_size,
                          channels,
                          test_split,
                          batch_size,
                          workers,
                          epochs,
                          learning_rate,
                          swa,
                          enable_scheduler,
                          loss='BCEDiceLoss',
                          all_data=False,
                          tta=False):
    """Resume training from a checkpoint, then write a submission and save.

    Restores the model and its recorded history with ``load_model``, trains
    it further with Adam and the criterion named by ``loss``, joins the old
    and new histories, picks a threshold with ``determine_threshold``,
    writes a submission CSV and saves the model with the combined history.

    Args:
        filename: checkpoint file passed to ``load_model``.
        device: torch device, or ``None`` to use ``cuda:0`` when available
            and the CPU otherwise.
        img_size: edge length of the square input images.
        channels: number of image channels.
        test_split, batch_size, workers, all_data: forwarded to
            ``build_dataloaders``.
        epochs, learning_rate, swa, enable_scheduler: forwarded to ``train``.
        loss: name of the criterion (see the table below).
        tta: forwarded to ``make_submission``.

    Raises:
        ValueError: if ``loss`` is not a known criterion name.
    """
    # Lambdas keep construction lazy: only the selected class is looked up
    # and instantiated.
    criterion_factories = {
        'BCEDiceLoss': lambda: BCEDiceLoss(),
        'LovaszSoftmaxLoss': lambda: LovaszSoftmaxLoss(),
        'JaccardLoss': lambda: JaccardLoss(device=device),
        'mIoULoss': lambda: mIoULoss(n_classes=1),
        'WeightedBCEDiceLoss': lambda: WeightedBCEDiceLoss(),
    }
    # Fail fast with a clear message; previously an unknown name only
    # surfaced as an UnboundLocalError after the checkpoint was loaded.
    if loss not in criterion_factories:
        raise ValueError('Unknown loss %r; expected one of %s' %
                         (loss, sorted(criterion_factories)))

    # create data loaders
    trainloader, testloader, validloader = build_dataloaders(
        image_size=(img_size, img_size),
        channels=channels,
        test_split=test_split,
        batch_size=batch_size,
        num_workers=workers,
        all_data=all_data)

    # setup the device
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    # restore model
    (model, model_arch, train_losses_0, test_losses_0, train_metrics_0,
     test_metrics_0) = load_model(filename, device, channels=channels)

    # setup criterion, optimizer and metrics
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = criterion_factories[loss]()
    metric = iou_score

    # train model
    model, train_losses, test_losses, train_metrics, test_metrics = train(
        model,
        device,
        trainloader,
        testloader,
        optimizer,
        criterion,
        metric,
        epochs,
        learning_rate,
        swa=swa,
        enable_scheduler=enable_scheduler,
        model_arch=model_arch)

    # Keep the history chronological: the checkpoint's values were recorded
    # first, so they go before the values from this run.
    train_losses = train_losses_0 + train_losses
    test_losses = test_losses_0 + test_losses
    train_metrics = train_metrics_0 + train_metrics
    test_metrics = test_metrics_0 + test_metrics

    # create submission (separate name so the checkpoint path is not shadowed)
    submission_filename = 'submission_' + model_arch + '_lr' + str(
        learning_rate) + '_' + str(epochs) + '.csv'
    print('Generating submission to ' + submission_filename + '\n')
    # NOTE(review): `channels` is not forwarded to determine_threshold here,
    # so its default is used -- confirm that matches `channels`.
    thresholds, ious, index_max, threshold_max = determine_threshold(
        model, device, testloader, image_size=(img_size, img_size))
    make_submission(submission_filename,
                    device,
                    model,
                    validloader,
                    image_size=(img_size, img_size),
                    channels=channels,
                    threshold=threshold_max,
                    original_size=1024,
                    tta=tta)

    # save the model
    save_model(model,
               model_arch,
               learning_rate,
               epochs,
               train_losses,
               test_losses,
               train_metrics,
               test_metrics,
               filepath='models_checkpoints')
def get_memory():
    """Return the process's resident memory minus its shared part, in MiB."""
    # Take one snapshot so `rss` and `shared` describe the same instant.
    info = process.memory_info()
    return (info.rss - info.shared) / 1048576  # bytes -> MiB


x_train, x_test, train_lable, test_lable = load_data()  # Load data from folder

# Benchmark training time and memory growth for several batch sizes.
for batchsize in [64, 128, 256, 512]:
    base_memory = get_memory()
    start_time = time.time()
    model, history = run_new_model(x_train,
                                   x_test,
                                   train_lable,
                                   test_lable,
                                   Batchnorm=False,
                                   Droprate=0.5,
                                   Batchsize=batchsize,
                                   epochs=20)
    # Despite its name this is the elapsed training time in seconds.
    end_time = time.time() - start_time
    # Subtract the baseline so the already-loaded data is not counted.
    memory = get_memory() - base_memory
    print("--- %s seconds , %s mb memory in usage ---" % (end_time, memory))
    save_model(model, history, "batchsize_no_batchnorm_" + str(batchsize),
               end_time, memory)
    # Release the model between runs so memory does not accumulate.
    del model, history
    gc.collect()
    clear_session()
# Fit the model, holding out `val_split` of the training data for validation.
hist = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=training_epochs,
    validation_split=val_split,
)

acc, result = evaluate_model(
    X_test, y_test, categories, model, limit=-1, return_prediction_array=True
)

# - - - SAVE THE MODEL (OPTIONAL)
saveme = input("Save model? (y/n): ")
# Accept only answers that start with "y" (any case). A plain substring
# test would also treat replies such as "nay" as a yes.
if saveme.strip().lower().startswith("y"):
    save_model(model, categories, val_split, acc, hist)

# - - - WHERE DOES THE MODEL GO WRONG? WHAT CHARS CAN WE CONSISTENTLY RECOGNIZE?
yhat_probs = model.predict(X_test)
cats = np.array(categories)
top_3_pred = []
bad_pred = []
for i, y in enumerate(yhat_probs):
    # The position of the largest entry in the target row names the true
    # category.
    true_label = cats[np.argmax(y_test[i])]
    # Category indices ordered from highest to lowest predicted score.
    idx = np.argsort(y)[::-1]
    top_cats = cats[idx]
    if top_cats[0] != true_label:
        print("\nBad prediction: ")
        print(y)
        print("TRUE LABEL: ", true_label)
        print(idx)
def _classifier_in_features(model, default=25088):
    """Return the input width of the first Linear layer in ``model.classifier``.

    Falls back to ``default`` when the model has no ``classifier`` attribute
    or that head contains no ``nn.Linear`` layer.
    """
    head = getattr(model, 'classifier', None)
    if head is not None:
        for layer in head.modules():
            if isinstance(layer, nn.Linear):
                return layer.in_features
    return default


def train():
    """Train a new classifier head on a pretrained network and save it.

    Reads the command-line arguments, builds the train/valid/test datasets
    and loaders from ``<data_dir>/<split>``, freezes the pretrained network
    selected by ``arch``, replaces ``model.classifier`` with a two-layer
    head ending in ``LogSoftmax`` over 102 classes, trains it with Adam and
    a step LR schedule, and saves the result.

    Raises:
        KeyError: if ``arch`` is not 'resnet', 'alexnet' or 'vgg16'.
    """
    print('Gathering Arguments...')
    args = train_input_args()
    data_dir = args.data_dir
    print("data_dir:", data_dir)
    save_dir = args.save_dir
    print("save_dir:", save_dir)
    arch = args.arch
    print("arch:", arch)
    learning_rate = args.learning_rate
    print("learning_rate:", learning_rate)
    hidden_units = args.hidden_units
    print("hidden_units:", hidden_units)
    epochs = args.epochs
    print("epochs:", epochs)
    gpu = args.gpu
    print("gpu:", gpu)

    # ---------------------------------------------------------
    print('Setting up transforms...')
    data_types = ['train', 'valid', 'test']
    rotation = 30
    resize = 225
    crop_size = 224
    normalize_mean = [0.485, 0.456, 0.406]
    normalize_std = [0.229, 0.224, 0.225]

    # Validation and testing share the same deterministic pipeline.
    eval_transform = transforms.Compose([
        transforms.Resize(resize),
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        transforms.Normalize(normalize_mean, normalize_std)
    ])
    # Training adds random rotation, crop and flip.
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomRotation(rotation),
            transforms.RandomResizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(normalize_mean, normalize_std)
        ]),
        'valid': eval_transform,
        'test': eval_transform,
    }

    # Load the datasets with ImageFolder
    image_datasets = {
        x: datasets.ImageFolder(data_dir + '/' + x,
                                transform=data_transforms[x])
        for x in data_types
    }

    # Using the image datasets and the transforms, define the dataloaders
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=32,
                                       shuffle=True)
        for x in data_types
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in data_types}

    # ---------------------------------------------------------
    print('Setting up Device & Models...')
    # Use the CPU when the user asks for it; otherwise use CUDA if the
    # machine supports it.
    device = 'cpu' if gpu == 'cpu' else torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    print('Device:', device)

    # Map names to constructors so only the requested network is built,
    # instead of instantiating all three pretrained models.
    architectures = {
        'resnet': models.resnet18,
        'alexnet': models.alexnet,
        'vgg16': models.vgg16,
    }
    model = architectures[arch](pretrained=True)

    # Freeze the pretrained weights; only the new head is trained.
    for param in model.parameters():
        param.requires_grad = False

    # Size the new head from the one it replaces (25088 for VGG16).
    # NOTE(review): a model without a `classifier` attribute keeps the old
    # 25088 default, and the attribute assigned below is presumably not part
    # of its forward pass -- confirm how 'resnet' is meant to be handled.
    in_features = _classifier_in_features(model)
    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(in_features, hidden_units)),
                     ('relu', nn.ReLU()),
                     ('fc2', nn.Linear(hidden_units, 102)),
                     ('output', nn.LogSoftmax(dim=1))]))
    model.classifier = classifier

    # ---------------------------------------------------------
    # NLLLoss pairs with the LogSoftmax output layer.
    criterion = nn.NLLLoss()

    # Only the parameters of the new classifier are optimized.
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

    # Decay LR by a factor of 0.1 every 4 epochs
    scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

    # Set the model to the device
    model.to(device)

    # ---------------------------------------------------------
    print('Training the model...')
    model_ft = train_model(model, criterion, optimizer, scheduler,
                           dataset_sizes, dataloaders, epochs, device)

    # ---------------------------------------------------------
    print('Saving the model...')
    save_model(model_ft, image_datasets, arch, save_dir)
    print('Saved model successfully!')
from preprocessing import *
from save_model import save_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# This program lives in the same folder as MLGame/games/arkanoid/ml;
# edit the path if the log folder is somewhere else.
if __name__ == "__main__":
    # Preprocessing: gather the data and merge it into features and labels.
    data_set = get_dataset()
    X, y = combine_multiple_data(data_set)

    # %% training: hold out 20% of the samples for scoring
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(x_train, y_train)

    y_predict = model.predict(x_test)
    print("model:", model)
    score = accuracy_score(y_predict, y_test)
    print(score)

    # %% save the model
    save_model(model, "model.pickle")
def train(model,
          device,
          trainloader,
          testloader,
          optimizer,
          criterion,
          metric,
          epochs,
          learning_rate,
          swa=True,
          enable_scheduler=True,
          model_arch=''):
    '''
    Function to perform model training.

    Runs `epochs` passes over `trainloader`. Every 100 steps the model is
    evaluated on `testloader` and the averaged train/test loss and metric
    are printed and recorded. After each epoch the SWA average is updated
    (when `swa` is true) and the model is saved as a checkpoint.

    Args:
        model: network to train; it is moved to `device`.
        device: torch device used for the model and the batches.
        trainloader, testloader: iterables of (inputs, labels) batches.
        optimizer: optimizer for the model parameters; wrapped in `SWA`
            when `swa` is true.
        criterion: loss called as criterion(outputs, labels.float()).
        metric: score called as metric(outputs, labels.float()).
        epochs: number of epochs to run.
        learning_rate: only forwarded to `save_model`.
        swa: enable stochastic weight averaging.
        enable_scheduler: enable cosine annealing of the learning rate.
        model_arch: architecture name, only forwarded to `save_model`.

    Returns:
        (model, train_losses, test_losses, train_metrics, test_metrics)
    '''
    model.to(device)
    steps = 0
    running_loss = 0
    running_metric = 0
    print_every = 100
    train_losses = []
    test_losses = []
    train_metrics = []
    test_metrics = []

    if swa:
        # initialize stochastic weight averaging
        opt = SWA(optimizer)
    else:
        opt = optimizer

    # learning rate cosine annealing
    if enable_scheduler:
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   len(trainloader),
                                                   eta_min=0.0000001)

    for epoch in range(epochs):
        # NOTE(review): the scheduler is stepped once per epoch, before any
        # optimizer step, while its period is len(trainloader) -- confirm
        # whether a per-batch step was intended.
        if enable_scheduler:
            scheduler.step()

        for inputs, labels in trainloader:
            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)

            opt.zero_grad()
            # Call the module itself (not .forward) so registered hooks run.
            outputs = model(inputs)
            loss = criterion(outputs, labels.float())
            loss.backward()
            opt.step()

            # Accumulate detached values only: adding `loss` itself would
            # keep every step's autograd history alive in the running sum
            # and in the recorded history lists.
            running_loss += loss.detach()
            with torch.no_grad():
                running_metric += metric(outputs, labels.float())

            if steps % print_every == 0:
                test_loss = 0
                test_metric = 0
                model.eval()
                with torch.no_grad():
                    for inputs, labels in testloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        test_loss += criterion(outputs, labels.float())
                        test_metric += metric(outputs, labels.float())

                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Test loss: {test_loss/len(testloader):.3f}.. "
                      f"Train metric: {running_metric/print_every:.3f}.. "
                      f"Test metric: {test_metric/len(testloader):.3f}.. ")

                train_losses.append(running_loss / print_every)
                test_losses.append(test_loss / len(testloader))
                train_metrics.append(running_metric / print_every)
                test_metrics.append(test_metric / len(testloader))

                running_loss = 0
                running_metric = 0
                model.train()

        if swa:
            opt.update_swa()

        # Checkpoint the model and the history recorded so far.
        save_model(model,
                   model_arch,
                   learning_rate,
                   epochs,
                   train_losses,
                   test_losses,
                   train_metrics,
                   test_metrics,
                   filepath='models_checkpoints')

    # Swap the averaged weights into the model before returning it.
    if swa:
        opt.swap_swa_sgd()

    return model, train_losses, test_losses, train_metrics, test_metrics
def main():
    """Train a CelebA model while varying learning rate or batch size.

    Loads the data, holds out 20% as a test split, builds and saves the
    initial model, trains it in one of three modes (changing learning rate,
    changing batch size, or neither), saves the trained model, writes the
    per-epoch loss/accuracy/lr/batch-size history to
    ``acc_loss_lr_bs.json`` in the log directory and prints the score on
    the test split.

    Returns early, without training, when both ``config.change_bs`` and
    ``config.change_lr`` are set.
    """
    print('Beginning program')

    # get config
    config = Config().config
    print('change lr:', config.change_lr)
    print('change bs:', config.change_bs)
    print('max epochs:', config.epochs)
    if config.change_bs and config.change_lr:
        print('[!] Whoops: both config.change_bs and config.change_lr are '
              'true -- at least one of them should be false.')
        return

    # get directories
    log_dir = get_log_dir(config)
    data_dir = get_data_dir()
    image_dir = get_celeba_dir()
    print('log dir:', log_dir)
    print('data dir:', data_dir)
    print('image_dir:', image_dir)

    # get data
    print('Loading data...')
    data_dict = get_celeba_data(data_dir)
    x_data, y_data = retrieve_celeba_data(data_dict=data_dict,
                                          image_dir=image_dir)
    x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                        y_data,
                                                        test_size=0.2,
                                                        shuffle=True)
    # 20% of the training split is used for validation during fit().
    num_train = int(x_train.shape[0] * 0.8)
    print(f'Num training examples (excludes test and val): {num_train}')

    # build and save initial model
    input_dim = x_train[0].shape
    model = build_model(input_dim, config, model_type=config.complexity)
    save_model(log_dir=log_dir, config=config, model=model)

    # set variables
    val_loss = []
    val_acc = []
    loss = []
    acc = []
    lr = []
    bs = []
    max_epochs = config.epochs
    batch_size = config.batch_size
    batch_size_mult = 2
    epoch_iter = 1

    # get callbacks
    callbacks = Callbacks(config, log_dir).callbacks
    print('callbacks:')
    for callback in callbacks:
        print('\t', callback)

    def fit(num_epochs, current_batch_size):
        # Fit on the training split only: fitting on the full data set
        # would include x_test and make the final score meaningless.
        return model.fit(x_train,
                         y_train,
                         epochs=num_epochs,
                         batch_size=current_batch_size,
                         shuffle=True,
                         validation_split=0.2,
                         verbose=1,
                         callbacks=callbacks)

    # train model
    if config.change_lr:
        # reduce_lr callback takes care of everything for us
        print('Will change learning rate during training, but not batch size')
        print('Training model...')
        history = fit(max_epochs, batch_size)

        # store history (bs is constant)
        val_loss += history.history['val_loss']
        val_acc += history.history['val_acc']
        loss += history.history['loss']
        acc += history.history['acc']
        lr += history.history['lr']
        bs = [batch_size] * len(history.epoch)

    elif config.change_bs:
        # need to manually stop and restart training
        print('Will change batch size during training, but not learning rate')
        while max_epochs >= epoch_iter:
            print(
                f'Currently at epoch {epoch_iter} of {max_epochs}, batch size is {batch_size}'
            )
            # Ask for all remaining epochs; the next batch size takes over
            # from wherever this fit() call actually stopped.
            epochs = max_epochs - epoch_iter + 1
            history = fit(epochs, batch_size)

            # store history
            val_loss += history.history['val_loss']
            val_acc += history.history['val_acc']
            loss += history.history['loss']
            acc += history.history['acc']
            bs += [batch_size] * len(history.epoch)

            # update training parameters; the batch size is capped at the
            # number of training examples
            epoch_iter += len(history.epoch)
            batch_size *= batch_size_mult
            batch_size = batch_size if batch_size < num_train else num_train

        # store lr history as constant (because it is)
        lr = [0.001] * len(bs)

    else:
        print('Will not change learning rate nor batch size during training')
        print('Training model...')
        history = fit(max_epochs, batch_size)

        # store history (bs is constant)
        val_loss += history.history['val_loss']
        val_acc += history.history['val_acc']
        loss += history.history['loss']
        acc += history.history['acc']
        lr = [0.001] * len(history.epoch)
        bs = [batch_size] * len(history.epoch)

    print('Completed training')

    # save finished model -- overrides original model saved before training
    save_model(log_dir=log_dir, config=config, model=model)

    # save loss, accuracy, lr, and bs values across epochs as json;
    # have to force cast lr vals as float64 because history object saves them
    # as float32, and json.dump() is not compatible with float32
    acc_loss_lr_bs = {
        'val_loss': val_loss,
        'val_acc': val_acc,
        'loss': loss,
        'acc': acc,
        'lr': [np.float64(i) for i in lr],
        'bs': bs
    }
    acc_loss_lr_bs_path = os.path.join(log_dir, 'acc_loss_lr_bs.json')
    with open(acc_loss_lr_bs_path, 'w') as f:
        json.dump(acc_loss_lr_bs, f, indent=4, sort_keys=True)

    # evaluate model (on original batch size)
    print('Calculating final score...')
    score = model.evaluate(x_test, y_test, batch_size=config.batch_size)
    print('Final score:', score)
    print('Completed program')
def train_model():
    """
    Train the neural network model.

    Sets up logging, builds the training graph from `FLAGS.dataset`, picks the
    loss and the train op from the `FLAGS.forces`, `FLAGS.forces_only`,
    `FLAGS.amp` and `FLAGS.alter_train_op` switches, stores the training flags
    in `FLAGS.train_dir` and runs a monitored training session until
    `max_steps` is reached or the input pipeline raises
    `tf.errors.OutOfRangeError`.

    When `FLAGS.freeze_frequency` is non-zero the model is exported with
    `save_model` every `freeze_frequency` steps and once more after training.
    """
    set_logging_configs(
        debug=FLAGS.debug,
        logfile=join(FLAGS.train_dir, FLAGS.logfile)
    )

    with tf.Graph().as_default():

        # Get the global step
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Inference the kCON energy model
        y_calc, y_true, y_weights, f_calc, f_true, n_atom = kcnn_from_dataset(
            FLAGS.dataset,
            for_training=True,
            num_epochs=FLAGS.num_epochs
        )

        # Cast `y_true` and `f_true` to `tf.float32` and set the shape of the
        # `y_calc` explicitly.
        y_calc.set_shape(y_true.get_shape().as_list())
        y_true = tf.cast(y_true, tf.float32)
        if f_true is not None:
            f_true = tf.cast(f_true, tf.float32)

        # Setup the loss function. `y_loss` and `f_loss` stay None for the
        # single-target losses; only the joint losses return them.
        y_loss = None
        f_loss = None

        if not FLAGS.forces:
            total_loss = kcnn.get_y_loss(y_true, y_calc, y_weights)
        elif FLAGS.forces_only:
            total_loss = kcnn.get_f_loss(f_true, f_calc)
        elif FLAGS.amp:
            total_loss, y_loss, f_loss = kcnn.get_amp_yf_joint_loss(
                y_true, y_calc, f_true, f_calc, n_atom
            )
        else:
            total_loss, y_loss, f_loss = kcnn.get_yf_joint_loss(
                y_true, y_calc, f_true, f_calc
            )

        # Build a Graph that trains the model.
        if FLAGS.forces and FLAGS.alter_train_op:
            train_op = kcnn.get_yf_train_op(
                total_loss, y_loss, f_loss, global_step)
        else:
            train_op = kcnn.get_joint_loss_train_op(total_loss, global_step)

        # Save the training flags. The version is compared numerically: a
        # plain string comparison ranks "1.10.0" below "1.6.0" and would send
        # newer releases down the legacy branch.
        tf_major_minor = tuple(
            int(part) for part in tf.__version__.split(".")[:2])
        if tf_major_minor >= (1, 6):
            save_training_flags(FLAGS.train_dir, FLAGS.flag_values_dict())
        else:
            save_training_flags(
                FLAGS.train_dir, dict(FLAGS.__dict__["__flags"]))

        # Get the total number of training examples
        num_examples = pipeline.get_dataset_size(FLAGS.dataset)
        max_steps = int(num_examples * FLAGS.num_epochs / FLAGS.batch_size)

        class RunHook(tf.train.SessionRunHook):
            """ Log loss and runtime and regularly freeze the model. """

            def __init__(self, atomic_forces=False, should_freeze=True):
                """
                Initialization method.

                Args:
                  atomic_forces: if True (and `FLAGS.forces_only` is off) the
                    separate `y_loss` / `f_loss` values are fetched and logged.
                  should_freeze: if True the model is exported every
                    `FLAGS.freeze_frequency` steps.
                """
                super(RunHook, self).__init__()
                self._step = -1
                self._tic = time.time()
                self._start_time = 0
                self._epoch = 0.0
                self._epoch_per_step = FLAGS.batch_size / num_examples
                self._log_frequency = FLAGS.log_frequency
                self._should_freeze = should_freeze
                self._freeze_frequency = FLAGS.freeze_frequency
                self._atomic_forces = atomic_forces

            def begin(self):
                """
                Called once before using the session.
                """
                # -2 so that the first `before_run` leaves the counter
                # negative, which makes `after_run` sync it with the fetched
                # global step.
                self._step = -2

            def before_run(self, run_context):
                """
                Called before each call to run().

                Args:
                  run_context: a `tf.train.SessionRunContext` as the context
                    to execute ops and tensors.

                Returns:
                  args: a `tf.train.SessionRunArgs` as the ops and tensors to
                    execute under `run_context`.

                """
                self._step += 1
                self._epoch = self._step * self._epoch_per_step
                self._start_time = time.time()

                if not self._atomic_forces or FLAGS.forces_only:
                    return tf.train.SessionRunArgs({
                        "loss": total_loss,
                        "global_step": global_step,
                    })
                else:
                    return tf.train.SessionRunArgs({
                        "loss": total_loss,
                        "y_loss": y_loss,
                        "f_loss": f_loss,
                        "global_step": global_step,
                    })

            def should_log(self):
                """
                Return True if we should log the stats of current step.
                """
                return self._step % self._log_frequency == 0

            def should_freeze(self):
                """
                Return True if we should freeze the current graph and values.
                """
                return (self._should_freeze
                        and self._step % self._freeze_frequency == 0)

            def after_run(self, run_context, run_values):
                """
                Called after each call to run().

                Args:
                  run_context: a `tf.train.SessionRunContext` as the context
                    to execute ops and tensors.
                  run_values: results of requested ops/tensors by
                    `before_run()`.

                """
                # First call after `begin`: adopt the fetched global step.
                if self._step < 0:
                    self._step = run_values.results["global_step"]

                duration = time.time() - self._start_time
                loss_value = run_values.results["loss"]
                num_examples_per_step = FLAGS.batch_size

                if self.should_log():
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    sec_per_epoch = (time.time() - self._tic) / self._epoch

                    if not self._atomic_forces or FLAGS.forces_only:
                        format_str = (
                            "step %6d, epoch=%7.2f, loss=%10.6f "
                            "(%6.1f examples/sec; %8.3f sec/batch, "
                            "%8.3f sec/epoch)"
                        )
                        tf.logging.info(
                            format_str % (self._step, self._epoch, loss_value,
                                          examples_per_sec, sec_per_batch,
                                          sec_per_epoch)
                        )
                    else:
                        y_val = run_values.results['y_loss']
                        f_val = run_values.results['f_loss']
                        format_str = (
                            "step %6d, epoch=%7.2f, loss=%10.6f, "
                            "y_loss=%10.6f, "
                            "f_loss = %10.6f (%6.1f examples/sec; "
                            "%7.3f sec/batch)"
                        )
                        tf.logging.info(
                            format_str % (self._step, self._epoch, loss_value,
                                          y_val, f_val, examples_per_sec,
                                          sec_per_batch)
                        )

                if self.should_freeze():
                    save_model(
                        FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)

        run_meta = tf.RunMetadata()
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

        # With `restore_2body_only` the saver only handles variables whose
        # `get_k_from_var` value is -1 or 2; otherwise all global variables.
        if FLAGS.restore_2body_only:
            var_list = [var for var in tf.global_variables()
                        if get_k_from_var(var) in (-1, 2)]
        else:
            var_list = tf.global_variables()

        scaffold = tf.train.Scaffold(
            saver=tf.train.Saver(max_to_keep=FLAGS.max_to_keep,
                                 var_list=var_list))

        # noinspection PyMissingOrEmptyDocstring
        class TimelineHook(tf.train.SessionRunHook):
            """
            A hook to output tracing results for further performance analysis.
            """

            def __init__(self):
                super(TimelineHook, self).__init__()
                self._counter = -1

            def begin(self):
                self._counter = -1

            def get_ctf(self):
                return join(FLAGS.train_dir, "prof_%d.json" % self._counter)

            def should_save(self):
                return (FLAGS.timeline
                        and self._counter % FLAGS.save_frequency == 0)

            def after_run(self, run_context, run_values):
                self._counter += 1
                if self.should_save():
                    timeline = Timeline(step_stats=run_meta.step_stats)
                    ctf = timeline.generate_chrome_trace_format(
                        show_memory=True)
                    with open(self.get_ctf(), "w+") as f:
                        f.write(ctf)

        # A zero / unset freeze frequency disables graph export entirely.
        export_graph = bool(FLAGS.freeze_frequency)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                save_summaries_steps=FLAGS.save_frequency,
                hooks=[RunHook(should_freeze=export_graph,
                               atomic_forces=FLAGS.forces),
                       TimelineHook(),
                       tf.train.StopAtStepHook(last_step=max_steps)],
                scaffold=scaffold,
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement,
                    allow_soft_placement=True)) as mon_sess:

            while not mon_sess.should_stop():
                try:
                    if FLAGS.timeline:
                        # Tracing options are only passed when a timeline is
                        # requested; `TimelineHook` reads `run_meta`.
                        mon_sess.run(
                            train_op,
                            options=run_options,
                            run_metadata=run_meta
                        )
                    else:
                        mon_sess.run(train_op)
                except tf.errors.OutOfRangeError:
                    tf.logging.info(
                        "Stop this training after {} epochs.".format(
                            FLAGS.num_epochs))
                    break

        # Do not forget to export the final model
        if export_graph:
            save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)
# Reconstruction objective: the model output is compared with its own input.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)
loss = []


def write_list(filename, data):
    """Append `data`, formatted with ``%s``, as one line to `filename`.

    Args:
        filename: path of the text file to append to (created if missing).
        data: value to log; it is rendered with ``"%s\\n" % data``.
    """
    # The context manager closes the handle on every call; the previous
    # version left one open file per logged epoch.
    with open(filename, 'a') as thefile:
        thefile.write("%s\n" % data)


for epoch in range(num_epochs):
    train_loss = 0
    for data in dataloader:
        # Labels are not needed: the image is both input and target.
        img, _ = data
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch idiom; on
        # current releases a 0-dim loss needs `loss.item()` instead. Confirm
        # the targeted PyTorch version before changing it.
        train_loss += loss.data[0]
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    # The summed batch losses are divided by the number of samples.
    write_list("loss.txt", train_loss / len(dataloader.dataset))
    print('epoch [{}/{}], loss:{:.4f}'.format(
        epoch + 1, num_epochs, train_loss / len(dataloader.dataset)))
    # `output` and `img` are those of the last batch of this epoch.
    on_epoch_end(epoch, output, size, img)
    save_model(epoch, model)
def train(self):
    """Extract features, fit a linear SVM and save it together with the scaler.

    Reads every file directly inside `self.pos_path` and `self.neg_path`
    with `cv2.imread`, converts each image with
    `extract_features(image, self.config)`, standardises the features,
    pickles them to disk, trains a `LinearSVC` on a 70/30 train/validation
    split and passes the classifier, the scaler and the config to
    `save_model`.

    Raises:
        Exception: whatever is raised while writing the feature pickle is
            reported and re-raised.
    """
    print(self.pos_path)
    print(self.neg_path)

    def load_features(directory):
        # Only the top level of `directory` is listed; sub-directories are
        # not walked.
        features = []
        for entry in os.listdir(directory):
            image = cv2.imread(os.path.join(directory, entry))
            features.append(extract_features(image, self.config))
        return features

    pos_features = load_features(self.pos_path)
    neg_features = load_features(self.neg_path)

    # random shuffling of the features
    random.shuffle(pos_features)
    random.shuffle(neg_features)
    print("{} positive features, {} negative features \n".format(
        len(pos_features), len(neg_features)))

    print("scaling.... \n")
    # The scaler is fitted on both classes together, then applied to each.
    xscaler = StandardScaler().fit(pos_features + neg_features)
    pos_features = xscaler.transform(pos_features)
    neg_features = xscaler.transform(neg_features)

    print("Saving features to file Features \n")
    # Raw string: the backslashes are path separators, not escape sequences.
    feature_file = r"D:\Sabahuddin\svm_hog_speed\FEATURE_DATA.p"
    try:
        # `with` guarantees the pickle is flushed and the handle closed.
        with open(feature_file, 'wb') as handle:
            pickle.dump({
                "positive": pos_features,
                "negative": neg_features
            }, handle)
        print("Feature Data saved to {}".format(feature_file))
    except Exception as e:
        print('Failed to save the model at the destination file {}:{}'.
              format(feature_file, e))
        raise

    # Positives are labelled 1 and negatives 0, in the same stacking order.
    features = np.vstack((pos_features, neg_features)).astype(float)
    labels = np.hstack(
        (np.ones(len(pos_features)), np.zeros(len(neg_features))))

    print(" splitting the features into train and validation sets... \n")
    xtrain, xtest, ytrain, ytest = train_test_split(features, labels,
                                                    test_size=0.3,
                                                    random_state=42)
    print(" size of train set {}".format(len(xtrain)))
    print(" size of test set {}".format(len(xtest)))

    svm = LinearSVC(max_iter=3000, C=1, loss="squared_hinge", penalty='l1',
                    dual=False, fit_intercept=False)
    start_time = time.time()
    print(" training the classifier with the train set... \n")
    svm.fit(xtrain, ytrain)
    print(" trained in {:.1f}s".format(time.time() - start_time))

    prediction = svm.predict(xtest)
    print("prediction \n", prediction)
    print("ytest \n", ytest)
    print("validation accuracy is {:f}".format(svm.score(xtest, ytest)))

    # clf_model, scaler, file, config
    save_model(svm, xscaler,
               r'D:\Sabahuddin\svm_hog_speed\MODEL_SVM_HOG_try1.p',
               self.config)
# Script section: build, fit and evaluate a model repeatedly until the
# holdout accuracy reaches 0.90, then optionally save the last model.
# NOTE(review): the original indentation of this section was lost; the loop
# is assumed to end after the accuracy print, with the save prompt running
# once afterwards - confirm against the original file.
acc = 0
val_split = .1
# NOTE(review): there is no iteration cap, so this loops for as long as the
# evaluated accuracy stays below 0.90.
while acc < .90:
    print("Building model...")
    # A fresh model is built on every pass, so each attempt starts over.
    model = build_model(X_train,num_categories =num_categories, filter_size=filter_size)
    print("Fiting training data to model...")
    model.fit(X_train,y_train,batch_size= 64, epochs = 3, validation_split = val_split)
    # - - - EVALUATE MODEL FOR ACCURACY AGAINST HOLDOUT SET
    print('Evaluating trained model against holdout dataset...')
    # `result` is not used in this section.
    acc,result = evaluate_model(X_test,y_test, categories, model,limit = -1, return_prediction_array=True)
    print(f'{"-"*80}\nAccuracy on holdout set: {round(acc,6)}')
# - - - SAVE THE MODEL (OPTIONAL)
saveme= input('Save model? (y/n): ')
# Any answer containing the letter "y" triggers the save.
if 'y' in saveme:
    save_model(model)
    # today = str(dt.now().date())
    # timestamp = str(dt.now().date()) + "T:"+ str(dt.now().time())[0:8]
    # model.save(f'../models/simpleCNN-{timestamp}.h5') # creates a HDF5 file 'my_model.h5'
    # print(f"Saved as models/simpleCNN-{timestamp}.h5")
    # with open(f'../models/reports/simpleCNN-{timestamp}.txt','w') as f:
    #     f.write(f'Classes in data: {categories}\n')
    #     f.write(f'Train-to-Holdout Ratio: {1-val_split}\n')
    #     f.write(f'Holdout Accuracy: {acc}\n')
    #     f.close()
    # print(f"Report of model saved at models/reports/simpleCNN-{timestamp}.txt ")
''' REFERENCE: For reading in a pickeled file:
from sklearn import svm

# I put this program in the same folder as MLGame/games/arkanoid/ml
# you can edit path to get log folder

if __name__ == "__main__":
    # preprocessing
    data_set = get_dataset()
    X, y = combine_multiple_data(data_set)

    # %% training
    # 80/20 split; no random_state is given, so the split differs per run.
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    param_grid = {
        'weights': ('uniform', 'distance'),
        'algorithm': ('auto', 'ball_tree', 'kd_tree', 'brute'),
        # 'gamma': [0.1, 1, 10],
        # 'epsilon': [0.01, 0.05, 0.1, 0.5, 1.0]
    }
    # Grid-search the KNN options above with 5-fold cross-validation.
    knn = KNeighborsClassifier(n_neighbors=3)
    gclf = GridSearchCV(knn, param_grid, cv=5)
    gclf.fit(x_train, y_train)
    y_predict = gclf.predict(x_test)

    # extract the best parameters
    bestModel = gclf.best_estimator_
    # `best_score_` is the grid search's cross-validation score for the best
    # parameter combination, printed below as the "Training score".
    best_score = gclf.best_score_
    print("Best Model:", bestModel)
    print("Training score:", best_score)
    # accuracy_score is documented as (y_true, y_pred); the value is the same
    # either way, but the documented order keeps the call unambiguous.
    print("Test score", accuracy_score(y_test, y_predict))

    # %% save the model
    save_model(bestModel, "model.pickle")