Example #1
def integrate_model():
    with open("model_A.pickle", "rb") as f:
        model_A = pickle.load(f)
    with open("model_B.pickle", "rb") as f:
        model_B = pickle.load(f)
    if mode.get() == 0:
        test_data = pd.read_csv('test_svm.csv')
        inte_model = fed_integrate_model_svm(model_A, model_B)
        print(test_data.head(6))
    elif mode.get() == 1:
        test_data = pd.read_csv('test_cart.csv')
        inte_model = fed_integrate_model_cart(model_A, model_B)
    else:
        test_data = pd.read_csv('test_lr.csv')
        inte_model = fed_integrate_model_lr(model_A, model_B)

    x_test = test_data.iloc[:, :-1].drop('Unnamed: 0', axis=1)
    # print x_test.columns
    # print x_test.shape
    y_test = test_data.iloc[:, -1]
    y_pre = inte_model.predict(x_test)
    # print y_test
    # print y_pre
    label_1.config(text='Accuracy: ' + str(accuracy_score(y_pre, y_test)) + '\n' +
                   'Model saved as inte_model.pickle')
    save_model(inte_model, 'inte_model.pickle')
Example #2
def train_mixup(nbEpochs=1):
    best_test_acc = 0
    train_size = len(training_generator.dataset)
    test_size = len(eval_generator.dataset)
    for epoch in range(nbEpochs):
        train_loss = 0.0
        train_acc = 0.0
        cnn.train()
        for index_batch, (inputs, labels) in enumerate(training_generator):
            inputs, labels = inputs.to(device), labels.long().to(device)
            inputs, lbl_a, lbl_b, lam = mixup_data(inputs, labels, alpha)
            #?? from torch.autograd import Variable
            #inputs, lbl_a, lbl_b = map(Variable, (inputs, lbl_a, lbl_b))

            optimizer.zero_grad()
            outputs = cnn(inputs)
            #loss = criterion(outputs, labels)
            loss = mixup_criterion(criterion, outputs, lbl_a, lbl_b, lam)
            _, predicted = torch.max(outputs.data, 1)
            #train_acc += (predicted == labels).sum().item()
            train_acc += (lam * (predicted == lbl_a).sum().item() + (1 - lam) *
                          (predicted == lbl_b).sum().item())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            progress_bar(index_batch, len(training_generator))
        train_loss /= train_size
        train_acc /= train_size
        print("Train at Epoch:", epoch, " loss:", train_loss, " accuracy:",
              100.0 * train_acc)

        test_loss = 0.0
        test_acc = 0.0
        cnn.eval()
        with torch.no_grad():
            for index_batch, (inputs, labels) in enumerate(eval_generator):
                inputs, labels = inputs.to(device), labels.long().to(device)
                outputs = cnn(inputs)
                loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs.data, 1)
                test_acc += (predicted == labels).sum().item()
                test_loss += loss.item()
                progress_bar(index_batch, len(eval_generator))
        test_loss /= test_size
        test_acc /= test_size
        print("Test at Epoch:", epoch, " loss:", test_loss, " accuracy:",
              100.0 * test_acc)

        lr = linear_adjust_learning_rate(epoch)
        save_result.append(
            [epoch, lr, train_loss, train_acc, test_loss, test_acc])

        if epoch > 30 and test_acc > best_test_acc:
            best_test_acc = test_acc
            #Add save model
            save_model("Dorfer2", str(epoch))
Example #3
def train_model():
    data = load_file(loc.get())
    #label_1.config(text = mode.get())
    if mode.get() == 0:
        result_coef = train_svm(data)
    elif mode.get() == 1:
        result_coef = train_cart(data)
    else:
        result_coef = train_lr(data)
    label_1.config(text='Accuracy: ' + str(result_coef[0]))
    para_dict['result'] = result_coef
    save_model(result_coef[1], 'model_A.pickle')
Example #4
      def after_run(self, run_context, run_values):
        """
        Called after each call to run().

        Args:
          run_context: a `tf.train.SessionRunContext` as the context to execute
           ops and tensors.
          run_values: results of requested ops/tensors by `before_run()`.

        """
        if self._step < 0:
          self._step = run_values.results["global_step"]

        duration = time.time() - self._start_time
        loss_value = run_values.results["loss"]
        num_examples_per_step = FLAGS.batch_size

        if self.should_log():
          examples_per_sec = num_examples_per_step / duration
          sec_per_batch = float(duration)
          sec_per_epoch = (time.time() - self._tic) / self._epoch

          if not self._atomic_forces or FLAGS.forces_only:
            format_str = "step %6d, epoch=%7.2f, loss=%10.6f " \
                       "(%6.1f examples/sec; %8.3f sec/batch, %8.3f sec/epoch)"
            tf.logging.info(
              format_str % (self._step, self._epoch, loss_value,
                            examples_per_sec, sec_per_batch, sec_per_epoch)
            )
          else:
            y_val = run_values.results['y_loss']
            f_val = run_values.results['f_loss']

            format_str = "step %6d, epoch=%7.2f, loss=%10.6f, y_loss=%10.6f, " \
                         "f_loss = %10.6f (%6.1f examples/sec; %7.3f sec/batch)"
            tf.logging.info(
              format_str % (self._step, self._epoch, loss_value, y_val, f_val,
                            examples_per_sec, sec_per_batch)
            )

        if self.should_freeze():
          save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)
Example #5
def train(task_id, method, file_dir):
    # supports multiple threads: each thread creates a new session with a new graph
    with tf.Session(graph=tf.Graph()) as sess:
        tf.keras.backend.set_session(sess)
        train_fc = train_func.get(method.lower(), None)
        if train_fc is None:
            TRAIN_LOGGER.info(
                "[task{}]invalid method. not classification or segmentation".
                format(task_id))
            return None
        model_file = train_fc(task_id, file_dir)
    gc.collect()
    #from tensorflow.keras import backend as K
    #tf.reset_default_graph()
    #K.clear_session()
    # save as .pb file
    if model_file is not None:
        saved_models_path = os.path.join(file_dir, TRAINED_MODEL_FOLDER)
        save_model(saved_models_path, SAVE_FINAL_MODEL_PB_FILE, model_file)

        # build android app
        images_dir = os.path.join(file_dir, IMAGE_FOLDER)
        label_file = os.path.join(file_dir, LABEL_INFO_FILE)
        image_dp = ImageDataPipeline(images_dir,
                                     label_file,
                                     image_size=IMAGE_SIZE)
        labels = ['' for _ in range(image_dp.labels_classes)]
        for key in image_dp.label_name_val_dict.keys():
            labels[image_dp.label_name_val_dict[key]] = key
        is_success, outputs = build_android_app(labels, saved_models_path,
                                                model_file)
        if not is_success:
            return None
        return outputs

    return model_file
Example #6
def train_with_multiple_gpus():
    """
  Train the KCNN model with mutiple gpus.
  """
    set_logging_configs(debug=FLAGS.debug,
                        logfile=join(FLAGS.train_dir, FLAGS.logfile))

    # Output the process id
    tf.logging.info("~pid={}".format(getpid()))

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Get or create the global step variable to count the number of train()
        # calls. This equals the number of batches processed * FLAGS.num_gpus.
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Create an optimizer that performs gradient descent.
        with tf.name_scope("Optimizer"):
            learning_rate = kcnn.get_learning_rate(global_step)
            opt = kcnn.get_optimizer(learning_rate)

        # Initialize the input pipeline.
        total_batch_size = FLAGS.batch_size * FLAGS.num_gpus
        num_examples = pipeline.get_dataset_size(FLAGS.dataset,
                                                 for_training=True)
        batch = pipeline.next_batch(for_training=True,
                                    shuffle=True,
                                    dataset_name=FLAGS.dataset,
                                    num_epochs=FLAGS.num_epochs,
                                    batch_size=total_batch_size)
        configs = pipeline.get_configs(for_training=True)
        params = extract_configs(configs, for_training=True)

        # Split the batch for each tower
        tensors_splits = get_splits(batch, num_splits=FLAGS.num_gpus)

        # Retain all non-tower summaries
        non_tower_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)

        # Calculate the gradients for each model tower.
        tower_grads = []
        summaries = []
        loss = None
        batchnorm_updates = []
        reuse_variables = False

        for i in range(FLAGS.first_gpu_id,
                       FLAGS.first_gpu_id + FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s%d' %
                                   (constants.TOWER_NAME, i)) as scope:

                    # Calculate the loss for one tower of the KCNN model.
                    # This function constructs the entire model but shares the variables
                    # across all towers.
                    # index the splits from zero even when first_gpu_id > 0
                    loss = tower_loss(tensors_splits[i - FLAGS.first_gpu_id],
                                      params, scope, reuse_variables)

                    # Reuse variables for the next tower.
                    reuse_variables = True

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Retain the Batch Normalization updates operations only from the
                    # final tower. Ideally, we should grab the updates from all towers
                    # but these stats accumulate extremely fast so we can ignore the
                    # other stats from the other towers without significant detriment.
                    if FLAGS.normalizer and FLAGS.normalizer == 'batch_norm':
                        batchnorm_updates = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, scope)

                    # Calculate the gradients for the batch of data on this tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        summaries.extend(
            add_total_norm_summaries(grads, "yf", only_summary_total=False))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        # Add histograms for gradients.
        with tf.name_scope("Summary"):
            for grad, var in grads:
                if grad is not None:
                    summaries.append(
                        tf.summary.histogram(var.op.name + '/gradients', grad))
            for var in tf.trainable_variables():
                summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        with tf.name_scope("average"):
            variable_averages = tf.train.ExponentialMovingAverage(
                constants.VARIABLE_MOVING_AVERAGE_DECAY, global_step)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())

        # Group all updates into a single train op.
        if FLAGS.normalizer and FLAGS.normalizer == 'batch_norm':
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            train_op = tf.group(batchnorm_updates_op, apply_gradient_op,
                                variables_averages_op)
        else:
            train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Save the training flags
        save_training_flags(FLAGS.train_dir, FLAGS.flag_values_dict())

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.max_to_keep)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries + non_tower_summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Restore the previous checkpoint
        start_step = 1
        if FLAGS.restore_training or FLAGS.restore_weights_from:
            start_step = restore_previous_checkpoint(sess, global_step)
        max_steps = int(FLAGS.num_epochs * num_examples / total_batch_size) + 1

        # Create the summary writer
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        tic = time.time()

        for step in range(start_step, max_steps):
            start_time = time.time()

            try:
                _, loss_value = sess.run([train_op, loss])
            except tf.errors.OutOfRangeError:
                tf.logging.info("Stop this training after {} epochs.".format(
                    FLAGS.num_epochs))
                checkpoint_path = join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                tf.logging.info("{}-{} saved".format(checkpoint_path, step))
                break

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % FLAGS.log_frequency == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus
                epoch = step * total_batch_size / num_examples
                sec_per_epoch = (time.time() - tic) / epoch
                format_str = "%s: step %6d, epoch=%7.2f, loss = %10.6f " \
                             "(%8.1f examples/sec; %8.3f sec/batch, %8.3f sec/epoch)"
                tf.logging.info(
                    format_str %
                    (datetime.now(), step, epoch, loss_value, examples_per_sec,
                     sec_per_batch, sec_per_epoch))

            if step % FLAGS.save_frequency == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % (20 // FLAGS.num_gpus * FLAGS.save_frequency) == 0 or \
                    (step + 1) == max_steps:
                checkpoint_path = join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                tf.logging.info("{}-{} saved".format(checkpoint_path, step))

            if FLAGS.freeze_frequency > 0 and step > 0:
                if step % FLAGS.freeze_frequency == 0 or (step +
                                                          1) == max_steps:
                    save_model(FLAGS.train_dir, FLAGS.dataset,
                               FLAGS.conv_sizes)

        else:
            tf.logging.info('The maximum number of epochs already reached!')

        # Save the final model
        if FLAGS.freeze_frequency > 0:
            save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)
Example #7
# -*- coding: utf-8 -*-
"""
Created on 2018/5/1 

@author: susmote
"""
from load_data import load_data
from train_model import lr_train_bgd
from save_model import save_model

if __name__ == "__main__":
    # load the data
    print("load data".center(30, '-'))
    feature, label = load_data("data.txt")
    # train the LR model
    print("training".center(30, '-'))
    w = lr_train_bgd(feature, label, 1000, 0.01)
    # save the model
    print("save model".center(30, '-'))
    save_model("weights", w)
Example #8
def build_model(device,
                img_size,
                channels,
                test_split,
                batch_size,
                workers,
                model_arch,
                epochs,
                learning_rate,
                swa,
                enable_scheduler,
                loss='BCEDiceLoss',
                all_data=False,
                tta=False):
    # create data loaders
    trainloader, testloader, validloader = build_dataloaders(
        image_size=(img_size, img_size),
        channels=channels,
        test_split=test_split,
        batch_size=batch_size,
        num_workers=workers,
        all_data=all_data,
        data_filepath='../siim-train-test/')

    # setup the device
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    # initialize model
    if model_arch == 'UNet':
        model = UNet(num_classes=1,
                     depth=6,
                     start_filts=8,
                     merge_mode='concat')

    if model_arch == 'UNet11':
        model = UNet11(pretrained=True)

    if model_arch == 'UNet16':
        model = UNet16(num_classes=1, pretrained=True)

    if model_arch == 'AlbuNet':
        model = AlbuNet(num_classes=1, pretrained=True)

    if model_arch == 'NestedUNet':
        model = NestedUNet()

    if model_arch == 'Unet_2D':
        model = Unet_2D(n_channels=channels, n_classes=1)

    if model_arch == 'Res34Unetv4':
        model = Res34Unetv4()

    if model_arch == 'Res34Unetv3':
        model = Res34Unetv3()

    if model_arch == 'Res34Unetv5':
        model = Res34Unetv5()

    if model_arch == 'BrainUNet':
        model = brain_unet(pretrained=True)

    if model_arch == 'R2U_Net':
        model = R2U_Net()

    if model_arch == 'AttU_Net':
        model = AttU_Net()

    if model_arch == 'R2AttU_Net':
        model = R2AttU_Net()

    # setup criterion, optimizer and metrics
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if loss == 'BCEDiceLoss':
        criterion = BCEDiceLoss()

    if loss == 'LovaszSoftmaxLoss':
        criterion = LovaszSoftmaxLoss()

    if loss == 'JaccardLoss':
        criterion = JaccardLoss(device=device)

    if loss == 'mIoULoss':
        criterion = mIoULoss(n_classes=1)

    if loss == 'WeightedBCEDiceLoss':
        criterion = WeightedBCEDiceLoss()

    metric = iou_score

    #train model
    model, train_losses, test_losses, train_metrics, test_metrics = train(
        model,
        device,
        trainloader,
        testloader,
        optimizer,
        criterion,
        metric,
        epochs,
        learning_rate,
        swa=swa,
        enable_scheduler=enable_scheduler,
        model_arch=model_arch)

    # create submission
    filename = 'submission_' + model_arch + '_lr' + str(
        learning_rate) + '_' + str(epochs) + '.csv'
    print('Generating submission to ' + filename + '\n')
    thresholds, ious, index_max, threshold_max = determine_threshold(
        model,
        device,
        testloader,
        image_size=(img_size, img_size),
        channels=channels)
    make_submission(filename,
                    device,
                    model,
                    validloader,
                    image_size=(img_size, img_size),
                    channels=channels,
                    threshold=threshold_max,
                    original_size=1024,
                    tta=tta)

    # save the model
    save_model(model,
               model_arch,
               learning_rate,
               epochs,
               train_losses,
               test_losses,
               train_metrics,
               test_metrics,
               filepath='models_checkpoints')
Example #9
def build_from_checkpoint(filename,
                          device,
                          img_size,
                          channels,
                          test_split,
                          batch_size,
                          workers,
                          epochs,
                          learning_rate,
                          swa,
                          enable_scheduler,
                          loss='BCEDiceLoss',
                          all_data=False,
                          tta=False):

    # create data loaders
    trainloader, testloader, validloader = build_dataloaders(
        image_size=(img_size, img_size),
        channels=channels,
        test_split=test_split,
        batch_size=batch_size,
        num_workers=workers,
        all_data=all_data)

    # setup the device
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    # restore model
    model, model_arch, train_losses_0, test_losses_0, train_metrics_0, test_metrics_0 = load_model(
        filename, device, channels=channels)

    # setup criterion, optimizer and metrics
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if loss == 'BCEDiceLoss':
        criterion = BCEDiceLoss()

    if loss == 'LovaszSoftmaxLoss':
        criterion = LovaszSoftmaxLoss()

    if loss == 'JaccardLoss':
        criterion = JaccardLoss(device=device)

    if loss == 'mIoULoss':
        criterion = mIoULoss(n_classes=1)

    if loss == 'WeightedBCEDiceLoss':
        criterion = WeightedBCEDiceLoss()

    metric = iou_score

    #train model
    model, train_losses, test_losses, train_metrics, test_metrics = train(
        model,
        device,
        trainloader,
        testloader,
        optimizer,
        criterion,
        metric,
        epochs,
        learning_rate,
        swa=swa,
        enable_scheduler=enable_scheduler,
        model_arch=model_arch)

    train_losses = train_losses + train_losses_0
    test_losses = test_losses + test_losses_0
    train_metrics = train_metrics + train_metrics_0
    test_metrics = test_metrics + test_metrics_0

    # create submission
    filename = 'submission_' + model_arch + '_lr' + str(
        learning_rate) + '_' + str(epochs) + '.csv'
    print('Generating submission to ' + filename + '\n')
    thresholds, ious, index_max, threshold_max = determine_threshold(
        model, device, testloader, image_size=(img_size, img_size))
    make_submission(filename,
                    device,
                    model,
                    validloader,
                    image_size=(img_size, img_size),
                    channels=channels,
                    threshold=threshold_max,
                    original_size=1024,
                    tta=tta)

    # save the model
    save_model(model,
               model_arch,
               learning_rate,
               epochs,
               train_losses,
               test_losses,
               train_metrics,
               test_metrics,
               filepath='models_checkpoints')
Example #10
def get_memory():
    return (process.memory_info().rss -
            process.memory_info().shared) / 1048576  # memory usage in MiB


x_train, x_test, train_lable, test_lable = load_data()  #Load data from folder

#for droprate in [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
for batchsize in [64, 128, 256, 512]:
    base_memory = get_memory()
    start_time = time.time()
    model, history = run_new_model(x_train,
                                   x_test,
                                   train_lable,
                                   test_lable,
                                   Batchnorm=False,
                                   Droprate=0.5,
                                   Batchsize=batchsize,
                                   epochs=20)
    end_time = time.time() - start_time
    memory = get_memory() - base_memory  #correct for data mem usage!

    print("--- %s seconds , %s mb memory in usage ---" % (end_time, memory))

    save_model(model, history, "batchsize_no_batchnorm_" + str(batchsize),
               end_time, memory)

    del model, history  #.fit() blows up ram
    gc.collect()  #memory management!
    clear_session()  #memory management!
Example #11
    hist = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=training_epochs,
        validation_split=val_split,
    )
    acc, result = evaluate_model(
        X_test, y_test, categories, model, limit=-1, return_prediction_array=True
    )
    #    print(f'{"-"*80}\nPerformance on holdout set of {len(y_test)} images: \nAccuracy:{round(acc,6)}\nHistory: {hist.history}')

    # - - - SAVE THE MODEL (OPTIONAL)
    saveme = input("Save model? (y/n):  ")
    if "y" in saveme:
        save_model(model, categories, val_split, acc, hist)

    # - - - WHERE DOES THE MODEL GO WRONG? WHAT CHARS CAN WE CONSISTANTLY RECOGNIZE?
    yhat_probs = model.predict(X_test)
    cats = np.array(categories)
    top_3_pred = []
    bad_pred = []
    for i, y in enumerate(yhat_probs):
        true_label = cats[np.argmax(y_test[i])]
        idx = np.argsort(y)[::-1]
        top_cats = cats[idx]
        if top_cats[0] != true_label:
            print("\nBad prediction: ")
            print(y)
            print("TRUE LABEL: ", true_label)
            print(idx)
Example #12
def train():
    print('Gathering Arguments...')
    args = train_input_args()

    data_dir = args.data_dir
    print("data_dir:", data_dir)

    save_dir = args.save_dir
    print("save_dir:", save_dir)

    arch = args.arch
    print("arch:", arch)

    learning_rate = args.learning_rate
    print("learning_rate:", learning_rate)

    hidden_units = args.hidden_units
    print("hidden_units:", hidden_units)

    epochs = args.epochs
    print("epochs:", epochs)

    gpu = args.gpu
    print("gpu:", gpu)

    # ---------------------------------------------------------
    print('Setting up transforms...')

    data_types = ['train', 'valid', 'test']
    rotation = 30
    resize = 225
    crop_size = 224
    normalize_mean = [0.485, 0.456, 0.406]
    normalize_std = [0.229, 0.224, 0.225]

    # Define transforms for the training, validation, and testing sets
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomRotation(rotation),
            transforms.RandomResizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(normalize_mean, normalize_std)
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(resize),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            transforms.Normalize(normalize_mean, normalize_std)
        ]),
        'test':
        transforms.Compose([
            transforms.Resize(resize),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            transforms.Normalize(normalize_mean, normalize_std)
        ]),
    }

    # Load the datasets with ImageFolder
    image_datasets = {
        x: datasets.ImageFolder(data_dir + '/' + x,
                                transform=data_transforms[x])
        for x in data_types
    }

    # Using the image datasets and the transforms, define the dataloaders
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=32,
                                       shuffle=True)
        for x in data_types
    }

    dataset_sizes = {x: len(image_datasets[x]) for x in data_types}

    # ---------------------------------------------------------
    print('Setting up Device & Models...')

    # use the cpu if the user requests it; otherwise default to cuda
    # when the device is cuda capable.
    device = 'cpu' if gpu == 'cpu' else torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    print('Device:', device)

    resnet18 = models.resnet18(pretrained=True)
    alexnet = models.alexnet(pretrained=True)
    vgg16 = models.vgg16(pretrained=True)

    architectures = {'resnet': resnet18, 'alexnet': alexnet, 'vgg16': vgg16}

    model = architectures[arch]

    for param in model.parameters():
        param.requires_grad = False

    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(25088, hidden_units)),
                     ('relu', nn.ReLU()), ('fc2', nn.Linear(hidden_units,
                                                            102)),
                     ('output', nn.LogSoftmax(dim=1))]))

    model.classifier = classifier

    # ---------------------------------------------------------

    # Criterion NLLLoss which is recommended with Softmax final layer
    criterion = nn.NLLLoss()
    # Observe that all parameters are being optimized
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    # Decay LR by a factor of 0.1 every 4 epochs
    scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
    # Set the model to the device
    model.to(device)

    # ---------------------------------------------------------
    print('Training the model...')

    # Train the model
    model_ft = train_model(model, criterion, optimizer, scheduler,
                           dataset_sizes, dataloaders, epochs, device)

    # ---------------------------------------------------------
    print('Saving the model...')

    save_model(model_ft, image_datasets, arch, save_dir)

    print('Saved model successfully!')
Example #13
from preprocessing import *
from save_model import save_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
# I put this program in the same folder as MLGame/games/arkaonid/ml
# you can edit the path to point to your log folder
if __name__ == "__main__":
    # preprocessing
    data_set = get_dataset()
    X, y = combine_multiple_data(data_set)

    # %% training
    model = KNeighborsClassifier(n_neighbors=3)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model.fit(x_train, y_train)

    y_predict = model.predict(x_test)
    print("model:", model)
    print(accuracy_score(y_predict, y_test))

    # %% save the model
    save_model(model, "model.pickle")
Example #14
def train(model,
          device,
          trainloader,
          testloader,
          optimizer,
          criterion,
          metric,
          epochs,
          learning_rate,
          swa=True,
          enable_scheduler=True,
          model_arch=''):
    '''
    Function to perform model training.
    '''
    model.to(device)
    steps = 0
    running_loss = 0
    running_metric = 0
    print_every = 100

    train_losses = []
    test_losses = []
    train_metrics = []
    test_metrics = []

    if swa:
        # initialize stochastic weight averaging
        opt = SWA(optimizer)
    else:
        opt = optimizer

    # learning rate cosine annealing
    if enable_scheduler:
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   len(trainloader),
                                                   eta_min=0.0000001)

    for epoch in range(epochs):

        if enable_scheduler:
            scheduler.step()

        for inputs, labels in trainloader:

            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)

            opt.zero_grad()

            outputs = model.forward(inputs)
            loss = criterion(outputs, labels.float())
            loss.backward()
            opt.step()

            # .item() keeps a plain float instead of retaining the autograd graph
            running_loss += loss.item()
            running_metric += metric(outputs, labels.float())

            if steps % print_every == 0:
                test_loss = 0
                test_metric = 0
                model.eval()
                with torch.no_grad():
                    for inputs, labels in testloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model.forward(inputs)

                        test_loss += criterion(outputs, labels.float())

                        test_metric += metric(outputs, labels.float())

                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Test loss: {test_loss/len(testloader):.3f}.. "
                      f"Train metric: {running_metric/print_every:.3f}.. "
                      f"Test metric: {test_metric/len(testloader):.3f}.. ")

                train_losses.append(running_loss / print_every)
                test_losses.append(test_loss / len(testloader))
                train_metrics.append(running_metric / print_every)
                test_metrics.append(test_metric / len(testloader))

                running_loss = 0
                running_metric = 0

                model.train()
                if swa:
                    opt.update_swa()

        save_model(model,
                   model_arch,
                   learning_rate,
                   epochs,
                   train_losses,
                   test_losses,
                   train_metrics,
                   test_metrics,
                   filepath='models_checkpoints')

    if swa:
        opt.swap_swa_sgd()

    return model, train_losses, test_losses, train_metrics, test_metrics
Example #15
def main():
    print('Beginning program')

    # get config
    config = Config().config
    print('change lr:', config.change_lr)
    print('change bs:', config.change_bs)
    print('max epochs:', config.epochs)
    if config.change_bs and config.change_lr:
        print('[!] Whoops: both config.change_bs and config.change_lr are '
              'true -- at least one of them should be false.')
        return

    # get directories
    log_dir = get_log_dir(config)
    data_dir = get_data_dir()
    #train_dir = get_train_dir()
    #test_dir = get_test_dir()
    image_dir = get_celeba_dir()
    print('log dir:', log_dir)
    print('data dir:', data_dir)
    #print('train dir:', train_dir)
    #print('test dir:', test_dir)
    print('image_dir:', image_dir)

    # get data
    print('Loading data...')
    data_dict = get_celeba_data(data_dir)
    x_data, y_data = retrieve_celeba_data(data_dict=data_dict,
                                          image_dir=image_dir)
    x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                        y_data,
                                                        test_size=0.2,
                                                        shuffle=True)
    num_train = int(x_train.shape[0] * 0.8)
    print(f'Num training examples (excludes test and val): {num_train}')

    # build and save initial model
    input_dim = x_train[0].shape
    model = build_model(input_dim, config, model_type=config.complexity)
    save_model(log_dir=log_dir, config=config, model=model)

    # set variables
    val_loss = []
    val_acc = []
    loss = []
    acc = []
    lr = []
    bs = []
    max_epochs = config.epochs
    batch_size = config.batch_size
    batch_size_mult = 2
    epoch_iter = 1

    # get callbacks
    callbacks = Callbacks(config, log_dir).callbacks
    print('callbacks:')
    for callback in callbacks:
        print('\t', callback)

    # train model
    if config.change_lr:  # reduce_lr callback takes care of everything for us
        print('Will change learning rate during training, but not batch size')
        print('Training model...')
        history = model.fit(x_data,
                            y_data,
                            epochs=max_epochs,
                            batch_size=batch_size,
                            shuffle=True,
                            validation_split=0.2,
                            verbose=1,
                            callbacks=callbacks)
        # store history (bs is constant)
        val_loss += history.history['val_loss']
        val_acc += history.history['val_acc']
        loss += history.history['loss']
        acc += history.history['acc']
        lr += history.history['lr']
        bs = [batch_size for i in range(len(history.epoch))]

    elif config.change_bs:  # need to manually stop and restart training
        print('Will change batch size during training, but not learning rate')
        while max_epochs >= epoch_iter:
            print(
                f'Currently at epoch {epoch_iter} of {max_epochs}, batch size is {batch_size}'
            )
            epochs = max_epochs - epoch_iter + 1
            history = model.fit(x_data,
                                y_data,
                                epochs=epochs,
                                batch_size=batch_size,
                                shuffle=True,
                                validation_split=0.2,
                                verbose=1,
                                callbacks=callbacks)
            # store history
            val_loss += history.history['val_loss']
            val_acc += history.history['val_acc']
            loss += history.history['loss']
            acc += history.history['acc']
            bs += [batch_size for i in range(len(history.epoch))]

            # update training parameters
            epoch_iter += len(history.epoch)
            batch_size *= batch_size_mult
            batch_size = batch_size if batch_size < num_train else num_train

        # store lr history as constant (because it is)
        lr = [0.001 for i in range(len(bs))]

    else:
        print('Will not change learning rate nor batch size during training')
        print('Training model...')
        history = model.fit(x_data,
                            y_data,
                            epochs=max_epochs,
                            batch_size=batch_size,
                            shuffle=True,
                            validation_split=0.2,
                            verbose=1,
                            callbacks=callbacks)
        # store history (bs is constant)
        val_loss += history.history['val_loss']
        val_acc += history.history['val_acc']
        loss += history.history['loss']
        acc += history.history['acc']
        lr = [0.001 for i in range(len(history.epoch))]
        bs = [batch_size for i in range(len(history.epoch))]

    print('Completed training')

    # save finished model -- overrides original model saved before training
    save_model(log_dir=log_dir, config=config, model=model)

    # save loss, accuracy, lr, and bs values across epochs as json;
    # have to force cast lr vals as float64 because history object saves them
    # as float32, and json.dump() is not compatible with float32
    acc_loss_lr_bs = {
        'val_loss': val_loss,
        'val_acc': val_acc,
        'loss': loss,
        'acc': acc,
        'lr': [np.float64(i) for i in lr],
        'bs': bs
    }
    acc_loss_lr_bs_path = os.path.join(log_dir, 'acc_loss_lr_bs.json')
    with open(acc_loss_lr_bs_path, 'w') as f:
        json.dump(acc_loss_lr_bs, f, indent=4, sort_keys=True)

    # evaluate model (on original batch size)
    print('Calculating final score...')
    #x_data, y_data = retrieve_data(data_dict=data_dict, image_dir=test_dir)
    score = model.evaluate(x_test, y_test, batch_size=config.batch_size)
    print('Final score:', score)

    print('Completed program')

    return
Example #16
def train_model():
  """
  Train the neural network model.
  """

  set_logging_configs(
    debug=FLAGS.debug,
    logfile=join(FLAGS.train_dir, FLAGS.logfile)
  )

  with tf.Graph().as_default():

    # Get the global step
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Inference the kCON energy model
    y_calc, y_true, y_weights, f_calc, f_true, n_atom = kcnn_from_dataset(
      FLAGS.dataset,
      for_training=True,
      num_epochs=FLAGS.num_epochs
    )

    # Cast `y_true` and `f_true` to `tf.float32` and set the shape of the
    # `y_calc` explicitly.
    y_calc.set_shape(y_true.get_shape().as_list())
    y_true = tf.cast(y_true, tf.float32)
    if f_true is not None:
      f_true = tf.cast(f_true, tf.float32)

    # Setup the loss function
    y_loss = None
    f_loss = None

    if not FLAGS.forces:
      total_loss = kcnn.get_y_loss(y_true, y_calc, y_weights)

    elif FLAGS.forces_only:
      total_loss = kcnn.get_f_loss(f_true, f_calc)

    elif FLAGS.amp:
      total_loss, y_loss, f_loss = kcnn.get_amp_yf_joint_loss(
        y_true, y_calc, f_true, f_calc, n_atom
      )

    else:
      total_loss, y_loss, f_loss = kcnn.get_yf_joint_loss(
        y_true, y_calc, f_true, f_calc
      )

    # Build a Graph that trains the model.
    if FLAGS.forces and FLAGS.alter_train_op:
      train_op = kcnn.get_yf_train_op(total_loss, y_loss, f_loss, global_step)
    else:
      train_op = kcnn.get_joint_loss_train_op(total_loss, global_step)

    # Save the training flags
    if tf.__version__ >= "1.6.0":
      save_training_flags(FLAGS.train_dir, FLAGS.flag_values_dict())
    else:
      save_training_flags(FLAGS.train_dir, dict(FLAGS.__dict__["__flags"]))

    # Get the total number of training examples
    num_examples = pipeline.get_dataset_size(FLAGS.dataset)
    max_steps = int(num_examples * FLAGS.num_epochs / FLAGS.batch_size)

    class RunHook(tf.train.SessionRunHook):
      """ Log loss and runtime and regularly freeze the model. """

      def __init__(self, atomic_forces=False, should_freeze=True):
        """
        Initialization method.
        """
        super(RunHook, self).__init__()
        self._step = -1
        self._tic = time.time()
        self._start_time = 0
        self._epoch = 0.0
        self._epoch_per_step = FLAGS.batch_size / num_examples
        self._log_frequency = FLAGS.log_frequency
        self._should_freeze = should_freeze
        self._freeze_frequency = FLAGS.freeze_frequency
        self._atomic_forces = atomic_forces

      def begin(self):
        """
        Called once before using the session.
        """
        self._step = -2

      def before_run(self, run_context):
        """
        Called before each call to run().

        Args:
          run_context: a `tf.train.SessionRunContext` as the context to execute
            ops and tensors.

        Returns:
          args: a `tf.train.SessionRunArgs` as the ops and tensors to execute
            under `run_context`.

        """
        self._step += 1
        self._epoch = self._step * self._epoch_per_step
        self._start_time = time.time()

        if not self._atomic_forces or FLAGS.forces_only:
          return tf.train.SessionRunArgs({"loss": total_loss,
                                          "global_step": global_step})
        else:
          return tf.train.SessionRunArgs({"loss": total_loss,
                                          "y_loss": y_loss,
                                          "f_loss": f_loss,
                                          "global_step": global_step})

      def should_log(self):
        """
        Return True if we should log the stats of current step.
        """
        return self._step % self._log_frequency == 0

      def should_freeze(self):
        """
        Return True if we should freeze the current graph and values.
        """
        return self._should_freeze and self._step % self._freeze_frequency == 0

      def after_run(self, run_context, run_values):
        """
        Called after each call to run().

        Args:
          run_context: a `tf.train.SessionRunContext` as the context to execute
           ops and tensors.
          run_values: results of requested ops/tensors by `before_run()`.

        """
        if self._step < 0:
          self._step = run_values.results["global_step"]

        duration = time.time() - self._start_time
        loss_value = run_values.results["loss"]
        num_examples_per_step = FLAGS.batch_size

        if self.should_log():
          examples_per_sec = num_examples_per_step / duration
          sec_per_batch = float(duration)
          sec_per_epoch = (time.time() - self._tic) / self._epoch

          if not self._atomic_forces or FLAGS.forces_only:
            format_str = "step %6d, epoch=%7.2f, loss=%10.6f " \
                       "(%6.1f examples/sec; %8.3f sec/batch, %8.3f sec/epoch)"
            tf.logging.info(
              format_str % (self._step, self._epoch, loss_value,
                            examples_per_sec, sec_per_batch, sec_per_epoch)
            )
          else:
            y_val = run_values.results['y_loss']
            f_val = run_values.results['f_loss']

            format_str = "step %6d, epoch=%7.2f, loss=%10.6f, y_loss=%10.6f, " \
                         "f_loss = %10.6f (%6.1f examples/sec; %7.3f sec/batch)"
            tf.logging.info(
              format_str % (self._step, self._epoch, loss_value, y_val, f_val,
                            examples_per_sec, sec_per_batch)
            )

        if self.should_freeze():
          save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)

    run_meta = tf.RunMetadata()
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    if FLAGS.restore_2body_only:
      var_list = []
      for var in tf.global_variables():
        vk = get_k_from_var(var)
        if vk == -1 or vk == 2:
          var_list.append(var)
    else:
      var_list = tf.global_variables()

    scaffold = tf.train.Scaffold(
      saver=tf.train.Saver(max_to_keep=FLAGS.max_to_keep, var_list=var_list))

    # noinspection PyMissingOrEmptyDocstring
    class TimelineHook(tf.train.SessionRunHook):
      """ A hook to output tracing results for further performance analysis. """

      def __init__(self):
        super(TimelineHook, self).__init__()
        self._counter = -1

      def begin(self):
        self._counter = -1

      def get_ctf(self):
        return join(FLAGS.train_dir, "prof_%d.json" % self._counter)

      def should_save(self):
        return FLAGS.timeline and self._counter % FLAGS.save_frequency == 0

      def after_run(self, run_context, run_values):
        self._counter += 1
        if self.should_save():
          timeline = Timeline(step_stats=run_meta.step_stats)
          ctf = timeline.generate_chrome_trace_format(show_memory=True)
          with open(self.get_ctf(), "w+") as f:
            f.write(ctf)

    export_graph = True if FLAGS.freeze_frequency else False

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        save_summaries_steps=FLAGS.save_frequency,
        hooks=[RunHook(should_freeze=export_graph,
                       atomic_forces=FLAGS.forces),
               TimelineHook(),
               tf.train.StopAtStepHook(last_step=max_steps)],
        scaffold=scaffold,
        config=tf.ConfigProto(
          log_device_placement=FLAGS.log_device_placement,
          allow_soft_placement=True)) as mon_sess:

      while not mon_sess.should_stop():
        try:
          if FLAGS.timeline:
            mon_sess.run(
              train_op, options=run_options, run_metadata=run_meta
            )
          else:
            mon_sess.run(train_op)
        except tf.errors.OutOfRangeError:
          tf.logging.info(
            "Stop this training after {} epochs.".format(FLAGS.num_epochs))
          break

  # Do not forget to export the final model
  if export_graph:
    save_model(FLAGS.train_dir, FLAGS.dataset, FLAGS.conv_sizes)
Example #17
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)

    loss = []

    def write_list(filename, data):
        # append one line per call and close the file afterwards
        with open(filename, 'a') as thefile:
            thefile.write("%s\n" % data)

    for epoch in range(num_epochs):
        train_loss = 0
        for data in dataloader:
            img, _ = data
            img = Variable(img).cuda()
            # ===================forward=====================
            output = model(img)
            loss = criterion(output, img)
            train_loss += loss.item()  # loss.data[0] is the deprecated pre-0.4 indexing
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===================log========================
        write_list("loss.txt", train_loss / len(dataloader.dataset))
        print('epoch [{}/{}], loss:{:.4f}'.format(
            epoch + 1, num_epochs, train_loss / len(dataloader.dataset)))
        on_epoch_end(epoch, output, size, img)
        save_model(epoch, model)
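The save_model(epoch, model) called above is not defined in this snippet; a hypothetical sketch using torch.save with a per-epoch checkpoint name (the filename is an assumption):

# Hypothetical sketch of save_model(epoch, model) for Example #17.
import torch

def save_model(epoch, model):
    # one state_dict checkpoint per epoch
    torch.save(model.state_dict(), "model_epoch_{}.pth".format(epoch))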
Example #18
    def train(self):
        pos_features = []
        neg_features = []
        print(self.pos_path)
        print(self.neg_path)
        # pos_files = [os.path.join(rootdir, file) for rootdir, _, files in os.walk(self.pos_path) for file in files]
        # neg_files = [os.path.join(rootdir, file) for rootdir, _, files in os.walk(self.neg_path) for file in files]

        for file in os.listdir(self.pos_path):
            path = os.path.join(self.pos_path, file)
            image = cv2.imread(path)
            pos_features.append(extract_features(image, self.config))

        for file in os.listdir(self.neg_path):
            path = os.path.join(self.neg_path, file)
            image = cv2.imread(path)
            neg_features.append(extract_features(image, self.config))

        # random shuffling of the features
        random.shuffle(pos_features)
        random.shuffle(neg_features)

        print("{} positive features, {} negative features \n".format(
            len(pos_features), len(neg_features)))

        print("scaling.... \n")

        xscaler = StandardScaler().fit(pos_features + neg_features)
        pos_features = xscaler.transform(pos_features)
        neg_features = xscaler.transform(neg_features)

        print("Saving features to file Features \n")
        file = "D:\Sabahuddin\svm_hog_speed\FEATURE_DATA.p"
        try:
            pickle.dump({
                "positive": pos_features,
                "negative": neg_features
            }, open(file, 'wb'))
            print("Feature Data saved to {}".format(file))
        except Exception as e:
            print('Failed to save the model at the destination file {}:{}'.
                  format(file, e))
            raise

        features = np.vstack((pos_features, neg_features)).astype(float)
        labels = np.hstack(
            (np.ones(len(pos_features)), np.zeros(len(neg_features))))

        print(" splitting the features into train and validation sets... \n")
        xtrain, xtest, ytrain, ytest = train_test_split(features,
                                                        labels,
                                                        test_size=0.3,
                                                        random_state=42)

        print(" size of train set {}".format(len(xtrain)))
        print(" size of test set {}".format(len(xtest)))

        svm = LinearSVC(max_iter=3000,
                        C=1,
                        loss="squared_hinge",
                        penalty='l1',
                        dual=False,
                        fit_intercept=False)
        start_time = time.time()
        print(" training the classifier with the train set... \n")
        svm.fit(xtrain, ytrain)
        print(" trained in {:.1f}s".format(time.time() - start_time))
        # ytest = ytest.reshape(1, -1)
        prediction = svm.predict(xtest)
        print("prediction \n", prediction)
        print("ytest \n", ytest)
        print("validation accuracy is {:f}".format(svm.score(xtest, ytest)))

        # clf_model, scaler, file, config
        save_model(svm, xscaler,
                   r'D:\Sabahuddin\svm_hog_speed\MODEL_SVM_HOG_try1.p',
                   self.config)
Example #19
    acc = 0
    val_split = .1
    while acc < .90:
        print("Building model...")
        model = build_model(X_train, num_categories=num_categories, filter_size=filter_size)
        print("Fitting training data to model...")
        model.fit(X_train, y_train, batch_size=64, epochs=3, validation_split=val_split)
        # - - - EVALUATE MODEL FOR ACCURACY AGAINST HOLDOUT SET
        print('Evaluating trained model against holdout dataset...')
        acc, result = evaluate_model(X_test, y_test, categories, model, limit=-1, return_prediction_array=True)
        print(f'{"-"*80}\nAccuracy on holdout set: {round(acc,6)}')
    
    # - - - SAVE THE MODEL (OPTIONAL)
    saveme= input('Save model? (y/n):  ')
    if 'y' in saveme:
        save_model(model)
        # today = str(dt.now().date())
        # timestamp = str(dt.now().date()) + "T:"+ str(dt.now().time())[0:8] 
        # model.save(f'../models/simpleCNN-{timestamp}.h5')  # creates a HDF5 file 'my_model.h5'
        # print(f"Saved as models/simpleCNN-{timestamp}.h5")
        # with open(f'../models/reports/simpleCNN-{timestamp}.txt','w') as f:
        #     f.write(f'Classes in data: {categories}\n')
        #     f.write(f'Train-to-Holdout Ratio: {1-val_split}\n')
        #     f.write(f'Holdout Accuracy: {acc}\n')
        #     f.close()
        #     print(f"Report of model saved at models/reports/simpleCNN-{timestamp}.txt ")

    '''
    REFERENCE: 

    For reading in a pickled file:
Example #20
# imports inferred from Example #13, which is from the same project
from preprocessing import *
from save_model import save_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
# I put this program in the same folder as MLGame/games/arkaonid/ml
# you can edit the path to point to your log folder
if __name__ == "__main__":
    # preprocessing
    data_set = get_dataset()
    X, y = combine_multiple_data(data_set)

    # %% training
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    param_grid = {
        'weights': ('uniform', 'distance'),
        'algorithm': ('auto', 'ball_tree', 'kd_tree', 'brute'),
        #   'gamma': [0.1, 1, 10],
        #   'epsilon': [0.01, 0.05, 0.1, 0.5, 1.0]
    }
    knn = KNeighborsClassifier(n_neighbors=3)

    gclf = GridSearchCV(knn, param_grid, cv=5)
    gclf.fit(x_train, y_train)
    y_predict = gclf.predict(x_test)

    # extract the best parameters
    bestModel = gclf.best_estimator_
    best_score = gclf.best_score_
    print("Best Model:", bestModel)
    print("Training score:", best_score)
    print("Test score", accuracy_score(y_predict, y_test))
    # %% save the model
    save_model(bestModel, "model.pickle")