Example #1
def eval(config):
    # Files path
    model_file_path = config['model.path']

    data = load(config, datagen_flow=True)

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weighted_loss(loss_object, data["class_weights"])
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()
    model = create_model(
        model_name=config['model.name'],
        weights=config['model.weights'],
        nb_classes=data["nb_classes"],
        image_shape=data["image_shape"],
        optimizer=optimizer,
        loss_object=loss_object,
    )
    model.load_weights(model_file_path)
    # model.summary()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    _, _, test_step = steps(model,
                            loss_object,
                            optimizer,
                            train_loss=train_loss,
                            train_accuracy=train_accuracy,
                            test_loss=test_loss,
                            test_accuracy=test_accuracy,
                            engine=config['engine'])

    with tf.device(device_name):
        batches = 0
        for test_images, test_labels in data["test_gen"]:
            test_step(test_images, test_labels)
            batches += 1
            if batches >= data["test_size"] / config['data.batch_size']:
                # we need to break the loop by hand because
                # the generator loops indefinitely
                break

    print('Test Loss: {} Test Acc: {}'.format(test_loss.result(),
                                              test_accuracy.result() * 100))
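Examples #1, #4, and #7 call a class-weighting helper (weighted_loss / weightedLoss) that is not shown here. The sketch below is an assumption about what such a wrapper could look like: it scales each example's loss by the weight of its true class and presumes the wrapped loss returns per-example values (reduction=NONE).

import tensorflow as tf

def weighted_loss(loss_object, class_weights):
    # Hypothetical helper, not the project's actual implementation.
    # class_weights maps class index -> weight.
    weights = tf.constant(class_weights, dtype=tf.float32)

    def loss_fn(y_true, y_pred):
        per_example = loss_object(y_true, y_pred)          # shape: (batch,)
        idx = tf.cast(tf.reshape(y_true, [-1]), tf.int32)  # true class ids
        return tf.reduce_mean(per_example * tf.gather(weights, idx))

    return loss_fn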
Example #2
def eval(config):
    # Files path
    model_file = config['model.path']
    data_dir = "data/"

    _, test_features, _ = load(data_dir, config, use_feature_transform=True)

    model = tf.keras.models.load_model(model_file)
    model.summary()

    if config['model.weights_save_path'] != "":
        model.save_weights(config['model.weights_save_path'])

    if config['model.json_save_path'] != "":
        tfjs.converters.save_keras_model(model, config['model.json_save_path'])

    predictions = tf.round(model.predict(test_features)).numpy().flatten()
    print(predictions)
Example #3
def eval(config):
    # Files path
    model_file = config['model.path']
    data_dir = "data/"

    _, X, y = load(data_dir, config, numeric=True)

    model = tf.keras.models.load_model(model_file)
    model.summary()

    if config['model.weights_save_path'] != "":
        model.save_weights(config['model.weights_save_path'])

    if config['model.json_save_path'] != "":
        tfjs.converters.save_keras_model(model, config['model.json_save_path'])

    predictions = tf.round(model.predict(X)).numpy().flatten()
    print('Results for Binary Model')
    print(accuracy_score(y, predictions))
    print(classification_report(y, predictions))
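Examples #2 and #3 use tf, tfjs, accuracy_score, and classification_report without showing their imports; judging from the calls, the imports are presumably along these lines (an assumption, aliases inferred from usage):

import tensorflow as tf
import tensorflowjs as tfjs  # provides tfjs.converters.save_keras_model
from sklearn.metrics import accuracy_score, classification_report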
Example #4
def eval(config):
    # Files path
    model_file_path = config['model.path']
    data_dir = "data/"

    _, _, test, nb_classes, image_shape, class_weights = load(
        dataset_name=config['data.dataset'],
        batch_size=config['data.batch_size'],
        train_size=config['data.train_size'],
        test_size=config['data.test_size'],
        weight_classes=config['data.weight_classes'],
        datagen_flow=True,
    )

    (test_gen, test_len, _) = test

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weightedLoss(loss_object, class_weights)
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()
    model = densenet_model(classes=nb_classes,
                           shape=image_shape,
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    model.load_weights(model_file_path)
    model.summary()

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    _, test_step = steps(model,
                         loss_object,
                         optimizer,
                         test_loss=test_loss,
                         test_accuracy=test_accuracy)

    print("Starting evaluation")

    batches = 0
    for test_images, test_labels in test_gen:
        test_step(test_images, test_labels)
        batches += 1
        if batches >= test_len / config['data.batch_size']:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break

    print('Test Loss: {} Test Acc: {}'.format(test_loss.result(),
                                              test_accuracy.result() * 100))
Example #5
def train(config):
    np.random.seed(2020)
    tf.random.set_seed(2020)

    # Useful data
    now = datetime.now()
    now_as_str = now.strftime('%y_%m_%d-%H:%M:%S')

    # Output files
    checkpoint_path = config['model.save_path']
    config_path = config['output.config_path'].format(date=now_as_str)
    csv_output_path = config['output.train_path'].format(date=now_as_str)
    tensorboard_summary_dir = config['summary.save_path']
    summary_path = "results/summary.csv"

    # Output dirs
    data_dir = "data/"
    config_dir = config_path[:config_path.rfind('/')]
    output_dir = csv_output_path[:csv_output_path.rfind('/')]

    # Create folder for config
    if not os.path.exists(config_dir):
        os.makedirs(config_dir)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # generate config file
    file = open(config_path, 'w')
    file.write(json.dumps(config, indent=2))
    file.close()

    file = open(csv_output_path, 'w')
    file.write("")
    file.close()

    # create summary file if not exists
    if not os.path.exists(summary_path):
        file = open(summary_path, 'w')
        file.write("datetime, model, config, acc_std, acc_mean\n")
        file.close()

    # Data loader
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    _, X, y = load(data_dir, config, numeric=True)

    # Defines datasets on the input data.
    batch_size = config['data.batch_size']

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    time_start = time.time()

    # define 10-fold cross validation test harness
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    cvscores = []
    print("Running model performance validation... please wait!")

    for split, (train_index, test_index) in enumerate(skf.split(X, y)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Compiles a model, prints the model summary, and saves the model diagram into a png file.
        input_shape = (X_train.shape[1], )
        model = create_model(input_shape=input_shape,
                             learning_rate=config['train.lr'])
        model.summary()

        split_checkpoint_path = checkpoint_path.format(split=split)
        split_results_path = csv_output_path.format(split=split)

        split_checkpoint_dir = split_checkpoint_path[:split_checkpoint_path.
                                                     rfind('/')]
        split_results_dir = split_results_path[:split_results_path.rfind('/')]

        # Create folder for model
        if not os.path.exists(split_checkpoint_dir):
            os.makedirs(split_checkpoint_dir)

        # Create output for train process
        if not os.path.exists(split_results_dir):
            os.makedirs(split_results_dir)

        tf.keras.utils.plot_model(model,
                                  os.path.join(split_results_dir,
                                               "keras_model.png"),
                                  show_shapes=True,
                                  show_layer_names=False)

        callbacks = create_callbacks(
            tensorboard_summary_dir.format(split=split),
            split_results_path,
            split_checkpoint_path,
            patience=config['train.patience'])

        # Fit the model
        with tf.device(device_name):
            history = model.fit(X_train,
                                y_train,
                                validation_split=0.1,
                                epochs=config['train.epochs'],
                                batch_size=config['data.batch_size'],
                                use_multiprocessing=True,
                                callbacks=callbacks)

        # evaluate the model
        scores = model.evaluate(X_test, y_test, verbose=0)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
        cvscores.append(scores[1] * 100)

        # Runs prediction on test data.
        predictions = tf.round(model.predict(X_test)).numpy().flatten()
        print("Predictions on test data:")
        print(predictions)

        model_path = tf.train.latest_checkpoint(
            split_checkpoint_dir, latest_filename=split_checkpoint_path)

        if not model_path:
            print("Skipping evaluation. No checkpoint found in: {}".format(
                split_checkpoint_dir))
        else:
            model_from_saved = tf.keras.models.load_model(model_path)
            model_from_saved.summary()

            # Runs test data through the reloaded model to make sure the results are same.
            predictions_from_saved = tf.round(
                model_from_saved.predict(X_test)).numpy().flatten()
            np.testing.assert_array_equal(predictions_from_saved, predictions)

    print("Done.")
    print("Summary report on mean and std.")
    # The average and standard deviation of the model performance
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

    time_end = time.time()

    summary = "{}, {}, df, {}, {}, {}\n".format(now_as_str,
                                                config['data.dataset'],
                                                config_path, np.std(cvscores),
                                                np.mean(cvscores))
    print(summary)
    print(cvscores)

    file = open(summary_path, 'a+')
    file.write(summary)
    file.close()

    elapsed = time_end - time_start
    h, m = elapsed // 3600, elapsed % 3600 // 60
    sec = elapsed % 60

    print(f"Training took: {h:.0f}h {m:.0f}m {sec:.2f}s!")
Example #6
def train(config):
    data = load(config,
                datagen_flow=True,
                with_datasets=config['engine'] == 'maml')

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weighted_loss(loss_object, data["class_weights"])
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    time_start = time.time()
    # Compiles a model, prints the model summary, and saves the model diagram into a png file.
    model = densenet_model(classes=data["nb_classes"],
                           shape=data["image_shape"],
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    # model.summary()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='val_accuracy')

    with tf.device(device_name):
        train_engine.train(
            log_info=config,
            model=model,
            batch_size=config['data.batch_size'],
            epochs=config['train.epochs'],
            max_patience=config['train.patience'],
            engine=config['engine'],
            lr=config['train.lr'],
            train_loss=train_loss,
            train_accuracy=train_accuracy,
            test_loss=val_loss,
            test_accuracy=val_accuracy,
            val_loss=val_loss,
            val_accuracy=val_accuracy,
            optimizer=optimizer,
            loss_object=loss_object,
            **data,
        )

    time_end = time.time()

    elapsed = time_end - time_start
    h, m = elapsed // 3600, elapsed % 3600 // 60
    sec = elapsed % 60

    print(f"Training took: {h:.0f}h {m:.0f}m {sec:.2f}s!")
Example #7
def train(config):
    np.random.seed(2020)
    tf.random.set_seed(2020)

    # Useful data
    now = datetime.now()
    now_as_str = now.strftime('%y_%m_%d-%H:%M:%S')

    # Output files
    checkpoint_path = config['model.save_path']
    config_path = config['output.config_path'].format(now_as_str)
    csv_output_path = config['output.train_path'].format(now_as_str)
    train_summary_file_path = config['summary.save_path'].format(
        'train', config['data.dataset'], config['model.name'],
        config['model.type'], now_as_str)
    test_summary_file_path = config['summary.save_path'].format(
        'test', config['data.dataset'], config['model.name'],
        config['model.type'], now_as_str)
    summary_path = "results/summary.csv"

    # Output dirs
    data_dir = "data/"
    checkpoint_dir = checkpoint_path[:checkpoint_path.rfind('/')]
    config_dir = config_path[:config_path.rfind('/')]
    results_dir = csv_output_path[:csv_output_path.rfind('/')]

    # Create folder for model
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Create output for train process
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    file = open(csv_output_path, 'w')
    file.write("")
    file.close()

    # Create folder for config
    if not os.path.exists(config_dir):
        os.makedirs(config_dir)

    # generate config file
    file = open(config_path, 'w')
    file.write(json.dumps(config, indent=2))
    file.close()

    # create summary file if not exists
    if not os.path.exists(summary_path):
        file = open(summary_path, 'w')
        file.write("datetime, model, config, min_loss, min_loss_accuracy\n")
        file.close()

    # Data loader
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    train, val, _, nb_classes, image_shape, class_weights = load(
        dataset_name=config['data.dataset'],
        dataset_dir=config['data.dataset_dir'],
        batch_size=config['data.batch_size'],
        train_size=config['data.train_size'],
        test_size=config['data.test_size'],
        weight_classes=config['data.weight_classes'],
        rotation_range=config['data.rotation_range'],
        width_shift_range=config['data.width_shift_range'],
        height_shift_range=config['data.height_shift_range'],
        horizontal_flip=config['data.horizontal_flip'],
        datagen_flow=True,
    )

    (train_gen, train_len, _) = train
    (val_gen, val_len, _) = val

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weightedLoss(loss_object, class_weights)
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    time_start = time.time()
    # Compiles a model, prints the model summary, and saves the model diagram into a png file.
    model = densenet_model(classes=nb_classes,
                           shape=image_shape,
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    model.summary()

    tf.keras.utils.plot_model(model,
                              "{}/model.png".format(results_dir),
                              show_shapes=True)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='val_accuracy')

    train_step, test_step = steps(model, loss_object, optimizer, train_loss,
                                  train_accuracy, val_loss, val_accuracy)

    # create summary writers
    train_summary_writer = tf.summary.create_file_writer(
        train_summary_file_path)
    val_summary_writer = tf.summary.create_file_writer(test_summary_file_path)

    print("Starting training")

    loss, acc = train_engine.train(
        model=model,
        batch_size=config['data.batch_size'],
        epochs=config['train.epochs'],
        max_patience=config['train.patience'],
        train_gen=train_gen,
        train_len=train_len,
        val_gen=val_gen,
        val_len=val_len,
        train_loss=train_loss,
        train_accuracy=train_accuracy,
        val_loss=val_loss,
        val_accuracy=val_accuracy,
        train_step=train_step,
        test_step=test_step,
        checkpoint_path=checkpoint_path,
        train_summary_writer=train_summary_writer,
        val_summary_writer=val_summary_writer,
        csv_output_file=csv_output_path,
    )

    time_end = time.time()

    summary = "{}, {}, {}, {}, {}, {}\n".format(now_as_str,
                                                config['data.dataset'],
                                                config['model.name'],
                                                config_path, loss, acc)
    print(summary)

    file = open(summary_path, 'a+')
    file.write(summary)
    file.close()

    model_path = tf.train.latest_checkpoint(checkpoint_dir,
                                            latest_filename=checkpoint_path)

    if not model_path:
        print("Skipping evaluation. No checkpoint found in: {}".format(
            checkpoint_dir))
    else:
        model_from_saved = tf.keras.models.load_model(model_path)
        model_from_saved.summary()

        # Runs test data through the reloaded model to make sure the results are same.
        predictions_from_saved = model_from_saved.predict(val_gen)

    elapsed = time_end - time_start
    h, m = elapsed // 3600, elapsed % 3600 // 60
    sec = elapsed % 60

    print(f"Training took: {h:.0f}h {m:.0f}m {sec:.2f}s!")
Example #8
def train(dataset_name,
          loss_name,
          block_size,
          partition_rule,
          selection_rule,
          update_rule,
          n_iters,
          L1,
          L2,
          optimal=None,
          datasets_path=""):

    np.random.seed(1)
    # load dataset
    dataset = datasets.load(dataset_name, path=datasets_path)
    A, b, args = dataset["A"], dataset["b"], dataset["args"]

    args.update({
        "L2": L2,
        "L1": L1,
        "block_size": block_size,
        "update_rule": update_rule
    })

    # loss function
    lossObject = losses.create_lossObject(loss_name, A, b, args)

    # Get partitions
    partition = partition_rules.get_partition(A,
                                              b,
                                              lossObject,
                                              block_size,
                                              p_rule=partition_rule)

    # Initialize x
    x = np.zeros(lossObject.n_params)

    score_list = []

    pbar = tqdm(desc="starting", total=n_iters, leave=True)

    ###### TRAINING STARTS HERE ############
    block = np.array([])
    for i in range(n_iters + 1):
        # Compute loss
        loss = lossObject.f_func(x, A, b)
        dis2opt = loss - \
            exp_configs.OPTIMAL_LOSS[dataset_name + "_" + loss_name]
        score_list += [{"loss": loss, "iteration": i, "selected": block}]

        stdout = ("%d - %s_%s_%s - dis2opt:%.16f - nz: %d/%d" %
                  (i, partition_rule, selection_rule, update_rule, dis2opt,
                   (x != 0).sum(), x.size))
        print(stdout)

        # Check convergence
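        # NOTE: `work` below is not defined in this snippet; it is assumed to
        # hold the reference support (indices expected to be nonzero) defined
        # elsewhere in the surrounding codebase.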
        if (i > 5 and (np.array_equal(work, np.where(x > 1e-16)[0]))):
            score_list[-1]["converged"] = dis2opt
        if (i > 5 and (dis2opt == 0 or dis2opt < 1e-8)):
            break

        # Check increase relative to the previous iteration's loss
        # (score_list[-1] is the entry just appended for this iteration)
        if (i > 0) and (loss > score_list[-2]["loss"] + 1e-6):
            raise ValueError("loss value has increased...")

        # Select block
        if partition is None:
            block, args = VB_selection_rules.select(selection_rule,
                                                    x,
                                                    A,
                                                    b,
                                                    lossObject,
                                                    args,
                                                    iteration=i)
        else:
            block, args = FB_selection_rules.select(selection_rule,
                                                    x,
                                                    A,
                                                    b,
                                                    lossObject,
                                                    args,
                                                    partition,
                                                    iteration=i)

        # Update block
        x, args = update_rules.update(update_rule,
                                      x,
                                      A,
                                      b,
                                      lossObject,
                                      args=args,
                                      block=block,
                                      iteration=i)

    pbar.close()

    for score_dict in score_list:
        score_dict["loss"] -= exp_configs.OPTIMAL_LOSS[dataset_name + "_" +
                                                       loss_name]

    return score_list
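A hypothetical invocation of this block coordinate descent routine; the dataset, loss, partition, selection, and update rule names below are placeholders, since the valid values come from the surrounding datasets, losses, partition_rules, selection_rules, and update_rules modules.

score_list = train(dataset_name="synthetic",   # placeholder names; valid
                   loss_name="logistic",       # values depend on the
                   block_size=5,               # surrounding modules
                   partition_rule="order",
                   selection_rule="cyclic",
                   update_rule="gradient",
                   n_iters=100,
                   L1=0.0,
                   L2=1.0,
                   datasets_path="data/")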