def write_results(config: configure_finetuning.FinetuningConfig, results):
    """Write evaluation metrics to disk."""
    utils.log("Writing results to", config.results_txt)
    utils.mkdir(config.results_txt.rsplit("/", 1)[0])
    utils.write_pickle(results, config.results_pkl)
    with tf.io.gfile.GFile(config.results_txt, "a") as f:
        results_str = ""
        for trial_results in results:
            for task_name, task_results in trial_results.items():
                if task_name == "time" or task_name == "global_step":
                    continue
                results_str += task_name + ": " + " - ".join([
                    "{:}: {:.2f}".format(k, v)
                    for k, v in task_results.items()
                ]) + "\n"

                # Neptune Metric Logging
                neptune.append_tag('ft')
                neptune.append_tag('tensorflow')
                neptune.set_property('task', task_name)
                for k, v in task_results.items():
                    neptune.log_metric(k, v)

        f.write(results_str)
    utils.write_pickle(results, config.results_pkl)
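The snippet above logs tags, properties, and metrics through the legacy neptune-client global API, so it assumes neptune.init() and neptune.create_experiment() were already called earlier in the run script. A minimal sketch of that setup, with a placeholder project and experiment name that are not part of the original:

import os
import neptune

# placeholder workspace/project; substitute your own
neptune.init(project_qualified_name='my_workspace/my_project',
             api_token=os.getenv('NEPTUNE_API_TOKEN'))
neptune.create_experiment(name='electra-finetuning')

# ... run finetuning, collect `results`, then:
# write_results(config, results)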
Example #2
def main(argv):
    gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param, skip_unknown=True)
    op_config_str = gin.config._CONFIG

    use_neptune = "NEPTUNE_API_TOKEN" in os.environ
    if use_neptune:

        params = utils.get_gin_params_as_dict(gin.config._CONFIG)
        neptune.init(project_qualified_name="melindafkiss/sandbox")

        exp = neptune.create_experiment(params=params, name="exp")
        #ONLY WORKS FOR ONE GIN-CONFIG FILE
        with open(FLAGS.gin_file[0]) as ginf:
            param = ginf.readline()
            while param:
                param = param.replace('.','-').replace('=','-').replace(' ','').replace('\'','').replace('\n','').replace('@','')
                #neptune.append_tag(param)
                param = ginf.readline()
        #for tag in opts['tags'].split(','):
        #  neptune.append_tag(tag)
    else:
        neptune.init('shared/onboarding', api_token='ANONYMOUS', backend=neptune.OfflineBackend())
        # create an (offline) experiment so exp.id is defined on both branches
        exp = neptune.create_experiment(name='exp')

    er = ExperimentRunner(prefix=exp.id)
    er.train()

    params = utils.get_gin_params_as_dict(gin.config._OPERATIVE_CONFIG)
    for k, v in params.items():
        neptune.set_property(k, v)
    neptune.stop()
    print('done')
Example #3
def main():
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    application_table_path = os.path.join(RAW_DATA_DIRPATH,
                                          'application_train.csv.zip')
    application_table = pd.read_csv(application_table_path, nrows=NROWS)

    index_table = application_table[['SK_ID_CURR', 'TARGET']]

    with neptune.create_experiment(name='validation schema',
                                   tags=['processed', 'validation'],
                                   upload_source_files=get_filepaths()):

        train_idx, valid_idx = train_test_split(index_table,
                                                test_size=TEST_SIZE,
                                                random_state=SEED)
        train_idx_path = os.path.join(INTERIM_FEATURES_DIRPATH,
                                      'train_idx.csv')
        train_idx.to_csv(train_idx_path, index=None)
        neptune.send_artifact(train_idx_path)
        neptune.set_property('train_split_version', md5_hash(train_idx_path))

        valid_idx_path = os.path.join(INTERIM_FEATURES_DIRPATH,
                                      'valid_idx.csv')
        valid_idx.to_csv(valid_idx_path, index=None)
        neptune.send_artifact(valid_idx_path)
        neptune.set_property('valid_split_version', md5_hash(valid_idx_path))
Example #4
    def resource_event(self, filename):
        if filename not in self.resources:
            md5 = get_digest(filename)
            self.resources[filename] = md5

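        # log the full set of tracked resource files plus this file's digest as experiment properties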
        neptune.set_property('resources', str(list(self.resources.keys())))
        neptune.set_property(filename, self.resources[filename])
Example #5
    def resource_event(self, filename):
        if filename not in self.resources:
            new_prefix = self._create_new_prefix()
            self.resources[filename] = new_prefix
            md5 = get_digest(filename)

            neptune.set_property('{}data_path'.format(new_prefix), filename)
            neptune.set_property('{}data_version'.format(new_prefix), md5)
Example #6
def main():
    print('loading data')
    train_features_path = os.path.join(
        FEATURES_DATA_PATH, 'train_features_' + FEATURE_NAME + '.csv')

    print('... train')
    train = pd.read_csv(train_features_path, nrows=TRAINING_PARAMS['nrows'])

    idx_split = int(
        (1 - VALIDATION_PARAMS['validation_fraction']) * len(train))
    train, valid = train[:idx_split], train[idx_split:]

    train = sample_negative_class(
        train,
        fraction=TRAINING_PARAMS['negative_sample_fraction'],
        seed=TRAINING_PARAMS['negative_sample_seed'])

    @skopt.utils.use_named_args(SPACE)
    def objective(**params):
        model_params = {**params, **STATIC_PARAMS}
        valid_preds = fit_predict(train,
                                  valid,
                                  None,
                                  model_params,
                                  TRAINING_PARAMS,
                                  fine_tuning=True)
        valid_auc = roc_auc_score(valid['isFraud'], valid_preds)
        return -1.0 * valid_auc

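    # log the full sweep configuration (static + training + HPO params) with the experiment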
    experiment_params = {
        **STATIC_PARAMS,
        **TRAINING_PARAMS,
        **HPO_PARAMS,
    }

    with neptune.create_experiment(name='skopt forest sweep',
                                   params=experiment_params,
                                   tags=['skopt', 'forest', 'tune'],
                                   upload_source_files=get_filepaths()):
        print('logging data version')
        log_data_version(train_features_path, prefix='train_features_')

        results = skopt.forest_minimize(objective,
                                        SPACE,
                                        callback=[sk_utils.NeptuneMonitor()],
                                        **HPO_PARAMS)
        best_auc = -1.0 * results.fun
        best_params = results.x

        neptune.send_metric('valid_auc', best_auc)
        neptune.set_property('best_parameters', str(best_params))

        sk_utils.send_best_parameters(results)
        sk_utils.send_plot_convergence(results, channel_name='diagnostics_hpo')
        sk_utils.send_plot_evaluations(results, channel_name='diagnostics_hpo')
        sk_utils.send_plot_objective(results, channel_name='diagnostics_hpo')
Example #7
    def validate(self):
        x = np.random.randn(10, 50)
        y = np.random.randn(10, 10)

        costs = []
        with torch.no_grad():
            for i in range(100):
                x = torch.tensor(x, device=device, dtype=torch.float)
                y = torch.tensor(y, device=device, dtype=torch.float)

                y_hat = self.lin(x)
                cost = torch.mean((y - y_hat) ** 2).item()
                costs.append(cost)
            total_cost = np.mean(costs)
            neptune.set_property(
                "validation",
                {"step": self.step, "epoch": self.epoch, "cost": total_cost},
            )
Example #8
def main():
    print('started experiment')
    with neptune.create_experiment(
            name='feature engineering',
            tags=['feature-extraction', FEATURE_NAME],
            upload_source_files=get_filepaths(),
            properties={'feature_version': FEATURE_NAME}):
        print('loading data')
        train = load_and_merge(RAW_DATA_PATH, 'train',
                               NROWS)[ID_COLS + V1_COLS + ['isFraud']]
        test = load_and_merge(RAW_DATA_PATH, 'test', NROWS)[ID_COLS + V1_COLS]

        categorical_cols = set(V1_CAT_COLS)
        print('cleaning data')
        email_cols = ['P_emaildomain', 'R_emaildomain']
        train, new_email_cols = clean_email(train, email_cols)
        test, _ = clean_email(test, email_cols)

        categorical_cols.update(new_email_cols)
        for col in email_cols:
            categorical_cols.remove(col)
        categorical_cols = list(categorical_cols)
        neptune.set_property('categorical_columns', str(categorical_cols))

        print('encoding categoricals')
        encoder = OrdinalEncoder(cols=categorical_cols).fit(
            train[ID_COLS + categorical_cols])
        train[ID_COLS + categorical_cols] = encoder.transform(
            train[ID_COLS + categorical_cols])
        test[ID_COLS + categorical_cols] = encoder.transform(
            test[ID_COLS + categorical_cols])

        train_features_path = os.path.join(
            FEATURES_DATA_PATH, 'train_features_{}.csv'.format(FEATURE_NAME))
        print('saving train to {}'.format(train_features_path))
        train.to_csv(train_features_path, index=None)
        log_data_version(train_features_path, prefix='train_features_')

        test_features_path = os.path.join(
            FEATURES_DATA_PATH, 'test_features_{}.csv'.format(FEATURE_NAME))
        print('saving test to {}'.format(test_features_path))
        test.to_csv(test_features_path, index=None)
        log_data_version(test_features_path, prefix='test_features_')
Example #9
    def add_params(self, params, step=None):
        '''
        Adds parameters to experiment log

        Parameters
        ----------
        params : Dict
            Key-Value pairs
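        step : int, optional
            Step index; stored as a 'step' property in Neptune and passed through to Comet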

        Returns
        -------
        None.

        '''
        if self.neptune:
            for key, value in params.items():
                neptune.set_property(key, value)
            if step is not None:
                neptune.set_property('step', step)
        if self.comet:
            self.comet_experiment.log_parameters(params, step=step)
Example #10
    def train(self):

        x = np.random.randn(10, 50)
        y = np.random.randn(10, 10)

        for epoch in range(10):
            self.epoch = epoch
            for i in range(100):
                x = torch.tensor(x, device=device, dtype=torch.float)
                y = torch.tensor(y, device=device, dtype=torch.float)

                y_hat = self.lin(x)
                cost = torch.mean((y - y_hat) ** 2)

                self.opt.zero_grad()
                cost.backward()
                self.opt.step()

                neptune.send_metric("epoch_cost", self.epoch, cost.item())

                self.step += 1

            self.validate()
        neptune.set_property("epoch", self.epoch)
        neptune.set_property("cost", cost.item())
        neptune.set_property("step", self.step)
Example #11
def main():
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    interim_feature_paths = [APPLICATION_FEATURES_PATH, BUREAU_FEATURES_PATH]

    with neptune.create_experiment(
            name='feature_extraction',
            tags=['processed', 'feature_extraction', 'joined_features'],
            upload_source_files=get_filepaths()):

        features = pd.read_csv(interim_feature_paths[0],
                               usecols=['SK_ID_CURR'],
                               nrows=NROWS)
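        # inner-join every interim feature table on SK_ID_CURR (pd.merge defaults to an inner join)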
        for path in interim_feature_paths:
            df = pd.read_csv(path, nrows=NROWS)
            features = features.merge(df, on='SK_ID_CURR')

        features.to_csv(PROCESSED_FEATURES_FILEPATH, index=None)
        neptune.set_property('features_version',
                             md5_hash(PROCESSED_FEATURES_FILEPATH))
        neptune.set_property('features_path', PROCESSED_FEATURES_FILEPATH)
Example #12
def main():
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    application_raw_path = os.path.join(RAW_DATA_DIRPATH,
                                        'application_train.csv.zip')
    application_raw = pd.read_csv(application_raw_path, nrows=NROWS)

    with neptune.create_experiment(
            name='feature_extraction',
            tags=['interim', 'application', 'feature_extraction'],
            upload_source_files=get_filepaths()):

        application_features, (numeric_cols,
                               categorical_cols) = extract(application_raw)
        # write to a file inside the interim directory (the filename below is
        # assumed; the original passed the directory path itself to to_csv)
        application_features_path = os.path.join(INTERIM_FEATURES_DIRPATH,
                                                 'application_features.csv')
        application_features.to_csv(application_features_path, index=None)

        neptune.set_property('numeric_features', str(numeric_cols))
        neptune.set_property('categorical_features', str(categorical_cols))
        neptune.set_property('features_version',
                             md5_hash(application_features_path))
        neptune.set_property('features_path', application_features_path)
Example #13
}

# create experiment
with neptune.create_experiment(
        name='classification_example',
        tags=['classification', 'tf_2'],
        upload_source_files=['classification-example.py', 'requirements.txt'],
        params=PARAMS):
    # dataset
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images,
                                   test_labels) = fashion_mnist.load_data()
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    neptune.set_property('train_images_version',
                         hashlib.md5(train_images).hexdigest())
    neptune.set_property('train_labels_version',
                         hashlib.md5(train_labels).hexdigest())
    neptune.set_property('test_images_version',
                         hashlib.md5(test_images).hexdigest())
    neptune.set_property('test_labels_version',
                         hashlib.md5(test_labels).hexdigest())

    class_names = [
        'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
        'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
    ]

    neptune.set_property('class_names', class_names)

    for j, class_name in enumerate(class_names):
Example #14
import hashlib

# prepare dataset
(x_train, y_train), (x_test,
                     y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt',
    'Sneaker', 'Bag', 'Ankle boot'
]

# log data version
neptune.set_property('x_train_version', hashlib.md5(x_train).hexdigest())
neptune.set_property('y_train_version', hashlib.md5(y_train).hexdigest())
neptune.set_property('x_test_version', hashlib.md5(x_test).hexdigest())
neptune.set_property('y_test_version', hashlib.md5(y_test).hexdigest())

neptune.set_property('class_names', class_names)

# Prepare model and log model architecture summary

# prepare model
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(parameters['dense_units'],
                          activation=parameters['activation']),
    tf.keras.layers.Dropout(parameters['dropout']),
    tf.keras.layers.Dense(parameters['dense_units'],
Example #15
    validation_steps=len(split_data['val'][0])//PARAMS['batch_size']

    # split_datasets = {
    #                   k:base_dataset.BaseDataset \
    #                                 .from_dataframe(
    #                                                 pd.DataFrame({
    #                                                             'path':v[0],
    #                                                             'family':v[1]
    #                                                             })) \
    #                   for k,v in split_data.items()
    #                  }


    with neptune.create_experiment(name=experiment_name, params=PARAMS):

        neptune.set_property('num_classes',data.num_classes)
        neptune.set_property('class_distribution',data.metadata.class_distribution)

    ##########################
        train_data=get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=PARAMS['batch_size'], num_channels=PARAMS['num_channels'], infinite=True, seed=2836)
        validation_data=get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=PARAMS['batch_size'], num_channels=PARAMS['num_channels'], infinite=True, seed=2836)
        train_batch = next(iter(train_data))
        train_images, train_labels = train_batch[0].numpy(), train_batch[1].numpy()
        print(train_images.min(), train_images.max())
        plt.imshow(train_images[5,:,:,:].squeeze())

    ##########################
        num_val_samples = len(split_data['val'][0])
        cm_val_data_loader = iter(get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=num_val_samples, num_channels=PARAMS['num_channels'], infinite=True, seed=2836))
        cm_val_imgs, cm_val_labels = next(cm_val_data_loader)
        cm_callback = ConfusionMatrixCallback(log_dir, cm_val_imgs, cm_val_labels, classes=data.classes, seed=PARAMS['seed'])
Example #16
    PARAMS['epoch'] = args.epoch
    PARAMS['hidden1'] = args.hidden1
    PARAMS['hidden2'] = args.hidden2
    PARAMS['batch_size'] = args.batch_size

    val_test_size = 0.1
    if args.log:
        neptune.create_experiment(name='example_with_parameters',
                                  params=PARAMS,
                                  upload_stdout=True,
                                  upload_stderr=True,
                                  send_hardware_metrics=True,
                                  upload_source_files='**/*.py')

        neptune.set_property("val_test_size", val_test_size)

    if not args.real:
        run = RunDecagonToy()
        run.run(adj_path=None,
                path_to_split=f'data/split/toy/{PARAMS["batch_size"]}',
                val_test_size=val_test_size,
                batch_size=PARAMS['batch_size'],
                num_epochs=PARAMS['epoch'],
                dropout=PARAMS['dropout'],
                max_margin=PARAMS['max_margin'],
                print_progress_every=150,
                log=args.log,
                on_cpu=args.cpu,
                upload_saved=args.upload_saved)
    else:
Example #17
        params=experiment_params,
        tags=['skopt', 'gp'],
        upload_source_files=['search_gp.py', 'basic_sweep.py', 'utils.py']):
    results = skopt.gp_minimize(objective,
                                SPACE,
                                callback=[monitor],
                                **HPO_PARAMS)
    best_auc = -1.0 * results.fun
    best_params = results.x

    # log metrics
    print('Best Validation AUC: {}'.format(best_auc))
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)
    neptune.set_property('best_params', str(to_named_params(best_params)))

    # log results
    skopt.dump(results, 'artifacts/gp_results.pkl')
    joblib.dump(SPACE, 'artifacts/gp_space.pkl')

    neptune.send_artifact('artifacts/gp_results.pkl')
    neptune.send_artifact('artifacts/gp_space.pkl')

    # log diagnostic plots
    fig, ax = plt.subplots(figsize=(16, 12))
    skopt.plots.plot_convergence(results, ax=ax)
    fig.savefig('plots/gp_convergence.png')

    neptune.send_image('diagnostics', 'plots/gp_convergence.png')
Example #18
def training_pipeline(args):
    ###############################################################################
    # Environment setup
    ###############################################################################

    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Check if CUDA device is available and set training on CPUs or GPUs
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )

    device = torch.device(args.cuda_device if args.cuda else "cpu")

    ###############################################################################
    # Experiment tracking setup
    ###############################################################################
    neptune.init(project_qualified_name='karexar/GSW-dialect-classifier')
    args_dict = vars(args)
    neptune.create_experiment(params=args_dict)
    if hasattr(args, 'experiment_id'):
        neptune.append_tag(args.experiment_id)
    neptune.set_property('lm_algo', 'lstm')
    for key in args_dict.keys():
        neptune.set_property(key, args_dict[key])

    ###############################################################################
    # Load data
    ###############################################################################

    print('Loading data')
    data_manager = DataManager(args.data, device, args.batch_size,
                               args.eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################

    print('Building model')
    num_tokens = data_manager.vocab_size
    num_labels = data_manager.num_labels
    embeddings_matrix = None
    if args.use_pretrained_embed:
        # Load pre-trained word embeddings model
        # and generate the embeddings weight matrix for the entire vocabulary
        assert args.embed_algo is not None
        print(f'Using {args.embed_algo} pre-trained word embeddings')

        if args.embed_algo == 'word2vec':
            pretrained_embeddings = Word2VecModel(args.model_path_embed,
                                                  args.model_name_embed,
                                                  load_from_disk=True)
            embeddings_matrix = pretrained_embeddings.get_vocabulary_embeddings(
                data_manager.idx2word, args.embed_size)
        elif args.embed_algo == 'glove':
            pretrained_embeddings = GloveModel(args.model_path_embed,
                                               args.model_name_embed,
                                               load_from_disk=True)
            embeddings_matrix = pretrained_embeddings.get_vocabulary_embeddings(
                data_manager.idx2word, args.embed_size)

    model = LSTM(num_tokens, args.embed_size, args.num_hidden, args.num_layers,
                 args.dropout, num_labels, embeddings_matrix).to(device)

    print('Model architecture')
    print(model)

    criterion = nn.CrossEntropyLoss()

    ###############################################################################
    # Training code
    ###############################################################################

    print('Initialising model executor')
    model_executor = ModelExecutor(model, data_manager, device, criterion)

    if args.train_lstm:
        # Loop over epochs
        learning_rate = args.learning_rate
        best_val_accuracy = None
        last_val_accuracy = 0
        model_optimiser = optim.SGD(model.parameters(), lr=learning_rate)

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            print('Starting the training process')
            for epoch in range(1, args.epochs + 1):
                epoch_start_time = time.time()

                _, _ = model_executor.train(epoch, args.batch_size,
                                            learning_rate, model_optimiser,
                                            args.clip, args.log_interval)
                val_loss, val_accuracy = model_executor.evaluate(
                    data_manager.val_iter, args.eval_batch_size)

                # Log result in Neptune ML
                neptune.send_metric('valid_loss', epoch, val_loss)
                neptune.send_metric('valid_accuracy', epoch, val_accuracy)
                neptune.send_metric('learning_rate', epoch, learning_rate)

                if epoch % 3 == 0:
                    learning_rate *= 0.9  # decay the learning rate every few epochs

                print('-' * 89)
                print(
                    '| End of epoch {:3d} | Time: {:5.2f}s | Valid loss {:6.2f} | '
                    'Valid accuracy {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        val_accuracy))
                print('-' * 89)

                # Save the model if the validation accuracy is the best we've seen so far.
                if not best_val_accuracy or val_accuracy > best_val_accuracy:
                    model_executor.model.export_model(args.model_path_lstm)
                    best_val_accuracy = val_accuracy

                if val_accuracy < last_val_accuracy:
                    # Anneal the learning rate if no improvement has been seen in the validation dataset.
                    learning_rate /= 2.0

                for group in model_optimiser.param_groups:
                    group['lr'] = learning_rate

                last_val_accuracy = val_accuracy

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

    ###############################################################################
    # Evaluation code
    ###############################################################################

    test_loss = None
    test_accuracy = None
    if args.eval_lstm:
        print('Evaluating on the test set')

        # Load the best saved model.
        model_executor.load_pre_trained_model(args.model_path_lstm,
                                              device=device)

        # Run on test data.
        test_loss, test_accuracy = model_executor.evaluate(
            data_manager.test_iter, args.eval_batch_size)

        # Log result in Neptune ML
        neptune.send_metric('test_loss', test_loss)
        neptune.send_metric('test_accuracy', test_accuracy)

        print('-' * 89)
        print('| End of evaluation | Test loss {:6.2f}'.format(test_loss) +
              ' | Test accuracy {:8.2f}'.format(test_accuracy))
        print('-' * 89)

    ###############################################################################
    # Stop the experiment tracking
    ###############################################################################

    neptune.stop()

    return test_loss, test_accuracy
Example #19
    def modify_properties(self):
        neptune.set_property("prop", "some text")
        neptune.set_property("prop_number", 42)
        neptune.set_property("nested/prop", 42)
        neptune.set_property("prop_to_del", 42)
        neptune.set_property("prop_list", [1, 2, 3])
        with open(self.text_file_path, mode="r") as f:
            neptune.set_property("prop_IO", f)
        neptune.set_property("prop_datetime", datetime.now())
        neptune.remove_property("prop_to_del")

        exp = neptune.get_experiment()
        properties = exp.get_properties()
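        # property values come back as strings, hence the string comparisons below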
        assert properties["prop"] == "some text"
        assert properties["prop_number"] == "42"
        assert properties["nested/prop"] == "42"
        assert properties["prop_list"] == "[1, 2, 3]"
        assert "prop_to_del" not in properties
        assert (properties["prop_IO"] ==
                "<_io.TextIOWrapper name='alpha_integration_dev/data/text.txt'"
                " mode='r' encoding='UTF-8'>")
        print(f"Properties: {properties}")
Example #20
    def learning(self, model, criterion, train_dataset, val_dataset, optimizer=None):

        self.init_learning(model, criterion)

        # define train and val transform
        train_dataset.transform = self.state['train_transform']
        train_dataset.target_transform = self._state('train_target_transform')
        val_dataset.transform = self.state['val_transform']
        val_dataset.target_transform = self._state('val_target_transform')

        # data loading code
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=self.state['batch_size'], shuffle=True,
                                                   num_workers=self.state['workers'])

        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=self.state['batch_size_test'], shuffle=False,
                                                 num_workers=self.state['workers'])

        # optionally resume from a checkpoint
        if self._state('resume') is not None:
            if os.path.isfile(self.state['resume']):
                print("=> loading checkpoint '{}'".format(self.state['resume']))
                checkpoint = torch.load(self.state['resume'])
                self.state['start_epoch'] = checkpoint['epoch']
                self.state['best_score'] = checkpoint['best_score']
                model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(self.state['evaluate'], checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.state['resume']))


        if self.state['use_gpu']:
            train_loader.pin_memory = True
            val_loader.pin_memory = True
            cudnn.benchmark = False

            model = torch.nn.DataParallel(model, device_ids=self.state['device_ids']).cuda()

            criterion = criterion.cuda()

        if self.state['evaluate']:
            self.validate(val_loader, model, criterion)
            return

        # TODO define optimizer

        for epoch in range(self.state['start_epoch'], self.state['max_epochs']):
            self.state['epoch'] = epoch
            lr = self.adjust_learning_rate(optimizer)
            print('lr:',lr, '|', 'step:' ,self.state['epoch_step'],'|', 'decay: ', self.state['lr_decay'])

            # train for one epoch
            self.train(train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            prec1 = self.validate(val_loader, model, criterion)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > self.state['best_score']
            self.state['best_score'] = max(prec1, self.state['best_score'])
            self.save_checkpoint({
                'epoch': epoch + 1,
                'arch': self._state('arch'),
                'state_dict': model.module.state_dict() if self.state['use_gpu'] else model.state_dict(),
                'best_score': self.state['best_score'],
            }, is_best)

            print(' *** best={best:.3f}'.format(best=self.state['best_score']))

            if self.state['neptune']:
                try:
                    neptune.set_property('top', float(self.state['best_score']))
                except Exception:
                    print("Neptune exception occurred")

        return self.state['best_score']
Example #21
    def log_property(self, name: str, value: Union[str, int, float]):
        if not self.disabled:
            neptune.set_property(name, str(value))
Example #22
def main(**kwargs):

    import sys

    for k, v in kwargs.items():
        sys.argv += [k, v]

    from pprint import pprint
    import argparse
    import datetime
    import json
    import os


    parser = argparse.ArgumentParser()
    parser.add_argument('--neptune_project_name', default='jacobarose/sandbox', type=str, help='Neptune.ai project name to log under')
    parser.add_argument('--experiment_name', default='pnas_minimal_example', type=str, help='Neptune.ai experiment name to log under')
    parser.add_argument('--config_path', default=r'/home/jacob/projects/pyleaves/pyleaves/configs/example_configs/pnas_resnet_config.json', type=str, help='JSON config file')
    parser.add_argument('-gpu', '--gpu_id', default='1', type=str, help='integer number of gpu to train on', dest='gpu_id')
    parser.add_argument('-tags', '--add-tags', default=[], type=str, nargs='*', help='Add arbitrary list of tags to apply to this run in neptune', dest='tags')
    parser.add_argument('-f', default=None)
    args = parser.parse_args()

    with open(args.config_path, 'r') as config_file:
        PARAMS = json.load(config_file)

    # print(gpu)
    # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

    pprint(PARAMS)
    import tensorflow as tf
    import neptune
    # tf.debugging.set_log_device_placement(True)
    print(tf.__version__)





    import arrow
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import io
    from stuf import stuf
    from more_itertools import unzip
    from functools import partial
    # import tensorflow as tf
    # tf.compat.v1.enable_eager_execution()
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    from pyleaves.leavesdb.tf_utils.tf_utils import set_random_seed, reset_keras_session
    import pyleaves
    from pyleaves.utils.img_utils import random_pad_image
    from pyleaves.utils.utils import ensure_dir_exists
    from pyleaves.datasets import leaves_dataset, fossil_dataset, pnas_dataset, base_dataset
    from pyleaves.models.vgg16 import VGG16, VGG16GrayScale
    from pyleaves.models import resnet, vgg16
    from tensorflow.compat.v1.keras.callbacks import Callback, ModelCheckpoint, TensorBoard, LearningRateScheduler, EarlyStopping
    from tensorflow.keras import metrics
    from tensorflow.keras.preprocessing.image import load_img, img_to_array
    from tensorflow.keras import layers
    from tensorflow.keras import backend as K
    import tensorflow_datasets as tfds
    import neptune_tensorboard as neptune_tb

    seed = 346
    # set_random_seed(seed)
    # reset_keras_session()
    def get_preprocessing_func(model_name):
        if model_name.startswith('resnet'):
            from tensorflow.keras.applications.resnet_v2 import preprocess_input
        elif model_name == 'vgg16':
            from tensorflow.keras.applications.vgg16 import preprocess_input
        elif model_name=='shallow':
            def preprocess_input(x):
                return x/255.0 # ((x/255.0)-0.5)*2.0

        return preprocess_input #lambda x,y: (preprocess_input(x),y)

    def _load_img(image_path):#, img_size=(224,224)):
        img = tf.io.read_file(image_path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.convert_image_dtype(img, tf.float32)
        return img
        # return tf.compat.v1.image.resize_image_with_pad(img, *img_size)

    def _encode_label(label, num_classes=19):
        label = tf.cast(label, tf.int32)
        label = tf.one_hot(label, depth=num_classes)
        return label

    def _load_example(image_path, label, num_classes=19):
        img = _load_img(image_path)
        one_hot_label = _encode_label(label, num_classes=num_classes)
        return img, one_hot_label

    def _load_uint8_example(image_path, label, num_classes=19):
        img = tf.image.convert_image_dtype(_load_img(image_path)*255.0, dtype=tf.uint8)
        one_hot_label = _encode_label(label, num_classes=num_classes)
        return img, one_hot_label

    def rgb2gray_3channel(img, label):
        '''
        Convert rgb image to grayscale, but keep num_channels=3
        '''
        img = tf.image.rgb_to_grayscale(img)
        img = tf.image.grayscale_to_rgb(img)
        return img, label

    def rgb2gray_1channel(img, label):
        '''
        Convert rgb image to grayscale, num_channels from 3 to 1
        '''
        img = tf.image.rgb_to_grayscale(img)
        return img, label

    def log_data(logs):
        for k, v in logs.items():
            neptune.log_metric(k, v)

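    # forward every metric Keras reports in `logs` to Neptune at the end of each epoch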
    neptune_logger = tf.keras.callbacks.LambdaCallback(on_epoch_end=lambda epoch, logs: log_data(logs))

    def focal_loss(gamma=2.0, alpha=4.0):

        gamma = float(gamma)
        alpha = float(alpha)

        def focal_loss_fixed(y_true, y_pred):
            """Focal loss for multi-classification
            FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
            Notice: y_pred is probability after softmax
            gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
            d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
            Focal Loss for Dense Object Detection
            https://arxiv.org/abs/1708.02002

            Arguments:
                y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
                y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

            Keyword Arguments:
                gamma {float} -- (default: {2.0})
                alpha {float} -- (default: {4.0})

            Returns:
                [tensor] -- loss.
            """
            epsilon = 1.e-9
            y_true = tf.convert_to_tensor(y_true, tf.float32)
            y_pred = tf.convert_to_tensor(y_pred, tf.float32)

            model_out = tf.add(y_pred, epsilon)
            ce = tf.multiply(y_true, -tf.math.log(model_out))
            weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
            fl = tf.multiply(alpha, tf.multiply(weight, ce))
            reduced_fl = tf.reduce_max(fl, axis=1)
            return tf.reduce_mean(reduced_fl)
        return focal_loss_fixed


    def per_class_accuracy(y_true, y_pred):
        return tf.metrics.mean_per_class_accuracy(y_true, y_pred, num_classes=PARAMS['num_classes'])

    def build_model(model_params,
                    optimizer,
                    loss,
                    METRICS):

        if model_params['name']=='vgg16':
            model_builder = vgg16.VGG16GrayScale(model_params)
        elif model_params['name'].startswith('resnet'):
            model_builder = resnet.ResNet(model_params)

        base = model_builder.build_base()
        model = model_builder.build_head(base)

        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=METRICS)

        return model

    def build_shallow(input_shape=(224,224,3),
                      num_classes=10,
                      optimizer=None,
                      loss=None,
                      METRICS=None):

        model = tf.keras.models.Sequential()
        model.add(layers.Conv2D(64, (7, 7), activation='relu', input_shape=input_shape, kernel_initializer=tf.initializers.GlorotNormal()))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (7, 7), activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (7, 7), activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        model.add(layers.Flatten())
        model.add(layers.Dense(64*2, activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        model.add(layers.Dense(num_classes,activation='softmax', kernel_initializer=tf.initializers.GlorotNormal()))

        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=METRICS)

        return model


    class ImageLogger:
        '''Tensorflow 2.0 version'''
        def __init__(self, log_dir: str, max_images: int, name: str):
            self.file_writer = tf.summary.create_file_writer(log_dir)
            self.log_dir = log_dir
            self.max_images = max_images
            self.name = name
            self._counter = tf.Variable(0, dtype=tf.int64)

            self.filepaths = []

        def add_log(self, img, counter=None, name=None):
            '''
            Intention is to generalize this to an abstract class for logging to any experiment management platform (e.g. neptune, mlflow, etc)

            Currently takes an image array, rescales it to [0, 255], and logs it to the current neptune experiment.
            '''

            # scaled_images = (img - tf.math.reduce_min(img))/(tf.math.reduce_max(img) - tf.math.reduce_min(img))
            # keep = 0
            # scaled_images = tf.image.convert_image_dtype(tf.squeeze(scaled_images[keep,:,:,:]), dtype=tf.uint8)
            # scaled_images = tf.expand_dims(scaled_images, 0)
            # tf.summary.image(name=self.name, data=scaled_images, step=self._counter, max_outputs=self.max_images)


            scaled_img = (img - np.min(img))/(np.max(img) - np.min(img)) * 255.0
            scaled_img = scaled_img.astype(np.uint32)

            neptune.log_image(log_name= name or self.name,
                              x=counter,
                              y=scaled_img)
            return scaled_img

        def __call__(self, images, labels):

            with self.file_writer.as_default():
                scaled_images = (images - tf.math.reduce_min(images))/(tf.math.reduce_max(images) - tf.math.reduce_min(images))
                keep = 0

                scaled_images = tf.image.convert_image_dtype(tf.squeeze(scaled_images[keep,:,:,:]), dtype=tf.uint8)
                scaled_images = tf.expand_dims(scaled_images, 0)

                labels = tf.argmax(labels[[keep], :],axis=1)
                tf.summary.image(name=self.name, data=scaled_images, step=self._counter, max_outputs=self.max_images)

                filepath = os.path.join(self.log_dir,'sample_images',f'{self.name}-{self._counter}.jpg')

                scaled_images = tf.image.encode_jpeg(tf.squeeze(scaled_images))
                tf.io.write_file(filename=tf.constant(filepath),
                                 contents=scaled_images)

            # self.add_log(scaled_images)
            self._counter.assign_add(1)
            return images, labels

    def _cond_apply(x, y, func, prob):
        """Conditionally apply func to x and y with probability prob.

        Parameters
        ----------
        x : tf.Tensor
            Input to conditionally pass through func
        y : tf.Tensor
            Label
        func : callable
            Function to conditionally be applied to x and y
        prob : float
            Probability of applying the function, within range [0.0, 1.0]

        Returns
        -------
        x, y
        """
        return tf.cond((tf.random.uniform([], 0, 1) >= (1.0 - prob)), lambda: func(x,y), lambda: (x,y))


    class ImageAugmentor:
        """Short summary.

        Parameters
        ----------
        augmentations : dict
            Maps a sequence of named augmentations to a scalar probability,
             according to which they'll be conditionally applied in order.
        resize_w_pad : tuple, default=None
            (height, width) target size; images are resized with tf.image.resize_with_pad.
        random_crop : tuple, default=None
            (height, width, channels) crop size passed to tf.image.random_crop.
        random_jitter : dict
            First applies resize_w_pad, then random_crop. If user desires only 1 of these, set this to None.
            Should be a dict with 2 keys:
                'resize':(height, width)
                'crop_size':(crop_height,crop_width, channels)

        Only 1 of these 3 kwargs should be provided to any given augmentor:
        {'resize_w_pad', 'random_crop', 'random_jitter'}
        Example values for each:
            resize_w_pad=(224,224)
            random_crop=(224,224,3)
            random_jitter={'resize':(338,338),
                           'crop_size':(224,224, 3)}



        seed : int, default=None
            Random seed to apply to all augmentations

        Examples
        -------
        Examples should be written in doctest format, and
        should illustrate how to use the function/class.
        >>>

        Attributes
        ----------
        augmentations

        """

        def __init__(self,
                     name='',
                     augmentations={'rotate':1.0,
                                    'flip':1.0,
                                    'color':1.0,
                                    'rgb2gray_3channel':1.0},
                     resize_w_pad=None,
                     random_crop=None,
                     random_jitter={'resize':(338,338),
                                    'crop_size':(224,224,3)},
                     log_dir=None,
                     seed=None):

            self.name = name
            self.augmentations = augmentations
            self.seed = seed

            if resize_w_pad:
                self.target_h = resize_w_pad[0]
                self.target_w = resize_w_pad[1]
                # self.resize = self.resize_w_pad
            elif random_crop:
                self.crop_size = random_crop
                self.target_h = self.crop_size[0]
                self.target_w = self.crop_size[1]
                # self.resize = self.random_crop
            elif random_jitter:
                # self.target_h = tf.random.uniform([], random_jitter['crop_size'][0], random_jitter['resize'][0], dtype=tf.int32, seed=self.seed)
                # self.target_w = tf.random.uniform([], random_jitter['crop_size'][1], random_jitter['resize'][1], dtype=tf.int32, seed=self.seed)
                self.crop_size = random_jitter['crop_size']
                # self.resize = self.random_jitter
                self.target_h = random_jitter['crop_size'][0]
                self.target_w = random_jitter['crop_size'][1]
            self.resize = self.resize_w_pad
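            # NOTE: resize always uses resize_w_pad here; random_crop/random_jitter are exposed as separate methods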



            self.maps = {'rotate':self.rotate,
                          'flip':self.flip,
                          'color':self.color,
                          'rgb2gray_3channel':self.rgb2gray_3channel,
                          'rgb2gray_1channel':self.rgb2gray_1channel}

            self.log_dir = log_dir

        def rotate(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Rotation augmentation

            Args:
                x,     tf.Tensor: Image
                label, tf.Tensor: arbitrary tensor, passes through unchanged

            Returns:
                Augmented image, label
            """
            # Rotate 0, 90, 180, 270 degrees
            return tf.image.rot90(x, tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32,seed=self.seed)), label

        def flip(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Flip augmentation

            Args:
                x,     tf.Tensor: Image to flip
                label, tf.Tensor: arbitrary tensor, passes through unchanged
            Returns:
                Augmented image, label
            """
            x = tf.image.random_flip_left_right(x, seed=self.seed)
            x = tf.image.random_flip_up_down(x, seed=self.seed)

            return x, label

        def color(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Color augmentation

            Args:
                x,     tf.Tensor: Image
                label, tf.Tensor: arbitrary tensor, passes through unchanged

            Returns:
                Augmented image, label
            """
            x = tf.image.random_hue(x, 0.08, seed=self.seed)
            x = tf.image.random_saturation(x, 0.6, 1.6, seed=self.seed)
            x = tf.image.random_brightness(x, 0.05, seed=self.seed)
            x = tf.image.random_contrast(x, 0.7, 1.3, seed=self.seed)
            return x, label

        def rgb2gray_3channel(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Convert RGB image -> grayscale image, maintain number of channels = 3

            Args:
                x,     tf.Tensor: Image
                label, tf.Tensor: arbitrary tensor, passes through unchanged

            Returns:
                Augmented image, label
            """
            x = tf.image.rgb_to_grayscale(x)
            x = tf.image.grayscale_to_rgb(x)
            return x, label

        def rgb2gray_1channel(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Convert RGB image -> grayscale image, reduce number of channels from 3 -> 1

            Args:
                x,     tf.Tensor: Image
                label, tf.Tensor: arbitrary tensor, passes through unchanged

            Returns:
                Augmented image, label
            """
            x = tf.image.rgb_to_grayscale(x)
            return x, label

        def resize_w_pad(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            # TODO Finish this
            # random_pad_image(x,min_image_size=None,max_image_size=None,pad_color=None,seed=self.seed)
            return tf.image.resize_with_pad(x, target_height=self.target_h, target_width=self.target_w), label

        def random_crop(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            return tf.image.random_crop(x, size=self.crop_size), label

        @tf.function
        def random_jitter(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            x, label = self.resize_w_pad(x, label)
            x, label = self.random_crop(x, label)
            return x, label

        def apply_augmentations(self, dataset: tf.data.Dataset):
            """
            Call this function to apply all of the augmentations in the order of specification
            provided to the constructor __init__() of ImageAugmentor.

            Args:
                dataset, tf.data.Dataset: must yield individual examples of form (x, y)
            Returns:
                Augmented dataset
            """

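            # resize first so every subsequent augmentation sees images at the target size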
            dataset = dataset.map(self.resize, num_parallel_calls=AUTOTUNE)

            for aug_name, aug_p in self.augmentations.items():
                aug = self.maps[aug_name]
                dataset = dataset.map(lambda x,y: _cond_apply(x, y, aug, prob=aug_p), num_parallel_calls=AUTOTUNE)
                # dataset = dataset.map(lambda x,y: _cond_apply(x, y, func=aug, prob=aug_p), num_parallel_calls=AUTOTUNE)

            return dataset


    class ImageLoggerCallback(Callback):
        '''Tensorflow 2.0 version

        Callback that keeps its own iterator over a tf.data.Dataset and logs the batch currently being trained on to neptune.
        '''
        def __init__(self, data :tf.data.Dataset, freq=1, max_images=-1, name='', encoder=None):

            self.data = data
            self.freq = freq
            self.max_images = max_images
            self.name = name
            self.encoder=encoder
            self.init_iterator()

        def init_iterator(self):
            self.data_iter = iter(self.data)
            self._batch = 0
            self._count = 0
            self.finished = False

        def yield_batch(self):
            batch_data = next(self.data_iter)
            self._batch += 1
            self._count += batch_data[0].shape[0]
            return batch_data

        def add_log(self, img, counter=None, name=None):
            '''
            Intention is to generalize this to an abstract class for logging to any experiment management platform (e.g. neptune, mlflow, etc)

            Currently takes an image array, rescales it to [0, 255], and logs it to the current neptune experiment.
            '''
            scaled_img = (img - np.min(img))/(np.max(img) - np.min(img)) * 255.0
            scaled_img = scaled_img.astype(np.uint32)

            neptune.log_image(log_name= name or self.name,
                              x=counter,
                              y=scaled_img)
            return scaled_img

        def on_train_batch_begin(self, batch, logs=None):
            if batch % self.freq or self.finished:
                return
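            # advance our own iterator until it has produced the batch index Keras is about to train on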
            while batch >= self._batch:
                x, y = self.yield_batch()

            if self.max_images==-1:
                self.max_images=x.shape[0]

            if x.ndim==3:
                x = tf.expand_dims(x, axis=0)  # np.newaxis is not callable; add a batch dimension
            if x.shape[0]>self.max_images:
                x = x[:self.max_images,...]
                y = y[:self.max_images,...]

            x = x.numpy()
            y = np.argmax(y.numpy(),axis=1)
            if self.encoder:
                y = self.encoder.decode(y)
            for i in range(x.shape[0]):
                # self.add_log(x[i,...], counter=i, name = f'{self.name}-{y[i]}-batch_{str(self._batch).zfill(3)}')
                self.add_log(x[i,...], counter=self._count+i, name = f'{self.name}-{y[i]}')
            print(f'Batch {self._batch}: Logged {np.max([x.shape[0],self.max_images])} {self.name} images to neptune')

        def on_epoch_end(self, epoch, logs={}):
            self.finished = True


    class ConfusionMatrixCallback(Callback):
        '''Tensorflow 2.0 version'''
        def __init__(self, log_dir, imgs : dict, labels : dict, classes, freq=1, include_train=False, seed=None):
            self.file_writer = tf.summary.create_file_writer(log_dir)
            self.log_dir = log_dir
            self.seed = seed
            self._counter = 0
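            # the image and label dicts must be keyed by the same splits (e.g. 'train'/'val')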
            assert set(imgs.keys()) == set(labels.keys())
            self.imgs = imgs

            for k,v in labels.items():
                if v.ndim==2:
                    labels[k] = tf.argmax(v,axis=-1)
            self.labels = labels
            self.num_samples = {k:l.numpy().shape[0] for k,l in labels.items()}
            self.classes = classes
            self.freq = freq
            self.include_train = include_train

        def log_confusion_matrix(self, model, imgs, labels, epoch, name='', norm_cm=False):

            pred_labels = model.predict_classes(imgs)
            # pred_labels = tf.argmax(pred_labels,axis=-1)
            pred_labels = pred_labels[:,None]

            con_mat = tf.math.confusion_matrix(labels=labels, predictions=pred_labels, num_classes=len(self.classes)).numpy()
            if norm_cm:
                con_mat = np.around(con_mat.astype('float') / con_mat.sum(axis=1)[:, np.newaxis], decimals=2)
            con_mat_df = pd.DataFrame(con_mat,
                             index = self.classes,
                             columns = self.classes)

            figure = plt.figure(figsize=(12, 12))
            sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues)
            plt.tight_layout()
            plt.ylabel('True label')
            plt.xlabel('Predicted label')

            buf = io.BytesIO()
            plt.savefig(buf, format='png')
            buf.seek(0)

            image = tf.image.decode_png(buf.getvalue(), channels=4)
            image = tf.expand_dims(image, 0)

            with self.file_writer.as_default():
                tf.summary.image(name=name+'_confusion_matrix', data=image, step=self._counter)

            neptune.log_image(log_name=name+'_confusion_matrix',
                              x=self._counter,
                              y=figure)
            plt.close(figure)
            self._counter += 1

            return image

        def on_epoch_end(self, epoch, logs={}):

            if (not self.freq) or (epoch%self.freq != 0):
                return

            if self.include_train:
                cm_summary_image = self.log_confusion_matrix(self.model, self.imgs['train'], self.labels['train'], epoch=epoch, name='train')
            cm_summary_image = self.log_confusion_matrix(self.model, self.imgs['val'], self.labels['val'], epoch=epoch, name='val')

####################################################################################
####################################################################################
####################################################################################



    neptune.init(project_qualified_name=args.neptune_project_name)
    # neptune_tb.integrate_with_tensorflow()


    experiment_dir = '/media/data/jacob/sandbox_logs'
    experiment_name = args.experiment_name

    experiment_start_time = arrow.utcnow().format('YYYY-MM-DD_HH-mm-ss')
    log_dir =os.path.join(experiment_dir, experiment_name, 'log_dir',PARAMS['loss'], experiment_start_time)
    ensure_dir_exists(log_dir)
    print('Tensorboard log_dir: ', log_dir)
    # os.system(f'neptune tensorboard {log_dir} --project {args.neptune_project_name}')

    weights_best = os.path.join(log_dir, 'model_ckpt.h5')
    restore_best_weights=False
    histogram_freq=0
    patience=25
    num_epochs = PARAMS['num_epochs']
    initial_epoch=0

    src_db = pyleaves.DATABASE_PATH
    datasets = {
                'PNAS': pnas_dataset.PNASDataset(src_db=src_db),
                'Leaves': leaves_dataset.LeavesDataset(src_db=src_db),
                'Fossil': fossil_dataset.FossilDataset(src_db=src_db)
                }
    # data = datasets[PARAMS['dataset_name']]
    data_config = stuf(threshold=PARAMS['data_threshold'],
                       num_classes=PARAMS['num_classes'],
                       data_splits_meta={
                                         'train':PARAMS['train_size'],
                                         'val':PARAMS['val_size'],
                                         'test':PARAMS['test_size']
                                        }
                       )

    preprocess_input = get_preprocessing_func(PARAMS['model_name'])
    preprocess_input(tf.zeros([4, 224, 224, 3]))
    
    load_example = partial(_load_uint8_example, num_classes=data_config.num_classes)
    # load_example = partial(_load_example, num_classes=data_config.num_classes)


    if PARAMS['num_channels'] == 3:
        color_aug = {'rgb2gray_3channel': 1.0}
    elif PARAMS['num_channels'] == 1:
        color_aug = {'rgb2gray_1channel': 1.0}
    else:
        raise ValueError(f"Unsupported num_channels: {PARAMS['num_channels']} (expected 1 or 3)")

    resize_w_pad=None
    random_jitter=None
    if not PARAMS['random_jitter']['resize']:
        resize_w_pad = PARAMS['image_size']
    else:
        random_jitter=PARAMS['random_jitter']

    TRAIN_image_augmentor = ImageAugmentor(name='train',
                                           augmentations={**PARAMS["augmentations"],
                                                          **color_aug},#'rotate':1.0,'flip':1.0,**color_aug},
                                           resize_w_pad=resize_w_pad,
                                           random_crop=None,
                                           random_jitter=random_jitter,
                                           log_dir=log_dir,
                                           seed=None)
    VAL_image_augmentor = ImageAugmentor(name='val',
                                         augmentations={**color_aug},
                                         resize_w_pad=PARAMS['image_size'],
                                         random_crop=None,
                                         random_jitter=None,
                                         log_dir=log_dir,
                                         seed=None)
    TEST_image_augmentor = ImageAugmentor(name='test',
                                          augmentations={**color_aug},
                                          resize_w_pad=PARAMS['image_size'],
                                          random_crop=None,
                                          random_jitter=None,
                                          log_dir=log_dir,
                                          seed=None)


    def neptune_log_augmented_images(split_data, num_demo_samples=40, PARAMS=PARAMS):
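        """Log a fixed batch of resize-only vs. fully augmented train/val images to Neptune
        for visual inspection of the augmentation pipeline."""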
        cm_data_x = {'train':[],'val':[]}
        cm_data_y = {'train':[],'val':[]}
        cm_data_x['train'], cm_data_y['train'] = next(iter(get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=num_demo_samples, infinite=True, augment=False,seed=2836)))
        cm_data_x['val'], cm_data_y['val'] = next(iter(get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=num_demo_samples, infinite=True, augment=False, seed=2836)))

        for (k_x,v_x), (k_y, v_y) in zip(cm_data_x.items(), cm_data_y.items()):
            x = tf.data.Dataset.from_tensor_slices(v_x)
            y = tf.data.Dataset.from_tensor_slices(v_y)
            xy_data = tf.data.Dataset.zip((x, y))
            v = xy_data.map(VAL_image_augmentor.resize, num_parallel_calls=AUTOTUNE)
            v_aug = TRAIN_image_augmentor.apply_augmentations(xy_data)
            v_x, v_y = [i.numpy() for i in next(iter(v.batch(10*num_demo_samples)))]
            v_x_aug, v_y_aug = [i.numpy() for i in next(iter(v_aug.batch(10*num_demo_samples)))]
            k = k_x
            for i in range(num_demo_samples):
                print(f'Neptune: logging {k}_{i}')
                print(f'{v_x[i].shape}, {v_x_aug[i].shape}')
                idx = np.random.randint(0,len(v_x))
                if True: #'train' in k:
                    TRAIN_image_augmentor.logger.add_log(v_x[idx],counter=i, name=k)
                    TRAIN_image_augmentor.logger.add_log(v_x_aug[idx],counter=i, name=k+'_aug')


    def get_data_loader(data : tuple, data_subset_mode='train', batch_size=32, num_classes=None, infinite=True, augment=True, seed=2836):
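        """Build a tf.data pipeline: cache, shuffle (train only), per-example loading,
        model-specific preprocessing, optional augmentation, then batching and prefetching."""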

        num_samples = len(data[0])
        x = tf.data.Dataset.from_tensor_slices(data[0])
        labels = tf.data.Dataset.from_tensor_slices(data[1])
        data = tf.data.Dataset.zip((x, labels))

        data = data.cache()
        if data_subset_mode == 'train':
            data = data.shuffle(buffer_size=num_samples)

        # data = data.map(lambda x,y: (tf.image.convert_image_dtype(load_img(x)*255.0,dtype=tf.uint8),y), num_parallel_calls=-1)
        data = data.map(load_example, num_parallel_calls=AUTOTUNE)


        data = data.map(lambda x,y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)

        if infinite:
            data = data.repeat()

        if data_subset_mode == 'train':
            data = data.shuffle(buffer_size=200, seed=seed)
            augmentor = TRAIN_image_augmentor
        elif data_subset_mode == 'val':
            augmentor = VAL_image_augmentor
        elif data_subset_mode == 'test':
            augmentor = TEST_image_augmentor

        if augment:
            data = augmentor.apply_augmentations(data)

        data = data.batch(batch_size, drop_remainder=True)

        return data.prefetch(AUTOTUNE)

    def get_tfds_data_loader(data : tf.data.Dataset, data_subset_mode='train', batch_size=32, num_samples=100, num_classes=19, infinite=True, augment=True, seed=2836):
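        """Build the same pipeline as get_data_loader but from a TFDS dataset; the interleaved
        print() calls dump min/max pixel values after each stage for debugging."""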


        def encode_example(x, y):
            x = tf.image.convert_image_dtype(x, tf.float32) * 255.0
            y = _encode_label(y, num_classes=num_classes)
            return x, y

        test_d = next(iter(data))
        print(test_d[0].numpy().min())
        print(test_d[0].numpy().max())

        data = data.shuffle(buffer_size=num_samples) \
                   .cache() \
                   .map(encode_example, num_parallel_calls=AUTOTUNE)

        test_d = next(iter(data))
        print(test_d[0].numpy().min())
        print(test_d[0].numpy().max())

        data = data.map(lambda x, y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)

        test_d = next(iter(data))
        print(test_d[0].numpy().min())
        print(test_d[0].numpy().max())

        if data_subset_mode == 'train':
            data = data.shuffle(buffer_size=100, seed=seed)
            augmentor = TRAIN_image_augmentor
        elif data_subset_mode == 'val':
            augmentor = VAL_image_augmentor
        elif data_subset_mode == 'test':
            augmentor = TEST_image_augmentor

        if augment:
            data = augmentor.apply_augmentations(data)

        test_d = next(iter(data))
        print(test_d[0].numpy().min())
        print(test_d[0].numpy().max())

        data = data.batch(batch_size, drop_remainder=True)
        if infinite:
            data = data.repeat()

        return data.prefetch(AUTOTUNE)






    # y_true = [[0, 1, 0], [0, 0, 1]]
    # y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

    def accuracy(y_true, y_pred):
        y_pred = tf.argmax(y_pred, axis=-1)
        y_true = tf.argmax(y_true, axis=-1)

        return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))


    def true_pos(y_true, y_pred):
        # y_true = K.ones_like(y_true)
        return K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))

    def false_pos(y_true, y_pred):
        # y_true = K.ones_like(y_true)
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        all_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        return all_positives - true_positives

    def true_neg(y_true, y_pred):
        # y_true = K.ones_like(y_true)
        return K.sum(1-K.round(K.clip(y_true * y_pred, 0, 1)))

    def recall(y_true, y_pred):
        # y_true = K.ones_like(y_true)
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        all_positives = K.sum(K.round(K.clip(y_true, 0, 1)))

        recall = true_positives / (all_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        # y_true = K.ones_like(y_true)
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        # tf.print(y_true, y_pred)
        return precision

    def f1_score(y_true, y_pred):
        m_precision = precision(y_true, y_pred)
        m_recall = recall(y_true, y_pred)
        # pdb.set_trace()
        return 2*((m_precision*m_recall)/(m_precision+m_recall+K.epsilon()))
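
    # Hypothetical sanity check (kept commented out so it does not run during training), using a
    # toy batch: only the first sample is predicted correctly, so accuracy and recall both
    # evaluate to 0.5 here.
    # y_true_toy = tf.constant([[0., 1., 0.], [0., 0., 1.]])
    # y_pred_toy = tf.constant([[0.05, 0.95, 0.0], [0.1, 0.8, 0.1]])
    # print(float(accuracy(y_true_toy, y_pred_toy)), float(recall(y_true_toy, y_pred_toy)))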

    # def false_neg(y_true, y_pred):
    #     y_true = K.ones_like(~y_true)
    #     true_neg = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    #     all_negative = K.sum(K.round(K.clip(y_true, 0, 1)))
    #     return all_negatives - true_

        # return K.mean(K.argmax(y_true,axis=1)*K.argmax(y_pred,axis=1))

        # 'accuracy',
        # metrics.TrueNegatives(name='tn'),
        # metrics.FalseNegatives(name='fn'),
    METRICS = [
        f1_score,
        metrics.TruePositives(name='tp'),
        metrics.FalsePositives(name='fp'),
        metrics.CategoricalAccuracy(name='accuracy'),
        metrics.TopKCategoricalAccuracy(name='top_3_categorical_accuracy', k=3),
        metrics.TopKCategoricalAccuracy(name='top_5_categorical_accuracy', k=5)
    ]
    PARAMS['sys.argv'] = ' '.join(sys.argv)

    with neptune.create_experiment(name=experiment_name, params=PARAMS, upload_source_files=[__file__]):


        print('Logging experiment tags:')
        for tag in args.tags:
            print(tag)
            neptune.append_tag(tag)

        neptune.append_tag(PARAMS['dataset_name'])
        neptune.append_tag(PARAMS['model_name'])
        neptune.log_artifact(args.config_path)
        cm_data_x = {'train':[],'val':[]}
        cm_data_y = {'train':[],'val':[]}

        if PARAMS['dataset_name'] in tfds.list_builders():
            num_demo_samples=40

            tfds_builder = tfds.builder(PARAMS['dataset_name'])
            tfds_builder.download_and_prepare()

            num_samples = tfds_builder.info.splits['train'].num_examples
            num_samples_dict = {'train':int(num_samples*PARAMS['train_size']),
                            'val':int(num_samples*PARAMS['val_size']),
                            'test':int(num_samples*PARAMS['test_size'])}

            classes = tfds_builder.info.features['label'].names
            num_classes = len(classes)
            encoder = base_dataset.LabelEncoder(classes)

            train_slice = [0,int(PARAMS['train_size']*100)]
            val_slice = [int(PARAMS['train_size']*100), int((PARAMS['train_size']+PARAMS['val_size'])*100)]
            test_slice = [100 - int(PARAMS['test_size']*100), 100]

            tfds_train_data = tfds.load(PARAMS['dataset_name'], split=f"train[{train_slice[0]}%:{train_slice[1]}%]", shuffle_files=True, as_supervised=True)
            tfds_validation_data = tfds.load(PARAMS['dataset_name'], split=f"train[{val_slice[0]}%:{val_slice[1]}%]", shuffle_files=True, as_supervised=True)
            tfds_test_data = tfds.load(PARAMS['dataset_name'], split=f"train[{test_slice[0]}%:{test_slice[1]}%]", shuffle_files=True, as_supervised=True)

            # PARAMS['batch_size']=1
            train_data = get_tfds_data_loader(data = tfds_train_data, data_subset_mode='train', batch_size=PARAMS['batch_size'], num_samples=num_samples_dict['train'], num_classes=num_classes, infinite=True, augment=True, seed=2836)
            validation_data = get_tfds_data_loader(data = tfds_validation_data, data_subset_mode='val', batch_size=PARAMS['batch_size'], num_samples=num_samples_dict['val'], num_classes=num_classes, infinite=True, augment=True, seed=2837)
            test_data = get_tfds_data_loader(data = tfds_test_data, data_subset_mode='test', batch_size=PARAMS['batch_size'], num_samples=num_samples_dict['test'], num_classes=num_classes, infinite=True, augment=True, seed=2838)

            split_data = {'train':get_tfds_data_loader(data = tfds_train_data, data_subset_mode='train', batch_size=num_demo_samples, num_samples=num_samples_dict['train'], num_classes=num_classes, infinite=True, augment=True, seed=2836),
                          'val':get_tfds_data_loader(data = tfds_validation_data, data_subset_mode='val', batch_size=num_demo_samples, num_samples=num_samples_dict['val'], num_classes=num_classes, infinite=True, augment=True, seed=2837),
                          'test':get_tfds_data_loader(data = tfds_test_data, data_subset_mode='test', batch_size=num_demo_samples, num_samples=num_samples_dict['test'], num_classes=num_classes, infinite=True, augment=True, seed=2838)
                          }

            steps_per_epoch=num_samples_dict['train']//PARAMS['batch_size']
            validation_steps=num_samples_dict['val']//PARAMS['batch_size']

            cm_data_x['train'], cm_data_y['train'] = next(iter(split_data['train']))
            cm_data_x['val'], cm_data_y['val'] = next(iter(split_data['val']))

        else:
            data = datasets[PARAMS['dataset_name']]
            neptune.set_property('num_classes',data.num_classes)
            neptune.set_property('class_distribution',data.metadata.class_distribution)

            encoder = base_dataset.LabelEncoder(data.data.family)
            split_data = base_dataset.preprocess_data(data, encoder, data_config)
            # import pdb;pdb.set_trace()
            for subset, subset_data in split_data.items():
                split_data[subset] = [list(i) for i in unzip(subset_data)]

            PARAMS['batch_size'] = 32

            steps_per_epoch=len(split_data['train'][0])//PARAMS['batch_size']#//10
            validation_steps=len(split_data['val'][0])//PARAMS['batch_size']#//10

            split_datasets = {
                              k:base_dataset.BaseDataset.from_dataframe(
                                pd.DataFrame({
                                            'path':v[0],
                                            'family':v[1]
                                            })) \
                              for k,v in split_data.items()
                             }

            for k,v in split_datasets.items():
                print(k, v.num_classes)

            classes = split_datasets['train'].classes

            train_data=get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2836)
            validation_data=get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2837)
            if 'test' in split_data.keys():
                test_data=get_data_loader(data=split_data['test'], data_subset_mode='test', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2838)

            num_demo_samples=150
            # neptune_log_augmented_images(split_data, num_demo_samples=num_demo_samples, PARAMS=PARAMS)
            cm_data_x['train'], cm_data_y['train'] = next(iter(get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=num_demo_samples, infinite=True, augment=True, seed=2836)))
            cm_data_x['val'], cm_data_y['val'] = next(iter(get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=num_demo_samples, infinite=True, augment=True,  seed=2836)))


        ########################################################################################
        train_image_logger_cb = ImageLoggerCallback(data=train_data, freq=20, max_images=-1, name='train', encoder=encoder)
        val_image_logger_cb = ImageLoggerCallback(data=validation_data, freq=20, max_images=-1, name='val', encoder=encoder)
        ########################################################################################

        cm_callback = ConfusionMatrixCallback(log_dir, cm_data_x, cm_data_y, classes=classes, seed=PARAMS['seed'], include_train=True)
        checkpoint = ModelCheckpoint(weights_best, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='min',restore_best_weights=restore_best_weights)
        tfboard = TensorBoard(log_dir=log_dir, histogram_freq=histogram_freq, write_images=True)
        early = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)
        callbacks = [checkpoint, tfboard, early, cm_callback, neptune_logger, train_image_logger_cb, val_image_logger_cb]
    ##########################
        if PARAMS['optimizer'] == 'Adam':
            optimizer = tf.keras.optimizers.Adam(
                learning_rate=PARAMS['lr']
            )
        elif PARAMS['optimizer'] == 'Nadam':
            optimizer = tf.keras.optimizers.Nadam(
                learning_rate=PARAMS['lr']
            )
        elif PARAMS['optimizer'] == 'SGD':
            optimizer = tf.keras.optimizers.SGD(
                learning_rate=PARAMS['lr']
            )
    ##########################
        if PARAMS['loss']=='focal_loss':
            loss = focal_loss(gamma=2.0, alpha=4.0)
        elif PARAMS['loss']=='categorical_crossentropy':
            loss = 'categorical_crossentropy'
    ##########################
        model_params = stuf(name=PARAMS['model_name'],
                            model_dir=os.path.join(experiment_dir, experiment_name, 'models'),
                            num_classes=PARAMS['num_classes'],
                            frozen_layers = PARAMS['frozen_layers'],
                            input_shape = (*PARAMS['image_size'],PARAMS['num_channels']),
                            base_learning_rate = PARAMS['lr'],
                            regularization = PARAMS['regularization'])
    ####
        if PARAMS['model_name']=='shallow':
            model = build_shallow(input_shape=model_params.input_shape,
                                  num_classes=PARAMS['num_classes'],
                                  optimizer=optimizer,
                                  loss=loss,
                                  METRICS=METRICS)

        else:
            model = build_model(model_params,
                                optimizer,
                                loss,
                                METRICS)
        print(f"TRAINING {PARAMS['model_name']}")

        model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))

        history = model.fit(train_data,
                            epochs=num_epochs,
                            callbacks=callbacks,
                            validation_data=validation_data,
                            shuffle=True,
                            initial_epoch=initial_epoch,
                            steps_per_epoch=steps_per_epoch,
                            validation_steps=validation_steps)


        if 'test' in split_data:
            results = model.evaluate(test_data,
                                    steps=len(split_data['test'][0]))
        else:
            results = model.evaluate(validation_data,
                                    steps=validation_steps)
Exemple #23
0
def test_main(args, neptune):
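    """Compute reconstruction errors for the validation/test sets with a trained model, derive
    an anomaly threshold from the validation errors (mean + 2*std), and log detection metrics
    and plots to Neptune."""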
    # some constants
    error_scaler = 1E8
    ar = [1240, 1460]  # anomaly range @200608-03:10
    in_n = args.dim_input

    # load model and obtain some stats
    model = torch.load(args.out_dir + '/' + args.exp_id).to('cpu')

    fp = open(args.stat_file, 'r')
    lines = fp.readlines()
    x_avg = torch.tensor([float(s) for s in lines[0].split(',')])
    x_std = torch.tensor([float(s) for s in lines[1].split(',')])
    fp.close()

    # 1. load test data
    val_data = np.loadtxt(args.val_path, delimiter=',')
    val_data = torch.tensor(val_data).type(torch.float32)
    val_recon = forward(val_data, model, x_avg, x_std, args)
    val_err = torch.sum((val_recon - val_data)**2, dim=1,
                        keepdim=True)  # squared error
    ve = val_err * error_scaler

    test_data = np.loadtxt(args.test_path, delimiter=',')
    test_lbl = test_data[:, -1]
    data_len = test_data.shape[0]  # retrieve labels and length
    test_data = torch.tensor(test_data[:, :-1]).type(torch.float32)
    test_recon = forward(test_data, model, x_avg, x_std, args)
    test_err = torch.sum((test_recon - test_data)**2, dim=1,
                         keepdim=True)  # squared error
    te = test_err * error_scaler

    # 2. measure validation error and test error
    neptune.set_property('validation error', torch.sum(ve).item())
    neptune.set_property('test error', torch.sum(te).item())

    # 3. plot reconstruction results
    cols = ['sensor1', 'sensor2']  # features
    ids_col = range(test_data.shape[0])  # for index

    for j, (data, recon, split_name) in enumerate([(val_data, val_recon, 'Validation'),
                                                   (test_data, test_recon, 'Test')]):
        fig, axs = plt.subplots(len(cols), 1, figsize=(12, 3))
        for i, col in enumerate(cols):
            axs[i].plot(ids_col,
                        data.numpy()[:, i],
                        '-c',
                        linewidth=2,
                        label='Raw Data')
            axs[i].plot(ids_col,
                        recon.detach().numpy()[:, i],
                        '-b',
                        linewidth=1,
                        label='Reconstructed Data')
        axs[1].legend()  # only add legend for second row
        fig.suptitle('Time Series of ' + split_name)
        log_chart('Data-Reconstruction', fig)

    # 4. find threshold
    T = (torch.mean(ve) + 2 * torch.std(ve)).item()
    T_ = np.empty((data_len, 1))
    T_[:] = T  # for plotting threshold

    if args.use_smoothing == 1:
        ve = smooth(ve.detach().numpy(), args.window_size)
        te = smooth(
            te.detach().numpy(), args.window_size
        )  # smoothing removes first window_size samples. (Tx, 1) -> (Tx-args.window_size+1, 1)

    pred = (te > T)  # pred is the classification result (anomaly if error exceeds threshold)
    pad = np.empty((args.window_size - 1, 1))
    pad[:] = 0
    pred = np.vstack([pad, pred])  # add 0 padding

    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    acc = accuracy_score(test_lbl, pred)
    neptune.set_property('acc', acc)
    prec = precision_score(test_lbl, pred)
    neptune.set_property('prec', prec)
    rec = recall_score(test_lbl, pred)
    neptune.set_property('rec', rec)
    f1 = f1_score(test_lbl, pred)
    neptune.set_property('f1', f1)

    # 5. draw plot
    fig = plt.figure(figsize=(24, 4))
    ids = list(range(data_len))
    pad[:] = np.nan
    te = np.vstack([pad, te])  # add nan padding
    plt.plot(ids[:ar[0]],
             te[:ar[0]],
             '-c',
             label='Test Reconstruction Error (Normal)')
    plt.plot(ids[ar[0]:ar[1]],
             te[ar[0]:ar[1]],
             '-r',
             label='Test Reconstruction Error (Anomaly)')
    plt.plot(ids[ar[1]:], te[ar[1]:], '-c')
    plt.plot(ids, T_, '--b', label='Threshold')
    plt.xlabel('Time')
    plt.ylabel('Error')
    plt.legend()
    #plt.ylim((0,2E5))
    plt.title('Reconstruction Error')
    log_chart('Reconstruction Error', fig)
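
# Hypothetical sketch (an assumption; the actual `smooth` helper used in test_main is not shown
# in this snippet): a moving average with mode='valid' matches the shape contract described
# above, mapping (Tx, 1) -> (Tx - window_size + 1, 1).
def _smooth_sketch(x, window_size):
    x = np.asarray(x).reshape(-1)
    kernel = np.ones(window_size) / window_size
    return np.convolve(x, kernel, mode='valid').reshape(-1, 1)
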
def train_imagenette(PARAMS):
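    """Train a classification head on an ImageNet-pretrained VGG16 backbone using the Imagenette
    TFDS dataset, logging sample images, the model summary and metrics to Neptune."""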

    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])

    K.clear_session()
    tf.random.set_seed(34)
    target_size = PARAMS['target_size']
    BATCH_SIZE = PARAMS['BATCH_SIZE']

    train_dataset, validation_dataset, info = create_Imagenette_dataset(
        BATCH_SIZE,
        target_size=target_size,
        augment_train=PARAMS['augment_train'])
    num_classes = info.features['label'].num_classes

    encoder = base_dataset.LabelEncoder(info.features['label'].names)

    train_dataset = train_dataset.map(
        lambda x, y: apply_preprocess(x, y, num_classes),
        num_parallel_calls=-1)
    validation_dataset = validation_dataset.map(
        lambda x, y: apply_preprocess(x, y, num_classes),
        num_parallel_calls=-1)

    PARAMS['num_classes'] = num_classes
    steps_per_epoch = info.splits['train'].num_examples // BATCH_SIZE
    validation_steps = info.splits['validation'].num_examples // BATCH_SIZE

    neptune.set_property('num_classes', num_classes)
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['learning_rate'])
    loss = 'categorical_crossentropy'
    METRICS = ['accuracy']

    base = tf.keras.applications.vgg16.VGG16(
        weights='imagenet',
        include_top=False,
        input_tensor=Input(shape=(*target_size, 3)))

    # TODO try freezing weights for input_shape != (224,224)

    model = build_head(base, num_classes=num_classes)

    model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)

    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    ]

    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)
    history = model.fit(train_dataset,
                        epochs=10,
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)
Exemple #25
0
def train_main(args, neptune):
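    """Train either an ensemble of RNNs (n_cmt > 1) or a single RNN model, then evaluate on the
    test split and log acc/prec/rec/f1 to Neptune."""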
    device = torch.device("cuda")

    # iterators
    trainiter = RNNIterator(args.tr_path,
                            stat_file=args.stat_file,
                            batch_size=args.batch_size)
    validiter = RNNIterator(args.val_path,
                            stat_file=args.stat_file,
                            batch_size=args.batch_size)
    testiter = RNNIterator(args.test_path,
                           stat_file=args.stat_file,
                           batch_size=args.batch_size)

    if args.n_cmt > 1:
        model = RNN_ESM(n_cmt=args.n_cmt,
                        dim_input=args.dim_input,
                        dim_lstm_hidden=args.dim_lstm_hidden,
                        dim_fc_hidden=args.dim_fc_hidden,
                        dim_output=args.dim_out).to(device)
    elif args.n_cmt == 1:
        model = RNN_MODEL1(dim_input=args.dim_input,
                           dim_lstm_hidden=args.dim_lstm_hidden,
                           dim_fc_hidden=args.dim_fc_hidden,
                           dim_output=args.dim_out).to(device)
    else:
        print('n_cmt must be a natural number')
        import sys
        sys.exit(0)

    start = time.time()

    # train the model
    if args.n_cmt > 1:
        for i in range(args.n_cmt):
            model.model_list[i] = train(net=model.model_list[i],
                                        train_loader=trainiter,
                                        valid_loader=validiter,
                                        patience=args.patience,
                                        args=args,
                                        dtype=torch.float32,
                                        device=device,
                                        savedir=args.out_dir + '/' +
                                        args.out_file,
                                        neptune=neptune)
    else:
        model = train(net=model,
                      train_loader=trainiter,
                      valid_loader=validiter,
                      patience=args.patience,
                      args=args,
                      dtype=torch.float32,
                      device=device,
                      savedir=args.out_dir + '/' + args.out_file,
                      neptune=neptune)

    acc, prec, rec, f1 = test(model, testiter, device)
    print('acc: {:.4f} | prec: {:.4f} | rec: {:.4f} | f1: {:.4f}'.format(
        acc, prec, rec, f1))

    neptune.set_property('acc', acc)
    neptune.set_property('prec', prec)
    neptune.set_property('rec', rec)
    neptune.set_property('f1', f1)
Exemple #26
0
def record_eval_metric(neptune, metrics):
    for k, v in metrics.items():
        neptune.log_metric(k, v)
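
# Example usage (hypothetical metric names/values):
# record_eval_metric(neptune, {'eval_f1': 0.81, 'eval_em': 0.74})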


# %%
model_path = '/workspace/ml-workspace/thesis_git/thesis/models/'
best_eval_f1 = 0
# Measure the total training time for the whole run.
total_t0 = time.time()
with neptune.create_experiment(name="HierarchicalSemanticGraphNetwork",
                               params=PARAMS,
                               upload_source_files=['HSGN_GAT.py']):
    neptune.append_tag(
        ["homogeneous_graph", "GATConv", "bidirectional_token_node_edge"])
    neptune.set_property('server', 'IRGPU2')
    neptune.set_property('training_set_path', training_path)
    neptune.set_property('dev_set_path', dev_path)
    # For each epoch...
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.

        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')
# select project
neptune.init('USERNAME/example-project')

# define parameters
PARAMS = {'timeseries_factor': 1.7, 'n_iterations': 200, 'n_images': 7}

# create experiment
neptune.create_experiment(name='timeseries_example', params=PARAMS)

# log some metrics
for i in range(1, PARAMS['n_iterations']):
    neptune.log_metric('iteration', i)
    neptune.log_metric('timeseries',
                       PARAMS['timeseries_factor'] * np.cos(i / 10))
    neptune.log_text('text_info', 'some value {}'.format(0.95 * i**2))

# log property (key:value pair)
neptune.set_property('timeseries_data_hash', '123e4567')

# add tag to the experiment
neptune.append_tag('timeseries_modeling')

# log some images
for j in range(PARAMS['n_images']):
    array = np.random.rand(10, 10, 3) * 255
    array = np.repeat(array, 30, 0)
    array = np.repeat(array, 30, 1)
    neptune.log_image('mosaics', array)

neptune.stop()
Exemple #28
0
def run_roshambo():
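    """Train a linear readout on binned spike counts from a PCritical spiking reservoir on the
    INIRoshambo (rock-paper-scissors) dataset, logging hyperparameters and metrics to Neptune."""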
    seed = 0x1B
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    neptune.set_property("seed", seed)
    neptune.append_tag("ROSHAMBO")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    _logger.info("Using device type %s", str(device))

    reduction_factor = 5  # Reduce each spatial dimension (width and height) by this factor
    neptune.set_property("reduction_factor", reduction_factor)

    width = 240 // reduction_factor
    height = 180 // reduction_factor
    n_features = width * height * 2
    batch_size = 5
    neptune.set_property("batch_size", batch_size)

    dt = 1 * ms
    neptune.set_property("dt", dt)

    bin_size = 50 * ms
    neptune.set_property("bin_size", bin_size)

    bin_steps = rescale(bin_size, dt, int)
    duration_per_sample = 500 * ms
    neptune.set_property("duration_per_sample", duration_per_sample)

    number_of_steps = rescale(duration_per_sample, dt, int)

    topology = SmallWorldTopology(
        SmallWorldTopology.Configuration(
            minicolumn_shape=(7, 7, 7),
            macrocolumn_shape=(3, 3, 3),
            minicolumn_spacing=300,
            p_max=0.025,
            sparse_init=True,
        )
    )
    n_neurons = topology.number_of_nodes()
    nb_of_bins = 1 + number_of_steps // bin_steps
    linear_readout = LinearWithBN(n_neurons * nb_of_bins, 3).to(device)
    loss_fn = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(linear_readout.parameters(), lr=0.001)
    neptune.set_property("adam.lr", 0.001)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    neptune.set_property("steplr.gamma", 0.1)
    neptune.set_property("steplr.step_size", 2)

    p_critical_configs = {
        "alpha": 0.0025,
        "beta": 0.00025,
        "tau_v": 50 * ms,
        "tau_i": 5 * ms,
        "v_th": 1.0,
    }

    for k, v in p_critical_configs.items():
        neptune.set_property(k, v)

    model = PCritical(
        n_features, batch_size, topology, dt=dt, **p_critical_configs,
    ).to(device)

    all_transforms = Compose(
        [
            ScaleDown(240, 180, factor=reduction_factor),
            ToDense(width, height, duration_per_sample, dt=dt),
            Flatten(),
        ]
    )

    label_dict = {
        "scissors": 0,
        "paper": 1,
        "rock": 2,
    }

    data = INIRoshambo(
        os.getenv("ROSHAMBO_DATASET_LOCATION_500ms_subsamples"),
        transforms=all_transforms,
    )
    train_data, val_data = split_per_user(data, train_ratio=0.85)
    _logger.info(
        "Keeping %i samples for training and %i for validation",
        len(train_data),
        len(val_data),
    )

    def labels_to_tensor(labels):
        return torch.tensor([label_dict[l] for l in labels])

    def run_batch(X, y):
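        """Run the reservoir over all timesteps of the batch and accumulate output spikes into
        fixed-width time bins per neuron."""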
        current_batch_size = len(y)
        model.batch_size = current_batch_size
        bins = torch.zeros(current_batch_size, n_neurons, nb_of_bins, device=device)
        for t in range(number_of_steps):
            out_spikes = model.forward(X[:, :, t])
            bins[:, :, t // bin_steps] += out_spikes
        return bins

    for iter_nb in range(10):
        train_generator = torch_data.DataLoader(
            train_data,
            batch_size=batch_size,
            shuffle=True,
            num_workers=2,
            pin_memory=True,
            timeout=120,
        )
        for i, (X, labels) in enumerate(tqdm(train_generator)):
            if i >= 20:
                break

            neptune.log_metric("iteration", i)
            X, y = X.to(device), labels_to_tensor(labels).to(device)

            # fig, axs = plt.subplots()
            # display_spike_train(axs, X[0])
            # plt.show()
            # print(X.shape)
            # exit(0)

            bins = run_batch(X, y)

            # fig, axs = plt.subplots()
            # activity = bins[0].sum(dim=0)
            # axs.plot(np.arange(nb_of_bins), activity.cpu().numpy())
            # plt.show()

            optimizer.zero_grad()
            out = linear_readout(bins.view(len(y), -1))
            loss = loss_fn(out, y)
            loss.backward()
            optimizer.step()
            loss_val = loss.cpu().detach().item()
            _logger.info("Loss: %.3f", loss_val)
            neptune.log_metric("loss", loss_val)

        total_accurate = 0
        total_elems = 0
        val_generator = torch_data.DataLoader(
            val_data,
            batch_size=batch_size,
            shuffle=False,
            num_workers=2,
            pin_memory=True,
            timeout=120,
        )
        for i, (X, labels) in enumerate(tqdm(val_generator)):
            if i >= 10:
                break
            X, y = X.to(device), labels_to_tensor(labels).to(device)
            bins = run_batch(X, y)
            out = linear_readout(bins.view(len(y), -1))
            preds = torch.argmax(out, dim=1)
            total_accurate += torch.sum(preds == y).cpu().float().item()
            total_elems += len(y)
            _logger.info("Current accuracy: %.4f", total_accurate / total_elems)
            neptune.log_metric("current_accuracy", total_accurate / total_elems)

        scheduler.step()

        _logger.info(
            "Final accuracy at iter %i: %.4f", iter_nb, total_accurate / total_elems
        )
        neptune.log_metric("final_accuracy", total_accurate / total_elems)
def train_pnas(PARAMS):
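    """Build train/validation datasets with create_dataset() from PARAMS, train the model from
    build_model(), and log sample images, the model summary and metrics to Neptune."""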
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(str(PARAMS['num_channels']))
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    tf.random.set_seed(34)

    train_dataset, validation_dataset, data_files = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        batch_size=PARAMS['BATCH_SIZE'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augment_train=PARAMS['augment_train'],
        aug_prob=PARAMS['aug_prob'])

    PARAMS['num_classes'] = data_files.num_classes
    PARAMS['splits_size'] = {
        'train': data_files.num_samples * PARAMS['splits']['train'],
        'validation': data_files.num_samples * PARAMS['splits']['validation']
    }

    steps_per_epoch = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    validation_steps = PARAMS['splits_size']['validation'] // PARAMS['BATCH_SIZE']

    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    encoder = base_dataset.LabelEncoder(data_files.classes)
    #     train_dataset = train_dataset.map(lambda x,y: apply_preprocess(x,y,PARAMS['num_classes']),num_parallel_calls=-1)
    #     validation_dataset = validation_dataset.map(lambda x,y: apply_preprocess(x,y,PARAMS['num_classes']),num_parallel_calls=-1)

    #     METRICS = ['accuracy']
    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=25, verbose=1)
    ]

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])
    model = build_model(PARAMS)

    #     if PARAMS['optimizer']=='Adam':
    #         optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['lr'])

    #     base = tf.keras.applications.vgg16.VGG16(weights='imagenet',
    #                                              include_top=False,
    #                                              input_tensor=Input(shape=(*PARAMS['target_size'],3)))

    #     model = build_head(base, num_classes=PARAMS['num_classes'])

    #     model.compile(optimizer=optimizer,
    #                   loss=PARAMS['loss'],
    #                   metrics=METRICS)

    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)
    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)

    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))

    return history
## Create an experiment and log model hyper-parameters

neptune.create_experiment(name='pytorch-run',
                          tags=['pytorch', 'MNIST'],
                          params=PARAMS)

## Log data version to the experiment

dataset = datasets.MNIST('../data',
                         train=True,
                         download=True,
                         transform=transforms.Compose([transforms.ToTensor()]))

neptune.set_property(
    'data_version',
    hashlib.md5(dataset.data.cpu().detach().numpy()).hexdigest())

## Log losses, accuracy score and image predictions during training

train_loader = torch.utils.data.DataLoader(dataset,
                                           batch_size=PARAMS['batch_size'],
                                           shuffle=True)

model = Net(PARAMS['fc_out_features'])
optimizer = optim.SGD(model.parameters(), PARAMS['lr'], PARAMS['momentum'])

for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    outputs = model(data)
    loss = F.nll_loss(outputs, target)