Example #1
0
    def save_experiment(self):
        """Pickle the agent to ``./output/<experiment id>.ckp`` and publish it.

        The checkpoint file is uploaded as a Neptune artifact, after which the
        agent and environment values from the run arguments are appended as
        tags on the current experiment.
        """
        checkpoint_path = f"./output/{self._experiment._id}.ckp"
        with open(checkpoint_path, "wb") as checkpoint_file:
            pickle.dump(self._agent, checkpoint_file)

        neptune.log_artifact(checkpoint_path)
        for tag in (self._args.agent, self._args.environment):
            neptune.append_tag(tag)
def main(args):
    """Train and/or evaluate a ``ParaSelectorTrainer`` as requested by ``args``.

    ``args.logger`` switches Neptune logging on, ``args.do_train`` runs
    training followed by a model save, and ``args.do_eval`` reloads the saved
    model and evaluates it on the dev split.
    """
    init_logger()
    set_seed(args)

    if args.logger:
        neptune.init("wjdghks950/NumericHGN")
        experiment_name = (
            f"({args.task}) NumHGN_{args.train_batch_size}_"
            f"{args.max_seq_len}_{args.train_file}"
        )
        neptune.create_experiment(name=experiment_name)
        neptune.append_tag(
            "BertForSequenceClassification",
            "finetuning",
            "num_augmented_HGN",
        )

    tokenizer = load_tokenizer(args)

    # The training split is only materialised when training was requested;
    # the dev split is always loaded. The test split is currently unused.
    train_dataset = None
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, mode="train")
    dev_dataset = load_and_cache_examples(args, tokenizer, mode="dev")

    trainer = ParaSelectorTrainer(args, train_dataset, dev_dataset)

    if args.do_train:
        trainer.train()
        trainer.save_model()

    if args.do_eval:
        trainer.load_model()
        trainer.evaluate("dev")

    if args.logger:
        neptune.stop()
Example #3
0
def write_results(config: configure_finetuning.FinetuningConfig, results):
    """Write evaluation metrics to disk and log them to Neptune.

    Args:
        config: Supplies ``results_txt`` (text file that is appended to) and
            ``results_pkl`` (pickle file that receives ``results``).
        results: Iterable of per-trial dicts mapping a task name to a dict of
            ``metric name -> numeric value``. The ``"time"`` and
            ``"global_step"`` entries are skipped.

    Each remaining task produces one ``task: k: v - k: v`` line in the text
    file; every metric is also sent to Neptune via ``log_metric``.
    """
    utils.log("Writing results to", config.results_txt)
    utils.mkdir(config.results_txt.rsplit("/", 1)[0])
    # The pickle is written exactly once; the original wrote the same payload
    # a second time at the end of the function.
    utils.write_pickle(results, config.results_pkl)

    with tf.io.gfile.GFile(config.results_txt, "a") as f:
        # Collect lines and join once instead of growing a string with `+=`.
        lines = []
        for trial_results in results:
            for task_name, task_results in trial_results.items():
                if task_name in ("time", "global_step"):
                    continue
                formatted_metrics = " - ".join(
                    "{:}: {:.2f}".format(k, v)
                    for k, v in task_results.items())
                lines.append(task_name + ": " + formatted_metrics + "\n")

                # Neptune Metric Logging
                neptune.append_tag('ft')
                neptune.append_tag('tensorflow')
                neptune.set_property('task', task_name)
                for k, v in task_results.items():
                    neptune.log_metric(k, v)

        f.write("".join(lines))
Example #4
0
    def modify_tags(self):
        """Append and remove tags, then check the experiment's final tag set.

        Covers both the single-string and list call forms, and the removal of
        a tag that was never added.
        """
        neptune.append_tags("tag1")
        neptune.append_tag(["tag2_to_remove", "tag3"])
        for stale_tag in ("tag2_to_remove", "tag4_remove_non_existing"):
            neptune.remove_tag(stale_tag)

        current_tags = set(neptune.get_experiment().get_tags())
        expected_tags = {"initial tag 1", "initial tag 2", "tag1", "tag3"}
        assert current_tags == expected_tags
Example #5
0
def train(cfg, network):
    """Run the training loop for ``network`` and return it.

    Builds the trainer, optimizer, LR scheduler and recorder from ``cfg``,
    restores any checkpoint found in ``cfg.model_dir``, then trains from the
    restored epoch up to ``cfg.train.epoch``. Checkpoints are written every
    ``cfg.save_ep`` epochs and validation runs every ``cfg.eval_ep`` epochs.
    When ``cfg.neptune`` is truthy a Neptune experiment wraps the run.
    """
    if not cfg.train.dataset.startswith('City'):
        torch.multiprocessing.set_sharing_strategy('file_system')

    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    # An evaluator is only built for non-Coco datasets; the Coco validation
    # path below does not use one.
    if 'Coco' not in cfg.train.dataset:
        evaluator = make_evaluator(cfg)

    first_epoch = load_model(
        network, optimizer, scheduler, recorder, cfg.model_dir,
        resume=cfg.resume,
    )

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    if cfg.neptune:
        global_steps = dict(train_global_steps=0, valid_global_steps=0)
        neptune.init('hccccccccc/clean-pvnet')
        neptune.create_experiment(cfg.model_dir.split('/')[-1])
        neptune.append_tag('pose')
    else:
        global_steps = None

    for epoch in range(first_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder, global_steps)
        scheduler.step()

        epochs_done = epoch + 1
        if epochs_done % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)

        if epochs_done % cfg.eval_ep != 0:
            continue
        if 'Coco' in cfg.train.dataset:
            trainer.val_coco(val_loader, global_steps)
        else:
            trainer.val(epoch, val_loader, evaluator, recorder)

    if cfg.neptune:
        neptune.stop()

    return network
Example #6
0
    def __init__(self, learning_rate):
        """Start a Neptune experiment and build a minimal linear model.

        Parameters
        ----------
        learning_rate : float
            Logged as an experiment parameter and used as the SGD step size.
        """
        import os  # local import: the file's import block is not visible here

        # SECURITY: an API token is committed in source below. Prefer the
        # NEPTUNE_API_TOKEN environment variable; the literal is kept only as
        # a fallback so existing runs keep working. It should be revoked and
        # removed from the repository.
        api_token = os.environ.get("NEPTUNE_API_TOKEN") or (
            "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiJjYjdhMGI5Ny02YTNmLTRlN2MtOTkyYi1jNDM0YjRmMjM5MDQifQ=="
        )
        neptune.init(
            api_token=api_token,
            project_qualified_name="martinjms/examples",
        )
        neptune.create_experiment(
            name="caisim-example", params=dict(learning_rate=learning_rate)
        )
        neptune.append_tag("minimal-example")

        self.learning_rate = learning_rate

        # Then pytorch stuff..
        self.lin = torch.nn.Linear(50, 10)
        self.opt = torch.optim.SGD(self.lin.parameters(), learning_rate)
        self.step = 0
        self.lin.to(device)
Example #7
0
    def add_tags(self, tags):
        '''
        Adds tags to the experiment on every enabled tracking backend

        Parameters
        ----------
        tags : list
            list of tags (strings)
            e.g.: ['tag1', 'tag2']

        Returns
        -------
        None.

        '''
        # Each backend is optional; only the ones flagged on this instance
        # receive the tags.
        if self.neptune:
            neptune.append_tag(tags)
        if self.comet:
            self.comet_experiment.add_tags(tags)
Example #8
0
def run_roshambo():
    """Train and validate a linear readout on top of a PCritical model.

    The function seeds every RNG it uses, records hyper-parameters as Neptune
    properties, builds the topology / model / transforms, and then runs 10
    outer iterations. Each iteration trains on at most 20 batches and
    validates on at most 10 batches, logging loss and accuracy to both the
    module logger and Neptune.

    The dataset location is read from the
    ``ROSHAMBO_DATASET_LOCATION_500ms_subsamples`` environment variable.
    """
    # Fixed seed applied to Python, NumPy, the hash seed and PyTorch.
    seed = 0x1B
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    neptune.set_property("seed", seed)
    neptune.append_tag("ROSHAMBO")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    _logger.info("Using device type %s", str(device))

    reduction_factor = 5  # Reduce dimension axis by this factor
    neptune.set_property("reduction_factor", reduction_factor)

    # Input resolution after down-scaling; the factor 2 in n_features
    # presumably accounts for two event polarities — TODO confirm.
    width = 240 // reduction_factor
    height = 180 // reduction_factor
    n_features = width * height * 2
    batch_size = 5
    neptune.set_property("batch_size", batch_size)

    dt = 1 * ms
    neptune.set_property("dt", dt)

    bin_size = 50 * ms
    neptune.set_property("bin_size", bin_size)

    # Number of simulation steps per bin and per sample.
    bin_steps = rescale(bin_size, dt, int)
    duration_per_sample = 500 * ms
    neptune.set_property("duration_per_sample", duration_per_sample)

    number_of_steps = rescale(duration_per_sample, dt, int)

    topology = SmallWorldTopology(
        SmallWorldTopology.Configuration(
            minicolumn_shape=(7, 7, 7),
            macrocolumn_shape=(3, 3, 3),
            minicolumn_spacing=300,
            p_max=0.025,
            sparse_init=True,
        )
    )
    n_neurons = topology.number_of_nodes()
    nb_of_bins = 1 + number_of_steps // bin_steps
    # The readout sees every neuron's binned activity flattened, 3 classes out.
    linear_readout = LinearWithBN(n_neurons * nb_of_bins, 3).to(device)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Only the readout's parameters are handed to the optimizer.
    optimizer = torch.optim.Adam(linear_readout.parameters(), lr=0.001)
    neptune.set_property("adam.lr", 0.001)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    neptune.set_property("steplr.gamma", 0.1)
    neptune.set_property("steplr.step_size", 2)

    p_critical_configs = {
        "alpha": 0.0025,
        "beta": 0.00025,
        "tau_v": 50 * ms,
        "tau_i": 5 * ms,
        "v_th": 1.0,
    }

    for k, v in p_critical_configs.items():
        neptune.set_property(k, v)

    model = PCritical(
        n_features, batch_size, topology, dt=dt, **p_critical_configs,
    ).to(device)

    all_transforms = Compose(
        [
            ScaleDown(240, 180, factor=reduction_factor),
            ToDense(width, height, duration_per_sample, dt=dt),
            Flatten(),
        ]
    )

    label_dict = {
        "scissors": 0,
        "paper": 1,
        "rock": 2,
    }

    # NOTE(review): os.getenv returns None when the variable is unset; how
    # INIRoshambo handles that is not visible here.
    data = INIRoshambo(
        os.getenv("ROSHAMBO_DATASET_LOCATION_500ms_subsamples"),
        transforms=all_transforms,
    )
    train_data, val_data = split_per_user(data, train_ratio=0.85)
    _logger.info(
        "Keeping %i samples for training and %i for validation",
        len(train_data),
        len(val_data),
    )

    def labels_to_tensor(labels):
        # Map string labels to their integer class ids.
        return torch.tensor([label_dict[l] for l in labels])

    def run_batch(X, y):
        # Run the model over every time step of the batch and accumulate its
        # output spikes into per-neuron time bins.
        current_batch_size = len(y)
        # The last batch may be smaller, so the model is told the actual size.
        model.batch_size = current_batch_size
        bins = torch.zeros(current_batch_size, n_neurons, nb_of_bins, device=device)
        for t in range(number_of_steps):
            out_spikes = model.forward(X[:, :, t])
            bins[:, :, t // bin_steps] += out_spikes
        return bins

    for iter_nb in range(10):
        # A fresh, shuffled training loader is created for every iteration.
        train_generator = torch_data.DataLoader(
            train_data,
            batch_size=batch_size,
            shuffle=True,
            num_workers=2,
            pin_memory=True,
            timeout=120,
        )
        for i, (X, labels) in enumerate(tqdm(train_generator)):
            # Training is capped at 20 batches per iteration.
            if i >= 20:
                break

            neptune.log_metric("iteration", i)
            X, y = X.to(device), labels_to_tensor(labels).to(device)

            # fig, axs = plt.subplots()
            # display_spike_train(axs, X[0])
            # plt.show()
            # print(X.shape)
            # exit(0)

            bins = run_batch(X, y)

            # fig, axs = plt.subplots()
            # activity = bins[0].sum(dim=0)
            # axs.plot(np.arange(nb_of_bins), activity.cpu().numpy())
            # plt.show()

            optimizer.zero_grad()
            out = linear_readout(bins.view(len(y), -1))
            loss = loss_fn(out, y)
            loss.backward()
            optimizer.step()
            loss_val = loss.cpu().detach().item()
            _logger.info("Loss: %.3f", loss_val)
            neptune.log_metric("loss", loss_val)

        # NOTE(review): validation below runs without torch.no_grad() and
        # without switching linear_readout to eval mode; if LinearWithBN
        # contains batch normalisation this affects the reported accuracy —
        # TODO confirm.
        total_accurate = 0
        total_elems = 0
        val_generator = torch_data.DataLoader(
            val_data,
            batch_size=batch_size,
            shuffle=False,
            num_workers=2,
            pin_memory=True,
            timeout=120,
        )
        for i, (X, labels) in enumerate(tqdm(val_generator)):
            # Validation is capped at 10 batches per iteration.
            if i >= 10:
                break
            X, y = X.to(device), labels_to_tensor(labels).to(device)
            bins = run_batch(X, y)
            out = linear_readout(bins.view(len(y), -1))
            preds = torch.argmax(out, dim=1)
            total_accurate += torch.sum(preds == y).cpu().float().item()
            total_elems += len(y)
            _logger.info("Current accuracy: %.4f", total_accurate / total_elems)
            neptune.log_metric("current_accuracy", total_accurate / total_elems)

        # The learning-rate schedule advances once per outer iteration.
        scheduler.step()

        # NOTE(review): total_elems stays 0 if the validation loader yields
        # nothing, which would make the divisions below raise.
        _logger.info(
            "Final accuracy at iter %i: %.4f", iter_nb, total_accurate / total_elems
        )
        neptune.log_metric("final_accuracy", total_accurate / total_elems)
Example #9
0

def record_eval_metric(neptune, metrics):
    """Send every ``name -> value`` pair of ``metrics`` to ``neptune.log_metric``.

    ``neptune`` is whatever object the caller passes in; it only needs a
    ``log_metric(name, value)`` method.
    """
    for metric_name in metrics:
        neptune.log_metric(metric_name, metrics[metric_name])


# %%
# Directory for saved models (hard-coded absolute path).
model_path = '/workspace/ml-workspace/thesis_git/thesis/models/'
# Starts at 0; presumably updated further down as evaluation improves —
# the update is not visible in this excerpt.
best_eval_f1 = 0
# Measure the total training time for the whole run.
total_t0 = time.time()
# Everything inside this block is logged to a single Neptune experiment;
# PARAMS, training_path, dev_path and epochs are defined elsewhere.
with neptune.create_experiment(name="HierarchicalSemanticGraphNetwork",
                               params=PARAMS,
                               upload_source_files=['HSGN_GAT.py']):
    # Tags are passed as one list in a single call.
    neptune.append_tag(
        ["homogeneous_graph", "GATConv", "bidirectional_token_node_edge"])
    neptune.set_property('server', 'IRGPU2')
    neptune.set_property('training_set_path', training_path)
    neptune.set_property('dev_set_path', dev_path)
    # For each epoch...
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.

        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')
Example #10
0
def main(**kwargs):

    import sys

    for k, v in kwargs.items():
        sys.argv += [k, v]

    from pprint import pprint
    import argparse
    import datetime
    import json
    import os


    parser = argparse.ArgumentParser()
    parser.add_argument('--neptune_project_name', default='jacobarose/sandbox', type=str, help='Neptune.ai project name to log under')
    parser.add_argument('--experiment_name', default='pnas_minimal_example', type=str, help='Neptune.ai experiment name to log under')
    parser.add_argument('--config_path', default=r'/home/jacob/projects/pyleaves/pyleaves/configs/example_configs/pnas_resnet_config.json', type=str, help='JSON config file')
    parser.add_argument('-gpu', '--gpu_id', default='1', type=str, help='integer number of gpu to train on', dest='gpu_id')
    parser.add_argument('-tags', '--add-tags', default=[], type=str, nargs='*', help='Add arbitrary list of tags to apply to this run in neptune', dest='tags')
    parser.add_argument('-f', default=None)
    args = parser.parse_args()

    with open(args.config_path, 'r') as config_file:
        PARAMS = json.load(config_file)

    # print(gpu)
    # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

    pprint(PARAMS)
    import tensorflow as tf
    import neptune
    # tf.debugging.set_log_device_placement(True)
    print(tf.__version__)





    import arrow
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import io
    from stuf import stuf
    from more_itertools import unzip
    from functools import partial
    # import tensorflow as tf
    # tf.compat.v1.enable_eager_execution()
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    from pyleaves.leavesdb.tf_utils.tf_utils import set_random_seed, reset_keras_session
    import pyleaves
    from pyleaves.utils.img_utils import random_pad_image
    from pyleaves.utils.utils import ensure_dir_exists
    from pyleaves.datasets import leaves_dataset, fossil_dataset, pnas_dataset, base_dataset
    from pyleaves.models.vgg16 import VGG16, VGG16GrayScale
    from pyleaves.models import resnet, vgg16
    from tensorflow.compat.v1.keras.callbacks import Callback, ModelCheckpoint, TensorBoard, LearningRateScheduler, EarlyStopping
    from tensorflow.keras import metrics
    from tensorflow.keras.preprocessing.image import load_img, img_to_array
    from tensorflow.keras import layers
    from tensorflow.keras import backend as K
    import tensorflow_datasets as tfds
    import neptune_tensorboard as neptune_tb

    seed = 346
    # set_random_seed(seed)
    # reset_keras_session()
    def get_preprocessing_func(model_name):
        """Return the input preprocessing function matching ``model_name``.

        Parameters
        ----------
        model_name : str
            Any name starting with ``'resnet'``, or ``'vgg16'``, or ``'shallow'``.

        Returns
        -------
        callable
            The Keras ``preprocess_input`` for resnet_v2 / vgg16, or a
            function that divides by 255.0 for ``'shallow'``.

        Raises
        ------
        ValueError
            If ``model_name`` matches none of the supported names (the
            original fell through to an ``UnboundLocalError``).
        """
        if model_name.startswith('resnet'):
            from tensorflow.keras.applications.resnet_v2 import preprocess_input
        elif model_name == 'vgg16':
            from tensorflow.keras.applications.vgg16 import preprocess_input
        elif model_name == 'shallow':
            def preprocess_input(x):
                return x / 255.0
        else:
            raise ValueError(
                f"Unsupported model_name {model_name!r}; expected 'resnet*', "
                "'vgg16' or 'shallow'")

        return preprocess_input

    def _load_img(image_path):
        """Read the JPEG at ``image_path`` and return it as a 3-channel float32 tensor."""
        raw_bytes = tf.io.read_file(image_path)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
        return tf.image.convert_image_dtype(decoded, tf.float32)

    def _encode_label(label, num_classes=19):
        """Cast ``label`` to int32 and one-hot encode it with depth ``num_classes``."""
        int_label = tf.cast(label, tf.int32)
        return tf.one_hot(int_label, depth=num_classes)

    def _load_example(image_path, label, num_classes=19):
        """Return ``(image, one_hot_label)`` for one ``(path, label)`` record."""
        return (
            _load_img(image_path),
            _encode_label(label, num_classes=num_classes),
        )

    def _load_uint8_example(image_path, label, num_classes=19):
        """Return ``(uint8 image, one_hot_label)`` for one ``(path, label)`` record.

        ``_load_img`` yields a float32 image and ``convert_image_dtype``
        rescales floats to the full uint8 range by itself. The original
        multiplied by 255.0 first, pushing values far outside the expected
        range before the cast.
        """
        img = tf.image.convert_image_dtype(_load_img(image_path), dtype=tf.uint8)
        one_hot_label = _encode_label(label, num_classes=num_classes)
        return img, one_hot_label

    def rgb2gray_3channel(img, label):
        '''
        Grayscale the RGB image but return it with 3 channels; label is passed through.
        '''
        gray = tf.image.rgb_to_grayscale(img)
        return tf.image.grayscale_to_rgb(gray), label

    def rgb2gray_1channel(img, label):
        '''
        Grayscale the RGB image (3 channels -> 1); label is passed through.
        '''
        return tf.image.rgb_to_grayscale(img), label

    def log_data(logs):
        """Forward every entry of the Keras ``logs`` dict to Neptune as a metric."""
        for metric_name, metric_value in logs.items():
            neptune.log_metric(metric_name, metric_value)

    neptune_logger = tf.keras.callbacks.LambdaCallback(on_epoch_end=lambda epoch, logs: log_data(logs))

    def focal_loss(gamma=2.0, alpha=4.0):
        """Build a focal-loss function for multi-class classification.

        Parameters
        ----------
        gamma : float, default 2.0
            Exponent applied to ``(1 - p_t)``.
        alpha : float, default 4.0
            Scalar weight applied to the loss.

        Returns
        -------
        callable
            ``focal_loss_fixed(y_true, y_pred)`` returning a scalar tensor.
        """
        gamma = float(gamma)
        alpha = float(alpha)

        def focal_loss_fixed(y_true, y_pred):
            """Focal loss for multi-classification
            FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
            Notice: y_pred is probability after softmax
            gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
            d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
            Focal Loss for Dense Object Detection
            https://arxiv.org/abs/1708.02002

            Arguments:
                y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
                y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

            Returns:
                [tensor] -- loss.
            """
            # Small offset so the logarithm never sees an exact zero.
            epsilon = 1.e-9
            y_true = tf.convert_to_tensor(y_true, tf.float32)
            y_pred = tf.convert_to_tensor(y_pred, tf.float32)

            model_out = tf.add(y_pred, epsilon)
            # tf.math.log replaces tf.log, which is not available in TF 2.x.
            ce = tf.multiply(y_true, -tf.math.log(model_out))
            weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
            fl = tf.multiply(alpha, tf.multiply(weight, ce))
            # Take the largest per-class term for each example, then average
            # over the batch.
            reduced_fl = tf.reduce_max(fl, axis=1)
            return tf.reduce_mean(reduced_fl)
        return focal_loss_fixed


    def per_class_accuracy(y_true, y_pred):
        """Return ``tf.metrics.mean_per_class_accuracy`` over ``PARAMS['num_classes']`` classes.

        NOTE(review): ``mean_per_class_accuracy`` looks like a TF 1.x metric
        API; confirm it is reachable as ``tf.metrics.*`` under the TensorFlow
        version this script runs with, and that it accepts the label format
        used here.
        """
        return tf.metrics.mean_per_class_accuracy(y_true, y_pred, num_classes=PARAMS['num_classes'])

    def build_model(model_params,
                    optimizer,
                    loss,
                    METRICS):
        """Build and compile a model selected by ``model_params['name']``.

        Parameters
        ----------
        model_params : dict
            Must contain ``'name'``: ``'vgg16'`` or any name starting with
            ``'resnet'``. The whole dict is passed to the model builder.
        optimizer, loss, METRICS
            Forwarded unchanged to ``model.compile``.

        Returns
        -------
        The compiled model.

        Raises
        ------
        ValueError
            If ``model_params['name']`` is not a supported architecture (the
            original fell through to an ``UnboundLocalError``).
        """
        model_name = model_params['name']
        if model_name == 'vgg16':
            model_builder = vgg16.VGG16GrayScale(model_params)
        elif model_name.startswith('resnet'):
            model_builder = resnet.ResNet(model_params)
        else:
            raise ValueError(
                f"Unsupported model name {model_name!r}; expected 'vgg16' "
                "or a name starting with 'resnet'")

        base = model_builder.build_base()
        model = model_builder.build_head(base)

        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=METRICS)

        return model

    def build_shallow(input_shape=(224,224,3),
                      num_classes=10,
                      optimizer=None,
                      loss=None,
                      METRICS=None):
        """Build and compile a small 3-conv-layer CNN with a softmax head.

        Every trainable layer gets its own ``GlorotNormal`` initializer.
        ``optimizer``, ``loss`` and ``METRICS`` are passed to ``model.compile``.
        """
        def conv7x7(**extra_kwargs):
            # 64 filters, 7x7 kernel, ReLU.
            return layers.Conv2D(64, (7, 7), activation='relu',
                                 kernel_initializer=tf.initializers.GlorotNormal(),
                                 **extra_kwargs)

        model = tf.keras.models.Sequential()
        model.add(conv7x7(input_shape=input_shape))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(conv7x7())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(conv7x7())
        model.add(layers.Flatten())
        model.add(layers.Dense(64*2, activation='relu',
                               kernel_initializer=tf.initializers.GlorotNormal()))
        model.add(layers.Dense(num_classes, activation='softmax',
                               kernel_initializer=tf.initializers.GlorotNormal()))

        model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)
        return model


    class ImageLogger:
        '''Tensorflow 2.0 version

        Callable that can sit in a dataset pipeline: each call writes one
        image of the batch to a TF summary and to a JPEG file under
        ``log_dir``, then returns the untouched images and the class index of
        that one example.
        '''
        def __init__(self, log_dir: str, max_images: int, name: str):
            self.file_writer = tf.summary.create_file_writer(log_dir)
            self.log_dir = log_dir
            self.max_images = max_images
            self.name = name
            # Call counter, used as the summary step and in the file name.
            self._counter = tf.Variable(0, dtype=tf.int64)

            # Not written to anywhere in this class.
            self.filepaths = []

        def add_log(self, img, counter=None, name=None):
            '''
            Intention is to generalize this to an abstract class for logging to any experiment management platform (e.g. neptune, mlflow, etc)

            Currently takes an image array, min-max rescales it to 0-255 and logs it to the current neptune experiment
            under ``name`` (falling back to ``self.name``) at x-position ``counter``. Returns the rescaled array.

            NOTE(review): a constant image makes the denominator zero.
            '''

            # scaled_images = (img - tf.math.reduce_min(img))/(tf.math.reduce_max(img) - tf.math.reduce_min(img))
            # keep = 0
            # scaled_images = tf.image.convert_image_dtype(tf.squeeze(scaled_images[keep,:,:,:]), dtype=tf.uint8)
            # scaled_images = tf.expand_dims(scaled_images, 0)
            # tf.summary.image(name=self.name, data=scaled_images, step=self._counter, max_outputs=self.max_images)


            scaled_img = (img - np.min(img))/(np.max(img) - np.min(img)) * 255.0
            scaled_img = scaled_img.astype(np.uint32)

            neptune.log_image(log_name= name or self.name,
                              x=counter,
                              y=scaled_img)
            return scaled_img

        def __call__(self, images, labels):

            with self.file_writer.as_default():
                # Min-max rescale the whole batch to [0, 1].
                scaled_images = (images - tf.math.reduce_min(images))/(tf.math.reduce_max(images) - tf.math.reduce_min(images))
                # Only the first example of the batch is logged.
                keep = 0

                scaled_images = tf.image.convert_image_dtype(tf.squeeze(scaled_images[keep,:,:,:]), dtype=tf.uint8)
                scaled_images = tf.expand_dims(scaled_images, 0)

                # The returned labels are the argmax of the kept example only.
                labels = tf.argmax(labels[[keep], :],axis=1)
                tf.summary.image(name=self.name, data=scaled_images, step=self._counter, max_outputs=self.max_images)

                # NOTE(review): self._counter is a tf.Variable, so the
                # f-string embeds its string form — confirm the file name
                # comes out as intended.
                filepath = os.path.join(self.log_dir,'sample_images',f'{self.name}-{self._counter}.jpg')

                scaled_images = tf.image.encode_jpeg(tf.squeeze(scaled_images))
                tf.io.write_file(filename=tf.constant(filepath),
                                 contents=scaled_images)

            # self.add_log(scaled_images)
            self._counter.assign_add(1)
            return images, labels

    def _cond_apply(x, y, func, prob):
        """Apply ``func(x, y)`` with probability ``prob``, else pass ``(x, y)`` through.

        Parameters
        ----------
        x
            Input handed to ``func`` when the draw succeeds.
        y
            Label handed to ``func`` alongside ``x``.
        func
            Callable taking ``(x, y)`` and returning ``(x, y)``.
        prob
            Probability of applying ``func``, within range [0.0,1.0].

        Returns
        -------
        x, y
        """
        should_apply = tf.random.uniform([], 0, 1) >= (1.0 - prob)
        return tf.cond(should_apply, lambda: func(x, y), lambda: (x, y))


    class ImageAugmentor:
        """Chain of probabilistic image augmentations for a ``tf.data`` pipeline.

        Parameters
        ----------
        name : str
            Stored on the instance; not otherwise used by this class.
        augmentations : dict
            Maps augmentation names (keys of ``self.maps``) to the probability
            with which each one is applied. They are applied in dict order.
        resize_w_pad : tuple, default=None
            ``(height, width)`` target for resize-with-pad, e.g. ``(224,224)``.
        random_crop : tuple, default=None
            Crop size, e.g. ``(224,224,3)``; its first two entries also become
            the resize target.
        random_jitter : dict
            Dict with a ``'crop_size'`` entry ``(crop_height, crop_width,
            channels)`` that sets both the crop size and the resize target.
            A ``'resize'`` entry may be present but is not read.
        log_dir : str, default=None
            Stored on the instance.
        seed : int, default=None
            Random seed passed to every random augmentation op.

        Only the first truthy one of ``resize_w_pad``, ``random_crop`` and
        ``random_jitter`` (checked in that order) takes effect. Whichever it
        is, ``self.resize`` is always bound to ``resize_w_pad``.

        Attributes
        ----------
        augmentations
        maps : dict mapping augmentation name to bound method
        """

        def __init__(self,
                     name='',
                     augmentations={'rotate':1.0,
                                    'flip':1.0,
                                    'color':1.0,
                                    'rgb2gray_3channel':1.0},
                     resize_w_pad=None,
                     random_crop=None,
                     random_jitter={'resize':(338,338),
                                    'crop_size':(224,224,3)},
                     log_dir=None,
                     seed=None):
            self.name = name
            self.augmentations = augmentations
            self.seed = seed

            # Derive the target height/width (and crop size where relevant)
            # from the first size option that was supplied.
            if resize_w_pad:
                self.target_h, self.target_w = resize_w_pad[0], resize_w_pad[1]
            elif random_crop:
                self.crop_size = random_crop
                self.target_h, self.target_w = random_crop[0], random_crop[1]
            elif random_jitter:
                self.crop_size = random_jitter['crop_size']
                self.target_h = random_jitter['crop_size'][0]
                self.target_w = random_jitter['crop_size'][1]
            self.resize = self.resize_w_pad

            self.maps = {
                'rotate': self.rotate,
                'flip': self.flip,
                'color': self.color,
                'rgb2gray_3channel': self.rgb2gray_3channel,
                'rgb2gray_1channel': self.rgb2gray_1channel,
            }

            self.log_dir = log_dir

        def rotate(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Rotate the image by a random multiple of 90 degrees (0-3 turns).

            ``label`` passes through unchanged. Returns ``(image, label)``.
            """
            quarter_turns = tf.random.uniform(
                shape=[], minval=0, maxval=4, dtype=tf.int32, seed=self.seed)
            return tf.image.rot90(x, quarter_turns), label

        def flip(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Randomly flip the image left-right, then up-down.

            ``label`` passes through unchanged. Returns ``(image, label)``.
            """
            flipped = tf.image.random_flip_left_right(x, seed=self.seed)
            flipped = tf.image.random_flip_up_down(flipped, seed=self.seed)
            return flipped, label

        def color(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Randomly perturb hue, saturation, brightness and contrast, in that order.

            ``label`` passes through unchanged. Returns ``(image, label)``.
            """
            jittered = tf.image.random_hue(x, 0.08, seed=self.seed)
            jittered = tf.image.random_saturation(jittered, 0.6, 1.6, seed=self.seed)
            jittered = tf.image.random_brightness(jittered, 0.05, seed=self.seed)
            jittered = tf.image.random_contrast(jittered, 0.7, 1.3, seed=self.seed)
            return jittered, label

        def rgb2gray_3channel(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Convert RGB to grayscale while keeping 3 channels.

            ``label`` passes through unchanged. Returns ``(image, label)``.
            """
            gray = tf.image.rgb_to_grayscale(x)
            return tf.image.grayscale_to_rgb(gray), label

        def rgb2gray_1channel(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Convert RGB to single-channel grayscale.

            ``label`` passes through unchanged. Returns ``(image, label)``.
            """
            return tf.image.rgb_to_grayscale(x), label

        def resize_w_pad(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Resize with padding to ``(self.target_h, self.target_w)``."""
            # TODO Finish this
            resized = tf.image.resize_with_pad(
                x, target_height=self.target_h, target_width=self.target_w)
            return resized, label

        def random_crop(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Take a random crop of size ``self.crop_size``."""
            return tf.image.random_crop(x, size=self.crop_size), label

        @tf.function
        def random_jitter(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
            """Resize with padding, then take a random crop."""
            x, label = self.resize_w_pad(x, label)
            return self.random_crop(x, label)

        def apply_augmentations(self, dataset: tf.data.Dataset):
            """Resize every example, then map each configured augmentation over the dataset.

            Augmentations run in the order given to the constructor, each
            applied with its own probability.

            Args:
                dataset, tf.data.Dataset: must yield individual examples of form (x, y)
            Returns:
                Augmented dataset
            """
            dataset = dataset.map(self.resize, num_parallel_calls=AUTOTUNE)

            for aug_name, aug_p in self.augmentations.items():
                aug = self.maps[aug_name]
                dataset = dataset.map(
                    lambda x, y: _cond_apply(x, y, aug, prob=aug_p),
                    num_parallel_calls=AUTOTUNE,
                )

            return dataset


    class ImageLoggerCallback(Callback):
        '''Tensorflow 2.0 version

        Callback that keeps track of a tf.data.Dataset and logs the correct batch to neptune based on the current batch.

        Parameters
        ----------
        data : tf.data.Dataset
            Dataset yielding ``(images, one_hot_labels)`` batches, iterated in step with training.
        freq : int
            Log every ``freq``-th training batch.
        max_images : int
            Maximum images logged per batch; -1 means "size of the first logged batch".
        name : str
            Prefix of the neptune log name.
        encoder
            Optional object whose ``decode`` turns class indices into display labels.
        '''
        def __init__(self, data :tf.data.Dataset, freq=1, max_images=-1, name='', encoder=None):
            super().__init__()
            self.data = data
            self.freq = freq
            self.max_images = max_images
            self.name = name
            self.encoder=encoder
            self.init_iterator()

        def init_iterator(self):
            '''(Re)start iteration over ``self.data`` and reset the batch/image counters.'''
            self.data_iter = iter(self.data)
            self._batch = 0
            self._count = 0
            self.finished = False

        def yield_batch(self):
            '''Return the next batch and advance the batch and image counters.'''
            batch_data = next(self.data_iter)
            self._batch += 1
            self._count += batch_data[0].shape[0]
            return batch_data

        def add_log(self, img, counter=None, name=None):
            '''
            Intention is to generalize this to an abstract class for logging to any experiment management platform (e.g. neptune, mlflow, etc)

            Min-max rescales the image array to 0-255 and logs it to the current neptune experiment
            under ``name`` (falling back to ``self.name``) at x-position ``counter``. Returns the rescaled array.
            '''
            lowest = np.min(img)
            value_range = np.max(img) - lowest
            if value_range == 0:
                # A constant image would divide by zero; log it as all zeros.
                scaled_img = np.zeros_like(img, dtype=np.float64)
            else:
                scaled_img = (img - lowest) / value_range * 255.0
            scaled_img = scaled_img.astype(np.uint32)

            neptune.log_image(log_name= name or self.name,
                              x=counter,
                              y=scaled_img)
            return scaled_img

        def on_train_batch_begin(self, batch, logs=None):
            if batch % self.freq or self.finished:
                return

            # Advance the private iterator until it has caught up with the
            # training batch index; stop logging if the dataset runs out.
            try:
                while batch >= self._batch:
                    x, y = self.yield_batch()
            except StopIteration:
                self.finished = True
                return

            x = x.numpy()
            y = y.numpy()

            if x.ndim==3:
                # A single unbatched example: add a leading batch axis. The
                # original called np.newaxis(...), which is None and not callable.
                x = x[np.newaxis, ...]
                y = y[np.newaxis, ...]

            if self.max_images==-1:
                self.max_images=x.shape[0]

            if x.shape[0]>self.max_images:
                x = x[:self.max_images,...]
                y = y[:self.max_images,...]

            y = np.argmax(y,axis=1)
            if self.encoder:
                y = self.encoder.decode(y)
            for i in range(x.shape[0]):
                self.add_log(x[i,...], counter=self._count+i, name = f'{self.name}-{y[i]}')
            # Report how many images were actually logged for this batch.
            print(f'Batch {self._batch}: Logged {x.shape[0]} {self.name} images to neptune')

        def on_epoch_end(self, epoch, logs=None):
            # Logging only happens during the first epoch.
            self.finished = True


    class ConfusionMatrixCallback(Callback):
        '''Tensorflow 2.0 version

        Callback that plots a confusion matrix for fixed image/label batches and logs it
        both to a TensorBoard file writer and to neptune.

        Args:
            log_dir: directory handed to tf.summary.create_file_writer.
            imgs: dict of image batches; the 'val' key (and 'train' if include_train) is read.
            labels: dict with the same keys as imgs; 2-D entries are reduced with argmax over the last axis.
            classes: class names used for the matrix rows/columns.
            freq: the matrix is logged on epochs that are a multiple of freq; a falsy value disables logging.
            include_train: also log a matrix for the 'train' entry.
            seed: stored on the instance; not used by this class.
        '''
        def __init__(self, log_dir, imgs : dict, labels : dict, classes, freq=1, include_train=False, seed=None):
            super().__init__()
            self.file_writer = tf.summary.create_file_writer(log_dir)
            self.log_dir = log_dir
            self.seed = seed
            self._counter = 0
            assert imgs.keys() == labels.keys(), 'imgs and labels must have the same keys'
            self.imgs = imgs

            # Build a new dict so the caller's `labels` argument is not modified in place.
            self.labels = {k: (tf.argmax(v,axis=-1) if v.ndim==2 else v) for k,v in labels.items()}
            self.num_samples = {k:l.numpy().shape[0] for k,l in self.labels.items()}
            self.classes = classes
            self.freq = freq
            self.include_train = include_train

        def log_confusion_matrix(self, model, imgs, labels, epoch, name='', norm_cm=False):
            '''Predict on imgs, draw the confusion matrix against labels and log it.

            Args:
                model: model whose predict() output is turned into class indices.
                imgs: input batch for model.predict.
                labels: integer class labels for imgs.
                epoch: accepted for the caller's convenience; the log step is the internal counter.
                name: prefix of the '<name>_confusion_matrix' log name.
                norm_cm: if True, divide each row by its sum and round to 2 decimals.
            Returns:
                The rendered figure as a (1, H, W, 4) image tensor.
            '''
            # Same rule the removed Sequential.predict_classes applied:
            # arg-max for multi-unit outputs, 0.5 threshold for a single unit.
            probabilities = model.predict(imgs)
            if probabilities.shape[-1] > 1:
                pred_labels = np.argmax(probabilities, axis=-1)
            else:
                pred_labels = (probabilities > 0.5).astype('int32')
            # tf.math.confusion_matrix expects 1-D predictions.
            pred_labels = np.reshape(pred_labels, [-1])

            con_mat = tf.math.confusion_matrix(labels=labels, predictions=pred_labels, num_classes=len(self.classes)).numpy()
            if norm_cm:
                con_mat = np.around(con_mat.astype('float') / con_mat.sum(axis=1)[:, np.newaxis], decimals=2)
            con_mat_df = pd.DataFrame(con_mat,
                             index = self.classes,
                             columns = self.classes)

            figure = plt.figure(figsize=(12, 12))
            sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues)
            plt.ylabel('True label')
            plt.xlabel('Predicted label')
            # Lay out after the axis labels exist so they are taken into account.
            plt.tight_layout()

            # Render to an in-memory PNG so the figure can be fed to tf.summary.image.
            buf = io.BytesIO()
            plt.savefig(buf, format='png')
            buf.seek(0)

            image = tf.image.decode_png(buf.getvalue(), channels=4)
            image = tf.expand_dims(image, 0)

            with self.file_writer.as_default():
                tf.summary.image(name=name+'_confusion_matrix', data=image, step=self._counter)

            neptune.log_image(log_name=name+'_confusion_matrix',
                              x=self._counter,
                              y=figure)
            plt.close(figure)
            self._counter += 1

            return image

        def on_epoch_end(self, epoch, logs=None):
            '''Log the validation (and optionally training) confusion matrix every `freq` epochs.'''

            if (not self.freq) or (epoch%self.freq != 0):
                return

            if self.include_train:
                self.log_confusion_matrix(self.model, self.imgs['train'], self.labels['train'], epoch=epoch, name='train')
            self.log_confusion_matrix(self.model, self.imgs['val'], self.labels['val'], epoch=epoch, name='val')

####################################################################################
####################################################################################
####################################################################################



    # ---- Experiment setup: neptune project, log directory, training constants ----
    neptune.init(project_qualified_name=args.neptune_project_name)
    # neptune_tb.integrate_with_tensorflow()


    experiment_dir = '/media/data/jacob/sandbox_logs'
    experiment_name = args.experiment_name

    # Each run gets its own timestamped log directory, grouped by experiment name and loss.
    experiment_start_time = arrow.utcnow().format('YYYY-MM-DD_HH-mm-ss')
    log_dir =os.path.join(experiment_dir, experiment_name, 'log_dir',PARAMS['loss'], experiment_start_time)
    ensure_dir_exists(log_dir)
    print('Tensorboard log_dir: ', log_dir)
    # os.system(f'neptune tensorboard {log_dir} --project {args.neptune_project_name}')

    weights_best = os.path.join(log_dir, 'model_ckpt.h5')
    restore_best_weights=False
    histogram_freq=0
    patience=25
    num_epochs = PARAMS['num_epochs']
    initial_epoch=0

    # All three dataset objects are built up front; one is selected later by PARAMS['dataset_name'].
    src_db = pyleaves.DATABASE_PATH
    datasets = {
                'PNAS': pnas_dataset.PNASDataset(src_db=src_db),
                'Leaves': leaves_dataset.LeavesDataset(src_db=src_db),
                'Fossil': fossil_dataset.FossilDataset(src_db=src_db)
                }
    # data = datasets[PARAMS['dataset_name']]
    data_config = stuf(threshold=PARAMS['data_threshold'],
                       num_classes=PARAMS['num_classes']    ,
                       data_splits_meta={
                                         'train':PARAMS['train_size'],
                                         'val':PARAMS['val_size'],
                                         'test':PARAMS['test_size']
                                        }
                       )

    preprocess_input = get_preprocessing_func(PARAMS['model_name'])
    # NOTE(review): the result is discarded; presumably a smoke test / warm-up of the preprocessing function.
    preprocess_input(tf.zeros([4, 224, 224, 3]))

    load_example = partial(_load_uint8_example, num_classes=data_config.num_classes)
    # load_example = partial(_load_example, num_classes=data_config.num_classes)


    # Pick the grayscale conversion matching the model's channel count (always applied, p=1.0).
    if PARAMS['num_channels']==3:
        color_aug = {'rgb2gray_3channel':1.0}
    elif PARAMS['num_channels']==1:
        color_aug = {'rgb2gray_1channel':1.0}
    else:
        # Fail here with a clear message instead of a NameError on color_aug further down.
        raise ValueError(f"Unsupported PARAMS['num_channels']: {PARAMS['num_channels']!r} (expected 1 or 3)")

    # Training uses either plain resize-with-pad or the random-jitter configuration, never both.
    resize_w_pad=None
    random_jitter=None
    if not PARAMS['random_jitter']['resize']:
        resize_w_pad = PARAMS['image_size']
    else:
        random_jitter=PARAMS['random_jitter']

    TRAIN_image_augmentor = ImageAugmentor(name='train',
                                           augmentations={**PARAMS["augmentations"],
                                                          **color_aug},#'rotate':1.0,'flip':1.0,**color_aug},
                                           resize_w_pad=resize_w_pad,
                                           random_crop=None,
                                           random_jitter=random_jitter,
                                           log_dir=log_dir,
                                           seed=None)
    # Validation and test only get the color conversion and a fixed resize.
    VAL_image_augmentor = ImageAugmentor(name='val',
                                         augmentations={**color_aug},
                                         resize_w_pad=PARAMS['image_size'],
                                         random_crop=None,
                                         random_jitter=None,
                                         log_dir=log_dir,
                                         seed=None)
    TEST_image_augmentor = ImageAugmentor(name='test',
                                          augmentations={**color_aug},
                                          resize_w_pad=PARAMS['image_size'],
                                          random_crop=None,
                                          random_jitter=None,
                                          log_dir=log_dir,
                                          seed=None)


    def neptune_log_augmented_images(split_data, num_demo_samples=40, PARAMS=PARAMS):
        """Log pairs of resized and augmented demo images for the 'train' and 'val' subsets.

        For each subset one un-augmented batch of `num_demo_samples` examples is loaded with
        get_data_loader; the batch is then passed once through VAL_image_augmentor.resize and
        once through TRAIN_image_augmentor.apply_augmentations, and `num_demo_samples` randomly
        chosen examples are logged through TRAIN_image_augmentor.logger.add_log under the log
        names '<subset>' and '<subset>_aug'.

        Args:
            split_data: dict with 'train' and 'val' entries accepted by get_data_loader.
            num_demo_samples: number of examples loaded and logged per subset.
            PARAMS: unused; kept for backward compatibility.
        """
        cm_data_x = {}
        cm_data_y = {}
        cm_data_x['train'], cm_data_y['train'] = next(iter(get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=num_demo_samples, infinite=True, augment=False,seed=2836)))
        cm_data_x['val'], cm_data_y['val'] = next(iter(get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=num_demo_samples, infinite=True, augment=False, seed=2836)))

        for k, batch_x in cm_data_x.items():
            x = tf.data.Dataset.from_tensor_slices(batch_x)
            y = tf.data.Dataset.from_tensor_slices(cm_data_y[k])
            xy_data = tf.data.Dataset.zip((x, y))
            v = xy_data.map(VAL_image_augmentor.resize, num_parallel_calls=AUTOTUNE)
            v_aug = TRAIN_image_augmentor.apply_augmentations(xy_data)
            # Batch size larger than the number of examples, so one batch holds everything.
            v_x, _ = [i.numpy() for i in next(iter(v.batch(10*num_demo_samples)))]
            v_x_aug, _ = [i.numpy() for i in next(iter(v_aug.batch(10*num_demo_samples)))]
            for i in range(num_demo_samples):
                # Sample with replacement; the same index is used for the plain and augmented image.
                idx = np.random.randint(0,len(v_x))
                print(f'Neptune: logging {k}_{i}')
                print(f'{v_x[idx].shape}, {v_x_aug[idx].shape}')
                TRAIN_image_augmentor.logger.add_log(v_x[idx],counter=i, name=k)
                TRAIN_image_augmentor.logger.add_log(v_x_aug[idx],counter=i, name=k+'_aug')


    def get_data_loader(data : tuple, data_subset_mode='train', batch_size=32, num_classes=None, infinite=True, augment=True, seed=2836):
        """Build a batched, prefetched tf.data pipeline from a (inputs, labels) pair.

        Pipeline order: zip -> cache -> (train only) full-buffer shuffle -> load_example ->
        preprocess_input on x -> optional repeat -> (train only) seeded shuffle with a
        200-element buffer -> optional augmentation -> batch (remainder dropped) -> prefetch.

        Args:
            data: sequence whose item 0 holds the inputs and item 1 the labels.
            data_subset_mode: 'train', 'val' or 'test'; selects shuffling and the augmentor.
            batch_size: number of examples per batch.
            num_classes: unused; kept for backward compatibility.
            infinite: if True the dataset repeats indefinitely.
            augment: if True apply the augmentor registered for data_subset_mode.
            seed: seed of the second (post-repeat) training shuffle only.
        Returns:
            The resulting tf.data.Dataset.
        Raises:
            ValueError: if augment is True and data_subset_mode has no augmentor.
        """

        num_samples = len(data[0])
        x = tf.data.Dataset.from_tensor_slices(data[0])
        labels = tf.data.Dataset.from_tensor_slices(data[1])
        data = tf.data.Dataset.zip((x, labels))

        data = data.cache()
        if data_subset_mode == 'train':
            data = data.shuffle(buffer_size=num_samples)

        data = data.map(load_example, num_parallel_calls=AUTOTUNE)


        data = data.map(lambda x,y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)

        if infinite:
            data = data.repeat()

        if data_subset_mode == 'train':
            data = data.shuffle(buffer_size=200, seed=seed)

        if augment:
            augmentors = {'train': TRAIN_image_augmentor,
                          'val': VAL_image_augmentor,
                          'test': TEST_image_augmentor}
            if data_subset_mode not in augmentors:
                # Previously this surfaced as a NameError on an unbound local.
                raise ValueError(f"Unknown data_subset_mode {data_subset_mode!r}; expected one of {sorted(augmentors)}")
            data = augmentors[data_subset_mode].apply_augmentations(data)

        data = data.batch(batch_size, drop_remainder=True)

        return data.prefetch(AUTOTUNE)

    def get_tfds_data_loader(data : tf.data.Dataset, data_subset_mode='train', batch_size=32, num_samples=100, num_classes=19, infinite=True, augment=True, seed=2836):
        """Build a batched, prefetched pipeline from a supervised (x, y) tf.data.Dataset.

        Pipeline order: shuffle -> cache -> encode_example -> preprocess_input on x ->
        (train only) seeded shuffle -> optional augmentation -> batch (remainder dropped) ->
        optional repeat -> prefetch. The min/max of the first image is printed after each
        major stage as a debugging aid.

        Args:
            data: dataset yielding (x, y) pairs.
            data_subset_mode: 'train', 'val' or 'test'; selects shuffling and the augmentor.
            batch_size: number of examples per batch.
            num_samples: buffer size of the initial shuffle.
            num_classes: passed to _encode_label.
            infinite: if True the batched dataset repeats indefinitely.
            augment: if True apply the augmentor registered for data_subset_mode.
            seed: seed of the training-only shuffle.
        Returns:
            The resulting tf.data.Dataset.
        Raises:
            ValueError: if augment is True and data_subset_mode has no augmentor.
        """

        def encode_example(x, y):
            x = tf.image.convert_image_dtype(x, tf.float32) * 255.0
            y = _encode_label(y, num_classes=num_classes)
            return x, y

        def print_value_range(dataset):
            # Debug aid: pull one element and print the min and max of its image.
            sample = next(iter(dataset))
            print(sample[0].numpy().min())
            print(sample[0].numpy().max())

        print_value_range(data)

        # NOTE(review): shuffle precedes cache here, unlike get_data_loader — confirm the order is intended.
        data = data.shuffle(buffer_size=num_samples) \
                   .cache() \
                   .map(encode_example, num_parallel_calls=AUTOTUNE)

        print_value_range(data)

        # Elements are (x, y) pairs: preprocess only the image and keep the label,
        # the same way get_data_loader does.
        data = data.map(lambda x,y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)

        print_value_range(data)

        if data_subset_mode == 'train':
            data = data.shuffle(buffer_size=100, seed=seed)

        if augment:
            augmentors = {'train': TRAIN_image_augmentor,
                          'val': VAL_image_augmentor,
                          'test': TEST_image_augmentor}
            if data_subset_mode not in augmentors:
                # Previously this surfaced as a NameError on an unbound local.
                raise ValueError(f"Unknown data_subset_mode {data_subset_mode!r}; expected one of {sorted(augmentors)}")
            data = augmentors[data_subset_mode].apply_augmentations(data)

        print_value_range(data)

        data = data.batch(batch_size, drop_remainder=True)
        if infinite:
            data = data.repeat()

        return data.prefetch(AUTOTUNE)






    # y_true = [[0, 1, 0], [0, 0, 1]]
    # y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

    def accuracy(y_true, y_pred):
        """Mean agreement between the arg-max (last axis) of y_pred and of y_true."""
        predicted_class = tf.argmax(y_pred, axis=-1)
        target_class = tf.argmax(y_true, axis=-1)
        matches = tf.equal(target_class, predicted_class)

        return tf.reduce_mean(tf.cast(matches, tf.float32))


    def true_pos(y_true, y_pred):
        """Sum of the rounded, [0, 1]-clipped elementwise product of y_true and y_pred."""
        overlap = K.clip(y_true * y_pred, 0, 1)
        return K.sum(K.round(overlap))

    def false_pos(y_true, y_pred):
        """Count false positives: entries predicted positive that are not true positives.

        Computed as (predicted positives) - (true positives), where a prediction counts as
        positive when its value clipped to [0, 1] rounds to 1. The previous version
        subtracted from the number of actual positives, which yields false negatives.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        return predicted_positives - true_positives

    def true_neg(y_true, y_pred):
        """Count true negatives: entries where both the target and the rounded prediction are 0.

        Uses the complements (1 - y_true) and (1 - y_pred), mirroring how true_pos uses the
        raw values. The previous version counted every entry that was not a true positive.
        """
        return K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))

    def recall(y_true, y_pred):
        """True positives divided by actual positives (plus K.epsilon() to avoid 0/0)."""
        hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))

        return hits / (actual_positives + K.epsilon())

    def precision(y_true, y_pred):
        """True positives divided by predicted positives (plus K.epsilon() to avoid 0/0).

        y_true is used as given. The previous version replaced it with K.ones_like(y_true),
        which made the true-positive count equal the predicted-positive count and the
        result trivially ~1 regardless of the targets.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))

        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    def f1_score(y_true, y_pred):
        """Harmonic mean of precision() and recall(), with K.epsilon() in the denominator."""
        p = precision(y_true, y_pred)
        r = recall(y_true, y_pred)
        ratio = (p*r)/(p+r+K.epsilon())
        return 2*ratio

    # def false_neg(y_true, y_pred):
    #     y_true = K.ones_like(~y_true)
    #     true_neg = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    #     all_negative = K.sum(K.round(K.clip(y_true, 0, 1)))
    #     return all_negatives - true_

        # return K.mean(K.argmax(y_true,axis=1)*K.argmax(y_pred,axis=1))

        # 'accuracy',
        # metrics.TrueNegatives(name='tn'),
        # metrics.FalseNegatives(name='fn'),
    # Metrics handed to the model builders below: the custom f1_score function
    # plus Keras built-ins for TP/FP counts and top-1/top-3/top-5 categorical accuracy.
    METRICS = [
        f1_score,
        metrics.TruePositives(name='tp'),
        metrics.FalsePositives(name='fp'),
        metrics.CategoricalAccuracy(name='accuracy'),
        metrics.TopKCategoricalAccuracy(name='top_3_categorical_accuracy', k=3),
        metrics.TopKCategoricalAccuracy(name='top_5_categorical_accuracy', k=5)
    ]
    # Record the exact command line alongside the other parameters sent to neptune.
    PARAMS['sys.argv'] = ' '.join(sys.argv)

    with neptune.create_experiment(name=experiment_name, params=PARAMS, upload_source_files=[__file__]):
        # One full run inside the neptune experiment context:
        # tag the experiment, build the data loaders, the callbacks and the model, then fit and evaluate.

        print('Logging experiment tags:')
        for tag in args.tags:
            print(tag)
            neptune.append_tag(tag)

        neptune.append_tag(PARAMS['dataset_name'])
        neptune.append_tag(PARAMS['model_name'])
        neptune.log_artifact(args.config_path)
        cm_data_x = {'train':[],'val':[]}
        cm_data_y = {'train':[],'val':[]}
        # Only the non-tfds branch builds a label encoder; default to None so the
        # image-logger callbacks below can be constructed in both branches.
        encoder = None

        if PARAMS['dataset_name'] in tfds.list_builders():
            num_demo_samples=40

            tfds_builder = tfds.builder(PARAMS['dataset_name'])
            tfds_builder.download_and_prepare()

            num_samples = tfds_builder.info.splits['train'].num_examples
            num_samples_dict = {'train':int(num_samples*PARAMS['train_size']),
                            'val':int(num_samples*PARAMS['val_size']),
                            'test':int(num_samples*PARAMS['test_size'])}

            classes = tfds_builder.info.features['label'].names
            num_classes = len(classes)

            # All three subsets are percentage slices of the tfds 'train' split.
            train_slice = [0,int(PARAMS['train_size']*100)]
            val_slice = [int(PARAMS['train_size']*100), int((PARAMS['train_size']+PARAMS['val_size'])*100)]
            test_slice = [100 - int(PARAMS['test_size']*100), 100]

            tfds_train_data = tfds.load(PARAMS['dataset_name'], split=f"train[{train_slice[0]}%:{train_slice[1]}%]", shuffle_files=True, as_supervised=True)
            tfds_validation_data = tfds.load(PARAMS['dataset_name'], split=f"train[{val_slice[0]}%:{val_slice[1]}%]", shuffle_files=True, as_supervised=True)
            tfds_test_data = tfds.load(PARAMS['dataset_name'], split=f"train[{test_slice[0]}%:{test_slice[1]}%]", shuffle_files=True, as_supervised=True)

            train_data = get_tfds_data_loader(data = tfds_train_data, data_subset_mode='train', batch_size=PARAMS['batch_size'], num_samples=num_samples_dict['train'], num_classes=num_classes, infinite=True, augment=True, seed=2836)
            validation_data = get_tfds_data_loader(data = tfds_validation_data, data_subset_mode='val', batch_size=PARAMS['batch_size'], num_samples=num_samples_dict['val'], num_classes=num_classes, infinite=True, augment=True, seed=2837)
            test_data = get_tfds_data_loader(data = tfds_test_data, data_subset_mode='test', batch_size=PARAMS['batch_size'], num_samples=num_samples_dict['test'], num_classes=num_classes, infinite=True, augment=True, seed=2838)

            # Second set of loaders with a demo-sized batch, used for the confusion-matrix samples.
            split_data = {'train':get_tfds_data_loader(data = tfds_train_data, data_subset_mode='train', batch_size=num_demo_samples, num_samples=num_samples_dict['train'], num_classes=num_classes, infinite=True, augment=True, seed=2836),
                          'val':get_tfds_data_loader(data = tfds_validation_data, data_subset_mode='val', batch_size=num_demo_samples, num_samples=num_samples_dict['val'], num_classes=num_classes, infinite=True, augment=True, seed=2837),
                          'test':get_tfds_data_loader(data = tfds_test_data, data_subset_mode='test', batch_size=num_demo_samples, num_samples=num_samples_dict['test'], num_classes=num_classes, infinite=True, augment=True, seed=2838)
                          }

            steps_per_epoch=num_samples_dict['train']//PARAMS['batch_size']
            validation_steps=num_samples_dict['val']//PARAMS['batch_size']
            # split_data holds datasets here, so the test step count comes from the sample counts.
            test_steps=num_samples_dict['test']//PARAMS['batch_size']

            cm_data_x['train'], cm_data_y['train'] = next(iter(split_data['train']))
            cm_data_x['val'], cm_data_y['val'] = next(iter(split_data['val']))

        else:
            data = datasets[PARAMS['dataset_name']]
            neptune.set_property('num_classes',data.num_classes)
            neptune.set_property('class_distribution',data.metadata.class_distribution)

            encoder = base_dataset.LabelEncoder(data.data.family)
            split_data = base_dataset.preprocess_data(data, encoder, data_config)
            for subset, subset_data in split_data.items():
                split_data[subset] = [list(i) for i in unzip(subset_data)]

            # NOTE(review): this overrides whatever batch size was configured — confirm it is intended.
            PARAMS['batch_size'] = 32

            steps_per_epoch=len(split_data['train'][0])//PARAMS['batch_size']#//10
            validation_steps=len(split_data['val'][0])//PARAMS['batch_size']#//10

            split_datasets = {
                              k:base_dataset.BaseDataset.from_dataframe(
                                pd.DataFrame({
                                            'path':v[0],
                                            'family':v[1]
                                            })) \
                              for k,v in split_data.items()
                             }

            for k,v in split_datasets.items():
                print(k, v.num_classes)

            classes = split_datasets['train'].classes

            train_data=get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2836)
            validation_data=get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2837)
            if 'test' in split_data.keys():
                test_data=get_data_loader(data=split_data['test'], data_subset_mode='test', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2838)
                # The loader yields batches, so steps are counted in batches, like validation_steps.
                test_steps=len(split_data['test'][0])//PARAMS['batch_size']

            num_demo_samples=150
            # neptune_log_augmented_images(split_data, num_demo_samples=num_demo_samples, PARAMS=PARAMS)
            cm_data_x['train'], cm_data_y['train'] = next(iter(get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=num_demo_samples, infinite=True, augment=True, seed=2836)))
            cm_data_x['val'], cm_data_y['val'] = next(iter(get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=num_demo_samples, infinite=True, augment=True,  seed=2836)))


        ########################################################################################
        train_image_logger_cb = ImageLoggerCallback(data=train_data, freq=20, max_images=-1, name='train', encoder=encoder)
        val_image_logger_cb = ImageLoggerCallback(data=validation_data, freq=20, max_images=-1, name='val', encoder=encoder)
        ########################################################################################

        cm_callback = ConfusionMatrixCallback(log_dir, cm_data_x, cm_data_y, classes=classes, seed=PARAMS['seed'], include_train=True)
        # restore_best_weights is an EarlyStopping option, not a ModelCheckpoint one.
        checkpoint = ModelCheckpoint(weights_best, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='min')
        tfboard = TensorBoard(log_dir=log_dir, histogram_freq=histogram_freq, write_images=True)
        early = EarlyStopping(monitor='val_loss', patience=patience, verbose=1, restore_best_weights=restore_best_weights)
        # neptune_logger is not defined in this block; it must already be in scope.
        callbacks = [checkpoint,tfboard,early, cm_callback, neptune_logger, train_image_logger_cb, val_image_logger_cb]
        ##########################
        if PARAMS['optimizer'] == 'Adam':
            optimizer = tf.keras.optimizers.Adam(
                learning_rate=PARAMS['lr']
            )
        elif PARAMS['optimizer'] == 'Nadam':
            optimizer = tf.keras.optimizers.Nadam(
                learning_rate=PARAMS['lr']
            )
        elif PARAMS['optimizer'] == 'SGD':
            optimizer = tf.keras.optimizers.SGD(
                learning_rate=PARAMS['lr']
            )
        else:
            # Fail here rather than with a NameError when the model is built.
            raise ValueError(f"Unsupported PARAMS['optimizer']: {PARAMS['optimizer']!r}")
        ##########################
        if PARAMS['loss']=='focal_loss':
            loss = focal_loss(gamma=2.0, alpha=4.0)
        elif PARAMS['loss']=='categorical_crossentropy':
            loss = 'categorical_crossentropy'
        else:
            raise ValueError(f"Unsupported PARAMS['loss']: {PARAMS['loss']!r}")
        ##########################
        model_params = stuf(name=PARAMS['model_name'],
                            model_dir=os.path.join(experiment_dir, experiment_name, 'models'),
                            num_classes=PARAMS['num_classes'],
                            frozen_layers = PARAMS['frozen_layers'],
                            input_shape = (*PARAMS['image_size'],PARAMS['num_channels']),
                            base_learning_rate = PARAMS['lr'],
                            regularization = PARAMS['regularization'])
        ####
        if PARAMS['model_name']=='shallow':
            model = build_shallow(input_shape=model_params.input_shape,
                                  num_classes=PARAMS['num_classes'],
                                  optimizer=optimizer,
                                  loss=loss,
                                  METRICS=METRICS)

        else:
            model = build_model(model_params,
                                optimizer,
                                loss,
                                METRICS)
        print(f"TRAINING {PARAMS['model_name']}")

        # Send the model summary to neptune line by line instead of stdout.
        model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))

        history = model.fit(train_data,
                            epochs=num_epochs,
                            callbacks=callbacks,
                            validation_data=validation_data,
                            shuffle=True,
                            initial_epoch=initial_epoch,
                            steps_per_epoch=steps_per_epoch,
                            validation_steps=validation_steps)


        # Evaluate on the test subset when there is one, otherwise fall back to validation.
        if 'test' in split_data:
            results = model.evaluate(test_data,
                                    steps=test_steps)
        else:
            results = model.evaluate(validation_data,
                                    steps=validation_steps)
Example #11
0
def main():
    """Train DCENet with early stopping, then predict and evaluate on the test list.

    Reads ``--gpu`` and ``--config`` from the command line, loads ``config/<config>`` as
    JSON, optionally starts a neptune experiment, trains until ``config['max_epochs']`` or
    early stopping, reloads the checkpoint from ``checkpoints/`` and reports errors for all
    sampled predictions and for the top-ranked prediction per trajectory.
    """

    # ================= Arguments ================ #
    parser = argparse.ArgumentParser(description='PyTorch Knowledge Distillation')
    parser.add_argument('--gpu', type=str, default="4", help='gpu id')
    parser.add_argument('--config', type=str, default="config", help='.json')
    args = parser.parse_args()

    # ================= Device Setup ================ #
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ================= Config Load ================ #
    with open('config/' + args.config) as config_file:
        config = json.load(config_file)

    # ================= Neptune Setup ================ #
    if config['neptune']:
        neptune.init('seongjulee/DCENet', api_token=config["neptune_token"])   # username/project-name, api_token=token from neptune
        neptune.create_experiment(name='EXP', params=config)  # name=project name (anything is ok), params=parameter list (json format)
        neptune.append_tag(args.config) # neptune tag (str or string list)

    # ================= Model Setup ================ #    
    model = nn.DataParallel(DCENet(config)).to(device) if len(args.gpu.split(',')) > 1 else DCENet(config).to(device)
    
    # ================= Loss Function ================ #
    criterion = DCENetLoss(config)

    # ================= Optimizer Setup ================ #
    optimizer = optim.Adam(model.parameters(), lr=config['lr'], betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-6, amsgrad=False)

    # ================= Data Loader ================ #
    datalist = DataInfo()
    train_datalist = datalist.train_merged
    print('Train data list', train_datalist)

    test_datalist = datalist.train_biwi
    print('Test data list', test_datalist)

    # Fixed seed so the random train/validation mask is the same on every run.
    np.random.seed(10)
    offsets, traj_data, occupancy = load_data(config, train_datalist, datatype="train")
    trainval_split = np.random.rand(len(offsets)) < config['split']

    # The first obs_seq - 1 steps are the observed input, the remaining steps the target.
    train_x = offsets[trainval_split, :config['obs_seq'] - 1, 4:6]
    train_occu = occupancy[trainval_split, :config['obs_seq'] - 1, ..., :config['enviro_pdim'][-1]]
    train_y = offsets[trainval_split, config['obs_seq'] - 1:, 4:6]
    train_y_occu = occupancy[trainval_split, config['obs_seq'] - 1:, ..., :config['enviro_pdim'][-1]]

    val_x = offsets[~trainval_split, :config['obs_seq'] - 1, 4:6]
    val_occu = occupancy[~trainval_split, :config['obs_seq'] - 1, ..., :config['enviro_pdim'][-1]]
    val_y = offsets[~trainval_split, config['obs_seq'] - 1:, 4:6]
    val_y_occu = occupancy[~trainval_split, config['obs_seq'] - 1:, ..., :config['enviro_pdim'][-1]]

    print("%.0f trajectories for training\n %.0f trajectories for valiadation" %(train_x.shape[0], val_x.shape[0]))

    test_offsets, test_trajs, test_occupancy = load_data(config, test_datalist, datatype="test")
    test_x = test_offsets[:, :config['obs_seq'] - 1, 4:6]
    test_occu = test_occupancy[:, :config['obs_seq'] - 1, ..., :config['enviro_pdim'][-1]]
    last_obs_test = test_offsets[:, config['obs_seq'] - 2, 2:4]
    y_truth = test_offsets[:, config['obs_seq'] - 1:, :4]
    xy_truth = test_offsets[:, :, :4]

    print('test_trajs', test_trajs.shape)

    print("%.0f trajectories for testing" % (test_x.shape[0]))

    train_dataset = TrajDataset(x=train_x, x_occu=train_occu, y=train_y, y_occu=train_y_occu, mode='train')
    train_loader = DataLoader(dataset=train_dataset, batch_size=config["batch_size"], shuffle=True, num_workers=4)

    val_dataset = TrajDataset(x=val_x, x_occu=val_occu, y=val_y, y_occu=val_y_occu, mode='val')
    val_loader = DataLoader(dataset=val_dataset, batch_size=config["batch_size"], shuffle=False, num_workers=4)
    
    # test_dataset = TrajDataset(x=test_x, x_occu=test_occu, y=y_truth, y_occu=None, mode='test')
    # test_loader = DataLoader(dataset=test_dataset, batch_size=config["batch_size"], shuffle=False, num_workers=4)

    # ================= Training Loop ================ #
    checkpoint_name = args.config.split('/')[-1].replace('.json', '.pth')
    early_stopping = EarlyStopping(patience=config['patience'], verbose=True, filename=checkpoint_name)
    for epoch in range(config['max_epochs']):
        train_one_epoch(config, epoch, device, model, optimizer, criterion, train_loader)
        val_loss = evaluate(config, device, model, optimizer, criterion, val_loader)
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break
    
    # ================= Test ================ #
    # map_location keeps the checkpoint loadable when the current device differs from the saving one.
    model.load_state_dict(torch.load(os.path.join('checkpoints', checkpoint_name), map_location=device))
    model.eval()
    # nn.DataParallel does not forward custom attributes; reach the wrapped network for encoder_x/decoder.
    net = model.module if isinstance(model, nn.DataParallel) else model
    with torch.no_grad():
        test_x, test_occu = input2tensor(test_x, test_occu, device)
        x_latent = net.encoder_x(test_x, test_occu)
        predictions = []
        for i, x_ in enumerate(x_latent):
            last_pos = last_obs_test[i]
            x_ = x_.view(1, -1)
            # Draw num_pred samples per trajectory from the decoder.
            for _ in range(config['num_pred']):
                y_p = net.decoder(x_, train=False)
                # Prepend the last observed position and accumulate the predicted steps.
                y_p_ = np.concatenate(([last_pos], np.squeeze(y_p.cpu().numpy())), axis=0)
                y_p_sum = np.cumsum(y_p_, axis=0)
                predictions.append(y_p_sum[1:, :])

    predictions = np.reshape(predictions, [-1, config['num_pred'], config['pred_seq'], 2])

    print('Predicting done!')
    print(predictions.shape)
    plot_pred(xy_truth, predictions)
    # Get the errors for ADE, DEF, Hausdorff distance, speed deviation, heading error
    print("\nEvaluation results @top%.0f" % config['num_pred'])
    errors = get_errors(y_truth, predictions)
    check_collision(y_truth)

    ## Keep, for each trajectory, the prediction with the highest gauss_rank score
    ranked_prediction = []
    for prediction in predictions:
        ranks = gauss_rank(prediction)
        ranked_prediction.append(prediction[np.argmax(ranks)])
    ranked_prediction = np.reshape(ranked_prediction, [-1, 1, config['pred_seq'], 2])
    print("\nEvaluation results for most-likely predictions")
    ranked_errors = get_errors(y_truth, ranked_prediction)
Example #12
0
import neptune
import numpy as np

# Point the client at the target project
neptune.init('neptune-workshops/AII-Optimali')

# Hyper-parameters recorded with the experiment
PARAMS = {'decay_factor': 0.5, 'n_iterations': 117}

# Open the experiment
neptune.create_experiment(name='minimal-extended', params=PARAMS)

# Emit one metric/text triple per iteration (1 .. n_iterations - 1)
for step in range(1, PARAMS['n_iterations']):
    loss_value = PARAMS['decay_factor'] / step**0.5
    neptune.log_metric('iteration', step)
    neptune.log_metric('loss', loss_value)
    neptune.log_text('text_info', 'some value {}'.format(0.95 * step**2))

# Tag the experiment
neptune.append_tag('quick_start')

# Log five random 10x10 RGB mosaics, each cell blown up to 30x30 pixels
for _ in range(5):
    mosaic = np.random.rand(10, 10, 3) * 255
    mosaic = mosaic.repeat(30, axis=0).repeat(30, axis=1)
    neptune.log_image('mosaics', mosaic)
Example #13
0
def Eval_phase(params,which_files='test',model=None,test_dataloader=None,device=None):
    """Evaluate ``model`` on ``test_dataloader`` and report classification metrics.

    Args:
        params: configuration dict. Keys read here: ``'is_model'``,
            ``'num_classes'``, ``'logging'`` and, on the Neptune branch,
            ``'path_files'`` and ``'language'``.
        which_files: label printed in the progress message.
        model: model to evaluate; called with input ids, ``attention_vals``,
            ``attention_mask``, ``labels=None`` and ``device``.
        test_dataloader: iterable of batches; each batch is indexed 0..3 for
            input ids, attention values, attention mask and labels.
        device: device the batch tensors are moved to.

    Returns:
        ``1`` when ``params['is_model'] == True`` does not hold; otherwise the
        tuple ``(f1, accuracy, precision, recall, roc_auc, probabilities)``
        where ``probabilities`` is the per-example softmax of the logits.
        ROC AUC is only computed when ``params['num_classes'] == 3`` and is
        ``0`` otherwise.
    """
    # Only an already-instantiated model is supported; bail out otherwise.
    if not (params['is_model'] == True):
        return 1
    print("model previously passed")
    # Evaluation mode: dropout layers behave differently than in training.
    model.eval()

    print("Running eval on ",which_files,"...")
    t0 = time.time()

    true_labels = []
    pred_labels = []
    logits_all = []
    for step, batch in tqdm(enumerate(test_dataloader)):
        # Every 40 batches (except the first) the elapsed time is formatted.
        if step != 0 and step % 40 == 0:
            elapsed = format_time(time.time() - t0)

        # Batch layout: [0] input ids, [1] attention vals,
        # [2] attention mask, [3] labels.
        b_input_ids, b_att_val, b_input_mask, b_labels = (
            batch[slot].to(device) for slot in range(4))

        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
        model.zero_grad()
        outputs = model(b_input_ids,
                        attention_vals=b_att_val,
                        attention_mask=b_input_mask,
                        labels=None,
                        device=device)

        # Bring logits and gold labels back to the CPU as numpy arrays.
        logits = outputs[0].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Accumulate predictions, gold labels and raw logits across batches.
        pred_labels.extend(np.argmax(logits, axis=1).flatten())
        true_labels.extend(label_ids.flatten())
        logits_all.extend(logits)

    # Turn each example's logits into class probabilities.
    logits_all_final = [softmax(example_logits) for example_logits in logits_all]

    testf1 = f1_score(true_labels, pred_labels, average='macro')
    testacc = accuracy_score(true_labels, pred_labels)
    if params['num_classes'] == 3:
        testrocauc = roc_auc_score(true_labels, logits_all_final,
                                   multi_class='ovo', average='macro')
    else:
        testrocauc = 0
    testprecision = precision_score(true_labels, pred_labels, average='macro')
    testrecall = recall_score(true_labels, pred_labels, average='macro')

    # NOTE(review): execution only gets here when params['is_model'] == True,
    # so unless `params` is mutated during evaluation the Neptune branch
    # below is never taken -- confirm whether that is intended.
    if params['logging'] != 'neptune' or params['is_model'] == True:
        # Report the final metrics for this run on stdout.
        print(" Accuracy: {0:.2f}".format(testacc))
        print(" Fscore: {0:.2f}".format(testf1))
        print(" Precision: {0:.2f}".format(testprecision))
        print(" Recall: {0:.2f}".format(testrecall))
        print(" Roc Auc: {0:.2f}".format(testrocauc))
        print(" Test took: {:}".format(format_time(time.time() - t0)))
    else:
        bert_model = params['path_files']
        language = params['language']
        name_one = bert_model+"_"+language
        neptune.create_experiment(name_one,params=params,send_hardware_metrics=False,run_monitoring_thread=False)
        for tag in (bert_model, language, 'test'):
            neptune.append_tag(tag)
        for metric_name, metric_value in (('test_f1score', testf1),
                                          ('test_accuracy', testacc),
                                          ('test_precision', testprecision),
                                          ('test_recall', testrecall),
                                          ('test_rocauc', testrocauc)):
            neptune.log_metric(metric_name, metric_value)
        neptune.stop()

    return testf1,testacc,testprecision,testrecall,testrocauc,logits_all_final
Example #14
0
    parser.add_argument('--weight_decay', type=float, help='')
    parser.add_argument('--max_epoch', type=int, help='')
    parser.add_argument('--valid_every', type=int, help='')
    parser.add_argument('--out_dir', type=str, help='')
    parser.add_argument('--out_file', type=str, help='')
    parser.add_argument('--patience', type=int, help='')
    parser.add_argument('--is_train', type=int, help='')

    parser.add_argument('--dim_input', type=int, help='')
    parser.add_argument('--dim_out', type=int, help='')
    parser.add_argument('--dim_lstm_hidden', type=int, help='')
    parser.add_argument('--dim_fc_hidden', type=int, help='')

    parser.add_argument('--rnn_len', type=int, help='')
    parser.add_argument('--name', type=str, help='')
    parser.add_argument('--tag', type=str, help='')
    parser.add_argument('--n_cmt', type=int, help='')
    args = parser.parse_args()

    params = vars(args)

    neptune.init('cjlee/AnomalyDetection-Supervised-RNN')
    experiment = neptune.create_experiment(name=args.name, params=params)
    neptune.append_tag(args.tag)

    args.out_dir = './result'
    args.out_file = experiment.id + '.pth'

    # temporary code for testing
    train_main(args, neptune)
    # Directory to save the pretrained model
    parser.add_argument("--save_dir",
                        default="./resource/checkpoint/zinc_daga/")

    args = parser.parse_args()

    # Initialize random seed and prepare CUDA device
    device = torch.device(0)
    random.seed(0)

    # Initialize neptune
    neptune.init(
        project_qualified_name="sungsoo.ahn/deep-molecular-optimization")
    neptune.create_experiment(name="pretrain", params=vars(args))
    neptune.append_tag(args.dataset)

    # Load character dict and dataset
    char_dict = SmilesCharDictionary(dataset=args.dataset,
                                     max_smi_len=args.max_smiles_length)
    dataset = load_dataset(char_dict=char_dict, smi_path=args.dataset_path)

    # Prepare neural apprentice. We set max_sampling_batch_size=0 since we do not use sampling.
    input_size = max(char_dict.char_idx.values()) + 1
    generator = SmilesGenerator(
        input_size=input_size,
        hidden_size=args.hidden_size,
        output_size=input_size,
        n_layers=args.n_layers,
        lstm_dropout=args.lstm_dropout,
    )
def training_pipeline(args):
    """Train and/or evaluate the LSTM classifier while tracking the run in Neptune.

    Args:
        args: parsed argument namespace. Attributes read here: ``seed``,
            ``cuda``, ``cuda_device``, ``data``, ``batch_size``,
            ``eval_batch_size``, ``use_pretrained_embed``, ``embed_algo``,
            ``model_path_embed``, ``model_name_embed``, ``embed_size``,
            ``num_hidden``, ``num_layers``, ``dropout``, ``train_lstm``,
            ``learning_rate``, ``epochs``, ``clip``, ``log_interval``,
            ``model_path_lstm``, ``eval_lstm`` and, optionally,
            ``experiment_id``.

    Returns:
        tuple: ``(test_loss, test_accuracy)``. Both are ``None`` unless
        ``args.eval_lstm`` is truthy.
    """
    ###############################################################################
    # Environment setup
    ###############################################################################

    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Warn when a CUDA device exists but the run was not asked to use it.
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    device = torch.device(args.cuda_device if args.cuda else "cpu")

    ###############################################################################
    # Experiment tracking setup
    ###############################################################################
    neptune.init(project_qualified_name='karexar/GSW-dialect-classifier')
    args_dict = vars(args)
    neptune.create_experiment(params=args_dict)
    if hasattr(args, 'experiment_id'):
        neptune.append_tag(args.experiment_id)
    neptune.set_property('lm_algo', 'lstm')
    # Mirror every argument as an experiment property.
    for key, value in args_dict.items():
        neptune.set_property(key, value)

    ###############################################################################
    # Load data
    ###############################################################################

    print('Loading data')
    data_manager = DataManager(args.data, device, args.batch_size,
                               args.eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################

    print('Building model')
    num_tokens = data_manager.vocab_size
    num_labels = data_manager.num_labels
    embeddings_matrix = None
    if args.use_pretrained_embed:
        # Load pre-trained word embeddings model
        # and generate the embeddings weight matrix for the entire vocabulary
        assert args.embed_algo is not None
        print(f'Using {args.embed_algo} pre-trained word embeddings')

        # Both supported algorithms share the same constructor and lookup
        # call, so only the class differs. Any other value leaves
        # embeddings_matrix as None, exactly as before.
        if args.embed_algo == 'word2vec':
            embed_model_cls = Word2VecModel
        elif args.embed_algo == 'glove':
            embed_model_cls = GloveModel
        else:
            embed_model_cls = None

        if embed_model_cls is not None:
            pretrained_embeddings = embed_model_cls(args.model_path_embed,
                                                    args.model_name_embed,
                                                    load_from_disk=True)
            embeddings_matrix = pretrained_embeddings.get_vocabulary_embeddings(
                data_manager.idx2word, args.embed_size)

    model = LSTM(num_tokens, args.embed_size, args.num_hidden, args.num_layers,
                 args.dropout, num_labels, embeddings_matrix).to(device)

    print('Model architecture')
    print(model)

    criterion = nn.CrossEntropyLoss()

    ###############################################################################
    # Training code
    ###############################################################################

    print('Initialising model executor')
    model_executor = ModelExecutor(model, data_manager, device, criterion)

    if args.train_lstm:
        # Loop over epochs
        learning_rate = args.learning_rate
        best_val_accuracy = None
        last_val_accuracy = 0
        model_optimiser = optim.SGD(model.parameters(), lr=learning_rate)

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            print('Starting the training process')
            for epoch in range(1, args.epochs + 1):
                epoch_start_time = time.time()

                _, _ = model_executor.train(epoch, args.batch_size,
                                            learning_rate, model_optimiser,
                                            args.clip, args.log_interval)
                val_loss, val_accuracy = model_executor.evaluate(
                    data_manager.val_iter, args.eval_batch_size)

                # Log result in Neptune ML
                neptune.send_metric('valid_loss', epoch, val_loss)
                neptune.send_metric('valid_accuracy', epoch, val_accuracy)
                neptune.send_metric('learning_rate', epoch, learning_rate)

                if epoch % 3 == 0:
                    learning_rate *= 0.9  # correct the learning rate after some number of epochs

                print('-' * 89)
                print(
                    '| End of epoch {:3d} | Time: {:5.2f}s | Valid loss {:6.2f} | '
                    'Valid accuracy {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        val_accuracy))
                print('-' * 89)

                # Save the model if the validation accuracy is the best we've
                # seen so far. Compare against None explicitly: a best
                # accuracy of 0.0 is a real value, not "nothing saved yet".
                if best_val_accuracy is None or val_accuracy > best_val_accuracy:
                    model_executor.model.export_model(args.model_path_lstm)
                    best_val_accuracy = val_accuracy

                if val_accuracy < last_val_accuracy:
                    # Anneal the learning rate if no improvement has been seen in the validation dataset.
                    learning_rate /= 2.0

                # Push the (possibly updated) learning rate into the optimiser.
                for group in model_optimiser.param_groups:
                    group['lr'] = learning_rate

                last_val_accuracy = val_accuracy

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

    ###############################################################################
    # Evaluation code
    ###############################################################################

    test_loss = None
    test_accuracy = None
    if args.eval_lstm:
        print('Evaluating on the test set')

        # Load the best saved model.
        model_executor.load_pre_trained_model(args.model_path_lstm,
                                              device=device)

        # Run on test data.
        test_loss, test_accuracy = model_executor.evaluate(
            data_manager.test_iter, args.eval_batch_size)

        # Log result in Neptune ML
        neptune.send_metric('test_loss', test_loss)
        neptune.send_metric('test_accuracy', test_accuracy)

        print('-' * 89)
        print('| End of evaluation | Test loss {:6.2f}'.format(test_loss) +
              ' | Test accuracy {:8.2f}'.format(test_accuracy))
        print('-' * 89)

    ###############################################################################
    # Stop the experiment tracking
    ###############################################################################

    neptune.stop()

    return test_loss, test_accuracy
Example #17
0
    parameters['metrics_separately'] = args.metrics_separately
    parameters['random_val_neg_sampler'] = args.random_val_neg_sampler
    parameters['val_regenerate'] = args.val_regenerate

    if args.log:
        import neptune

        neptune.init(args.neptune_project)
        neptune_experiment_name = args.experiment_name
        neptune.create_experiment(name=neptune_experiment_name,
                                  params=parameters,
                                  upload_stdout=True,
                                  upload_stderr=True,
                                  send_hardware_metrics=True,
                                  upload_source_files='**/*.py')
        neptune.append_tag('pytorch')

        if args.gpu:
            neptune.append_tag('gpu')
        if args.use_proteins:
            neptune.append_tag('proteins')
        if args.reversed:
            neptune.append_tag('reversed')
        neptune.append_tag('real data')
        neptune.append_tag('trivec')
    use_cuda = args.gpu and torch.cuda.is_available()
    device = torch.device("cuda" if args.gpu else "cpu")
    print(f'Use device: {device}')

    kg = KnowledgeGraph(data_path=DATA_CONST['work_dir'],
                        use_proteins=args.use_proteins,
Example #18
0
def train_model(params, best_val_fscore):
    """Fine-tune a BERT classifier and checkpoint it when validation F-score improves.

    Args:
        params: configuration dict. Keys read here: ``'language'``,
            ``'csv_file'``, ``'files'``, ``'path_files'``, ``'what_bert'``,
            ``'weights'``, ``'max_length'``, ``'batch_size'``, ``'is_train'``,
            ``'learning_rate'``, ``'epsilon'``, ``'epochs'``,
            ``'random_seed'`` and ``'logging'``. ``'csv_file'`` is overwritten
            with ``'*_full.csv'`` when the language is ``'English'``.
        best_val_fscore: best validation F-score seen so far; the model is
            saved only when an epoch beats it.

    Returns:
        ``(fscore, best_val_fscore)``.
        NOTE(review): ``fscore`` is never assigned inside this function, so the
        return raises ``NameError`` unless a module-level ``fscore`` exists --
        confirm which value was meant to be returned.
    """

    # In case of english languages, translation is the origin data itself.
    if (params['language'] == 'English'):
        params['csv_file'] = '*_full.csv'

    train_path = params['files'] + '/train/' + params['csv_file']
    val_path = params['files'] + '/val/' + params['csv_file']

    # Load the training and validation datasets
    train_files = glob.glob(train_path)
    val_files = glob.glob(val_path)

    #Load the bert tokenizer
    print('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained(params['path_files'],
                                              do_lower_case=False)
    df_train = data_collector(train_files, params, True)
    df_val = data_collector(val_files, params, False)

    # Get the comment texts and corresponding labels.
    # NOTE(review): there is no else branch, so any other 'csv_file' value
    # leaves sentences_train / sentences_val undefined.
    if (params['csv_file'] == '*_full.csv'):
        sentences_train = df_train.text.values
        sentences_val = df_val.text.values
    elif (params['csv_file'] == '*_translated.csv'):
        sentences_train = df_train.translated.values
        sentences_val = df_val.translated.values

    labels_train = df_train.label.values
    labels_val = df_val.label.values
    label_counts = df_train['label'].value_counts()
    print(label_counts)
    # Inverse-frequency weights for labels 0 and 1. They are printed below but
    # not used anywhere else in this function.
    label_weights = [(len(df_train)) / label_counts[0],
                     len(df_train) / label_counts[1]]
    print(label_weights)

    # Select the required bert model. Refer below for explanation of the parameter values.
    model = select_model(params['what_bert'], params['path_files'],
                         params['weights'])
    # Tell pytorch to run this model on the GPU.
    model.cuda()

    # Do the required encoding using the bert tokenizer
    input_train_ids, att_masks_train = combine_features(
        sentences_train, tokenizer, params['max_length'])
    input_val_ids, att_masks_val = combine_features(sentences_val, tokenizer,
                                                    params['max_length'])

    # Create dataloaders for both the train and validation datasets.
    # NOTE(review): validation_dataloader is built but not passed to
    # Eval_phase below -- confirm how Eval_phase obtains its data.
    train_dataloader = return_dataloader(input_train_ids,
                                         labels_train,
                                         att_masks_train,
                                         batch_size=params['batch_size'],
                                         is_train=params['is_train'])
    validation_dataloader = return_dataloader(input_val_ids,
                                              labels_val,
                                              att_masks_val,
                                              batch_size=params['batch_size'],
                                              is_train=False)

    # Initialize AdamW optimizer.
    optimizer = AdamW(
        model.parameters(),
        lr=params[
            'learning_rate'],  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=params['epsilon']  # args.adam_epsilon  - default is 1e-8.
    )

    # Number of training epochs (authors recommend between 2 and 4)
    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * params['epochs']

    # Create the learning rate scheduler; the first tenth of all steps is warm-up.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(total_steps / 10),  # Default value in run_glue.py
        num_training_steps=total_steps)

    # Set the seed value all over the place to make this reproducible.
    fix_the_random(seed_val=params['random_seed'])
    # Store the average loss after each epoch so we can plot them.
    # (The list is filled below but is neither returned nor plotted here.)
    loss_values = []

    # Create a new experiment in neptune for this run.
    bert_model = params['path_files']
    language = params['language']
    name_one = bert_model + "_" + language
    if (params['logging'] == 'neptune'):
        neptune.create_experiment(name_one,
                                  params=params,
                                  send_hardware_metrics=False,
                                  run_monitoring_thread=False)
        neptune.append_tag(bert_model)
        neptune.append_tag(language)

    # The best val fscore obtained till now, for the purpose of hyper parameter finetuning.
    # (Self-assignment: this statement has no effect.)
    best_val_fscore = best_val_fscore

    # For each epoch...
    for epoch_i in range(0, params['epochs']):
        print("")
        print('======== Epoch {:} / {:} ========'.format(
            epoch_i + 1, params['epochs']))
        print('Training...')

        # Measure how long the training epoch takes.
        t0 = time.time()

        # Reset the total loss for this epoch.
        total_loss = 0
        model.train()

        # For each batch of training data...
        for step, batch in tqdm(enumerate(train_dataloader)):

            # Progress update every 40 batches.
            if step % 40 == 0 and not step == 0:
                # Calculate elapsed time in minutes.
                # (`elapsed` is computed but never printed or used.)
                elapsed = format_time(time.time() - t0)
            # `batch` contains three pytorch tensors:
            #   [0]: input ids
            #   [1]: attention masks
            #   [2]: labels
            # NOTE(review): `device` is not defined in this function;
            # presumably a module-level global -- verify.
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)
            # Clear gradients left over from the previous step.
            # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
            model.zero_grad()

            # Get the model outputs for this batch.
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask,
                            labels=b_labels)

            # The call to `model` always returns a tuple, so we need to pull the
            # loss value out of the tuple.
            loss = outputs[0]
            if (params['logging'] == 'neptune'):
                # Logs the loss object itself, not `loss.item()`.
                neptune.log_metric('batch_loss', loss)
            # Accumulate the training loss over all of the batches so that we can
            # calculate the average loss at the end. `loss` is a Tensor containing a
            # single value; the `.item()` function just returns the Python value
            # from the tensor.
            total_loss += loss.item()

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            # Clip the norm of the gradients to 1.0.
            # This is to help prevent the "exploding gradients" problem.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            # Update parameters and take a step using the computed gradient.
            # The optimizer dictates the "update rule"--how the parameters are
            # modified based on their gradients, the learning rate, etc.
            optimizer.step()
            # Update the learning rate.
            scheduler.step()
        # Calculate the average loss over the training data.
        avg_train_loss = total_loss / len(train_dataloader)
        if (params['logging'] == 'neptune'):
            neptune.log_metric('avg_train_loss', avg_train_loss)

        # Store the loss value for plotting the learning curve.
        loss_values.append(avg_train_loss)
        # Compute the metrics on the validation and test sets.
        # NOTE(review): each result is unpacked into exactly two values;
        # confirm the Eval_phase in scope returns (fscore, accuracy).
        val_fscore, val_accuracy = Eval_phase(params, 'val', model)
        test_fscore, test_accuracy = Eval_phase(params, 'test', model)

        #Report the final accuracy and fscore for this validation run.
        if (params['logging'] == 'neptune'):
            neptune.log_metric('val_fscore', val_fscore)
            neptune.log_metric('val_acc', val_accuracy)
            neptune.log_metric('test_fscore', test_fscore)
            neptune.log_metric('test_accuracy', test_accuracy)

        # Save the model only if the validation fscore improves. After all epochs, the best model is the final saved one.
        if (val_fscore > best_val_fscore):
            print(val_fscore, best_val_fscore)
            best_val_fscore = val_fscore

            save_model(model, tokenizer, params)

    if (params['logging'] == 'neptune'):
        neptune.stop()
    # Drop the model reference and release cached GPU memory.
    del model
    torch.cuda.empty_cache()
    # NOTE(review): `fscore` is not assigned anywhere above (see docstring).
    return fscore, best_val_fscore
Example #19
0
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt'+run_start_time+'.pth')
        best_acc = acc
    your_file.close()

# Script entry point: load data and model, then alternate train/test epochs
# inside a Neptune experiment.
if __name__ == '__main__':
# Disabled TensorBoard writer:
#     writer = SummaryWriter(log_dir="/home/dltdc/data/projects_logs/water_logs/", filename_suffix=run_start_time)

    # Water dataset with a single-output ResNet-50 on 256-pixel images.
    trainloader, testloader = loaddata(load_water_data=True)
    net, criterion, optimizer = loadmodel(nb_class=1, img_HW=256, pretrain_model="resnet50")


    # Alternative (disabled) configuration: default dataset with a 10-class ResNet-18.
    # trainloader, testloader = loaddata()
    # net, criterion, optimizer = loadmodel(nb_class=10, img_HW=8, pretrain_model='resnet18')
    

    # The experiment is used as a context manager around the whole run.
    # `start_epoch` and `args` are not defined in this block; presumably
    # module-level names set earlier in the file -- verify.
    with neptune.create_experiment(name='new-model'):
        neptune.append_tag('first')
        # Run args.nb_epoch epochs starting at start_epoch; each epoch trains
        # and then evaluates.
        for epoch in range(start_epoch, start_epoch+args.nb_epoch):
            train(epoch, net, criterion, optimizer, trainloader)
            test(epoch, net, criterion, optimizer, testloader)

# Disabled TensorBoard writer teardown:
#     writer.close()

Example #20
0
File: run.py Project: csadrian/wae
def main():
    """Assemble the experiment options from FLAGS and run the requested mode.

    The base option dict is ``configs.config_<FLAGS.exp>`` for one of the
    known experiment names. Command-line flags are then copied into it, the
    work directory is prepared, all options are dumped to ``params.txt`` and
    the dataset is loaded. Finally ``opts['mode']`` selects training
    (optionally tracked in Neptune when ``NEPTUNE_API_TOKEN`` is set),
    improved sampling, generation or drawing.
    """
    known_experiments = (
        'celebA', 'celebA_small', 'mnist', 'mnist_ord', 'mnist_small',
        'dsprites', 'grassli', 'grassli_small', 'syn_constant_uniform',
        'syn_2_constant_uniform', 'checkers', 'noise', 'noise_unif',
    )
    # Every known experiment maps to the attribute `config_<name>` of configs.
    if FLAGS.exp in known_experiments:
        opts = getattr(configs, 'config_' + FLAGS.exp)
    else:
        assert False, 'Unknown experiment configuration'

    def copy_flag(flag_name, opt_key=None):
        # Unconditionally store a flag value under opt_key (default: same name).
        opts[opt_key or flag_name] = getattr(FLAGS, flag_name)

    def copy_flag_if_set(flag_name, opt_key=None):
        # Store a flag value only when it was given (is not None).
        if getattr(FLAGS, flag_name) is not None:
            copy_flag(flag_name, opt_key)

    # NOTE: the order of the assignments below fixes the key order of newly
    # added options, which is the order they are written to params.txt.
    copy_flag('exp')
    copy_flag('seed')

    copy_flag('mode')
    if opts['mode'] == 'test':
        assert FLAGS.checkpoint is not None, 'Checkpoint must be provided'
        copy_flag('checkpoint')

    copy_flag_if_set('batch_size')

    # recalculate_size falls back to batch_size and may not be smaller than it.
    if FLAGS.recalculate_size is None:
        opts['recalculate_size'] = opts['batch_size']
    else:
        copy_flag('recalculate_size')
        assert opts['recalculate_size'] >= opts[
            'batch_size'], "recalculate_size should be at least as large as batch_size"

    for flag_name in ('zdim', 'pz', 'lr', 'lr_schedule', 'w_aef', 'z_test',
                      'lambda_schedule', 'work_dir'):
        copy_flag_if_set(flag_name)
    # These two flags are stored under a different option name.
    copy_flag_if_set('wae_lambda', 'lambda')
    copy_flag_if_set('enc_noise', 'e_noise')
    copy_flag_if_set('z_test_scope')

    copy_flag_if_set('length_lambda')

    # grad_clip is always written; an unset flag stores None.
    copy_flag('grad_clip')

    for flag_name in ('rec_lambda', 'zxz_lambda', 'train_size'):
        copy_flag_if_set(flag_name)

    # nat_size defaults to the train_size flag when not given.
    if FLAGS.nat_size is None:
        copy_flag('train_size', 'nat_size')
    else:
        copy_flag('nat_size')

    # NOTE(review): this group copies 'recalculate_size' unconditionally,
    # which replaces the batch_size fallback chosen above with None whenever
    # the flag is unset -- confirm this is intended.
    for flag_name in ('nat_resampling', 'sinkhorn_sparse',
                      'sinkhorn_sparsifier', 'sparsifier_freq',
                      'sinkhorn_unbiased', 'feed_by_score_from_epoch',
                      'recalculate_size', 'stay_lambda', 'mover_ratio'):
        copy_flag(flag_name)
    assert 0 <= opts['mover_ratio'] <= 1, "mover_ratio must be in [0,1]"

    for flag_name in ('sinkhorn_iters', 'sinkhorn_epsilon', 'name', 'tags',
                      'epoch_num', 'e_pretrain', 'shuffle'):
        copy_flag_if_set(flag_name)

    if opts['verbose']:
        # Debug-level logging is currently switched off:
        #logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
        pass
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

    work_dir = opts['work_dir']
    utils.create_dir(work_dir)
    utils.create_dir(os.path.join(work_dir, 'checkpoints'))

    if opts['e_noise'] == 'gaussian' and opts['pz'] != 'normal':
        assert False, 'Gaussian encoders compatible only with Gaussian prior'
        return

    # Dump every option to <work_dir>/params.txt.
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key, value in opts.items():
            text.write('%s : %s\n' % (key, value))

    # Load the dataset.
    data = DataHandler(opts)
    assert data.num_points >= opts['batch_size'], 'Training set too small'

    # Use the configured train size, or the whole dataset when none is set.
    train_size = opts.get('train_size')
    if train_size is None:
        train_size = data.num_points
    print("Train size:", train_size)

    # A nat_size of -1 means "same as the train size".
    if opts['nat_size'] == -1:
        opts['nat_size'] = train_size

    use_neptune = "NEPTUNE_API_TOKEN" in os.environ

    mode = opts['mode']
    if mode == 'train':
        if use_neptune:
            neptune.init(project_qualified_name="csadrian/global-sinkhorn")
            exp = neptune.create_experiment(
                params=opts,
                name=opts['name'],
                upload_source_files=['wae.py', 'run.py', 'models.py'])

            for tag in opts['tags'].split(','):
                neptune.append_tag(tag)

        # Build the WAE model and train it on the first train_size points.
        wae = WAE(opts, train_size)
        data.num_points = train_size
        wae.train(data)

        if use_neptune:
            exp.stop()

    elif mode == 'test':
        improved_wae.improved_sampling(opts)

    elif mode == 'generate':
        fideval.generate(opts)

    elif mode == 'draw':
        picture_plot.createimgs(opts)
Example #21
0
 def __init__(self, tags):
     """Start a Neptune experiment in 'pixelneo/whoosh' and tag it.

     Args:
         tags: iterable of tags; each one is appended to the experiment.
     """
     neptune.set_project('pixelneo/whoosh')
     neptune.create_experiment()
     for label in tags:
         neptune.append_tag(label)
Example #22
0
def train_imagenette(PARAMS):
    """Fit a VGG16-based classifier on Imagenette with Neptune logging.

    Args:
        PARAMS: configuration dict. Keys read here: ``'dataset_name'``,
            ``'model_name'``, ``'target_size'``, ``'BATCH_SIZE'``,
            ``'augment_train'`` and ``'learning_rate'``. ``'num_classes'`` is
            written back into it.

    Returns:
        None. The fit history is not returned.
    """
    for tag_key in ('dataset_name', 'model_name'):
        neptune.append_tag(PARAMS[tag_key])

    K.clear_session()
    tf.random.set_seed(34)
    target_size = PARAMS['target_size']
    BATCH_SIZE = PARAMS['BATCH_SIZE']

    train_dataset, validation_dataset, info = create_Imagenette_dataset(
        BATCH_SIZE,
        target_size=target_size,
        augment_train=PARAMS['augment_train'])
    label_feature = info.features['label']
    num_classes = label_feature.num_classes

    encoder = base_dataset.LabelEncoder(info.features['label'].names)

    def to_model_inputs(x, y):
        # Same preprocessing for both splits, bound to this run's class count.
        return apply_preprocess(x, y, num_classes)

    train_dataset = train_dataset.map(to_model_inputs, num_parallel_calls=-1)
    validation_dataset = validation_dataset.map(to_model_inputs,
                                                num_parallel_calls=-1)

    PARAMS['num_classes'] = num_classes
    # Whole batches per split; any trailing partial batch is not counted.
    steps_per_epoch = info.splits['train'].num_examples // BATCH_SIZE
    validation_steps = info.splits['validation'].num_examples // BATCH_SIZE

    for prop_name, prop_value in (('num_classes', num_classes),
                                  ('steps_per_epoch', steps_per_epoch),
                                  ('validation_steps', validation_steps)):
        neptune.set_property(prop_name, prop_value)

    optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['learning_rate'])

    # ImageNet-pretrained VGG16 without its classification top.
    input_shape = tuple(target_size) + (3,)
    base = tf.keras.applications.vgg16.VGG16(
        weights='imagenet',
        include_top=False,
        input_tensor=Input(shape=input_shape))

    # TODO try freezing weights for input_shape != (224,224)

    model = build_head(base, num_classes=num_classes)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    def image_logger(dataset, name):
        # Image logging callback with the settings shared by both splits.
        return ImageLoggerCallback(data=dataset,
                                   freq=10,
                                   max_images=-1,
                                   name=name,
                                   encoder=encoder)

    callbacks = [
        neptune_logger,
        image_logger(train_dataset, 'train'),
        image_logger(validation_dataset, 'val'),
        EarlyStopping(monitor='val_loss', patience=2, verbose=1),
    ]

    # Send the model summary to Neptune line by line.
    model.summary(print_fn=lambda line: neptune.log_text('model_summary', line))
    pprint(PARAMS)
    model.fit(train_dataset,
              epochs=10,
              callbacks=callbacks,
              validation_data=validation_dataset,
              shuffle=True,
              initial_epoch=0,
              steps_per_epoch=steps_per_epoch,
              validation_steps=validation_steps)
Example #23
0
def train_model(params,device):
    """Train the model selected by ``params`` and return it.

    Builds the train/val/test dataloaders, trains for ``params['epochs']``
    epochs, evaluates all three splits after every epoch and saves a
    checkpoint each time the validation F-score improves.  Metrics are sent
    to Neptune when ``params['logging'] == 'neptune'``; otherwise the
    per-epoch average loss and the final best scores are printed.

    Args:
        params: Configuration dict.  Keys read here: ``bert_tokens``,
            ``auto_weights``, ``weights``, ``device``, ``learning_rate``,
            ``epsilon``, ``epochs``, ``random_seed``, ``logging``,
            ``best_params`` and ``path_files`` (BERT) or ``model_name``
            (non-BERT).  The dict is mutated: ``embed_size`` and
            ``vocab_size`` are set for non-BERT models and ``weights`` is
            overwritten when ``auto_weights`` is truthy.
        device: Device every batch tensor is moved to via ``.to(device)``.

    Returns:
        The model in its state after the final epoch.  This is not
        necessarily the checkpoint that was saved as "best".
    """
    embeddings = None
    if params['bert_tokens']:
        train, val, test = createDatasetSplit(params)
    else:
        train, val, test, vocab_own = createDatasetSplit(params)
        params['embed_size'] = vocab_own.embeddings.shape[1]
        params['vocab_size'] = vocab_own.embeddings.shape[0]
        embeddings = vocab_own.embeddings

    if params['auto_weights']:
        # NOTE(review): the balanced weights are derived from the labels of
        # the *test* split, exactly as before -- confirm this is intended.
        y_test = [ele[2] for ele in test]
        # `classes` and `y` are keyword-only in current scikit-learn; passing
        # them by keyword also works on older releases.
        params['weights'] = class_weight.compute_class_weight(
            'balanced', classes=np.unique(y_test), y=y_test).astype('float32')

    print(params['weights'])
    train_dataloader = combine_features(train, params, is_train=True)
    validation_dataloader = combine_features(val, params, is_train=False)
    test_dataloader = combine_features(test, params, is_train=False)

    model = select_model(params, embeddings)
    if params["device"] == 'cuda':
        model.cuda()
    optimizer = AdamW(model.parameters(),
                      lr=params['learning_rate'],
                      eps=params['epsilon'])

    # Total number of optimizer steps = number of batches * number of epochs.
    total_steps = len(train_dataloader) * params['epochs']

    # The warm-up scheduler is only created (and only stepped) for BERT models.
    if params['bert_tokens']:
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(total_steps / 10),
            num_training_steps=total_steps)

    # Seed everything to make the run reproducible.
    fix_the_random(seed_val=params['random_seed'])

    use_neptune = params['logging'] == 'neptune'
    name_one = params['path_files'] if params['bert_tokens'] else params['model_name']

    if use_neptune:
        neptune.create_experiment(name_one,
                                  params=params,
                                  send_hardware_metrics=False,
                                  run_monitoring_thread=False)
        neptune.append_tag(name_one)
        neptune.append_tag('AAAI final best' if params['best_params'] else 'AAAI final')

    # Scores of the epoch with the best validation F-score so far.  The key
    # order is the order in which they are reported at the end.
    best = {
        'best_val_fscore': 0,
        'best_test_fscore': 0,
        'best_val_rocauc': 0,
        'best_test_rocauc': 0,
        'best_val_precision': 0,
        'best_test_precision': 0,
        'best_val_recall': 0,
        'best_test_recall': 0,
    }

    for epoch_i in range(params['epochs']):
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, params['epochs']))
        print('Training...')

        total_loss = 0
        model.train()

        # Wrapping the dataloader itself lets tqdm show the total batch count.
        for batch in tqdm(train_dataloader):
            # `batch` holds: [0] input ids, [1] attention values,
            # [2] attention mask, [3] labels.
            b_input_ids = batch[0].to(device)
            b_att_val = batch[1].to(device)
            b_input_mask = batch[2].to(device)
            b_labels = batch[3].to(device)

            # Gradients accumulate by default in PyTorch; clear them first.
            model.zero_grad()
            outputs = model(b_input_ids,
                            attention_vals=b_att_val,
                            attention_mask=b_input_mask,
                            labels=b_labels,
                            device=device)

            # The first element of the model output is the loss.
            loss = outputs[0]

            if use_neptune:
                neptune.log_metric('batch_loss', loss.item())
            total_loss += loss.item()

            loss.backward()
            # Clip the gradient norm to 1.0 to guard against exploding gradients.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            if params['bert_tokens']:
                scheduler.step()

        avg_train_loss = total_loss / len(train_dataloader)
        if use_neptune:
            neptune.log_metric('avg_train_loss', avg_train_loss)
        else:
            print('avg_train_loss', avg_train_loss)

        # Evaluate in the order train -> val -> test.
        scores = {}
        for split, loader in (('train', train_dataloader),
                              ('val', validation_dataloader),
                              ('test', test_dataloader)):
            fscore, accuracy, precision, recall, roc_auc, _ = Eval_phase(
                params, split, model, loader, device)
            scores[split] = {
                'fscore': fscore,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'rocauc': roc_auc,
            }

        if use_neptune:
            # Logged as '<split>_<metric>', test first, then val, then train.
            for split in ('test', 'val', 'train'):
                for metric, value in scores[split].items():
                    neptune.log_metric(split + '_' + metric, value)

        val_scores = scores['val']
        test_scores = scores['test']
        if val_scores['fscore'] > best['best_val_fscore']:
            print(val_scores['fscore'], best['best_val_fscore'])
            for metric in ('fscore', 'rocauc', 'precision', 'recall'):
                best['best_val_' + metric] = val_scores[metric]
                best['best_test_' + metric] = test_scores[metric]

            # Persist the model whenever validation F-score improves.
            if params['bert_tokens']:
                print('Loading BERT tokenizer...')
                tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=False)
                save_bert_model(model, tokenizer, params)
            else:
                print("Saving model")
                save_normal_model(model, params)

    if use_neptune:
        for name, value in best.items():
            neptune.log_metric(name, value)
        neptune.stop()
    else:
        for name, value in best.items():
            print(name, value)

    return model
Example #24
0
def train_pnas(PARAMS):
    """Build a dataset and model from ``PARAMS``, train it, and return the history.

    Tags the active Neptune experiment, creates the train/validation
    datasets with ``create_dataset``, derives the per-epoch step counts,
    builds the model with ``build_model`` and runs ``model.fit``.  After
    training every entry of ``PARAMS`` is stored as a Neptune property.

    Args:
        PARAMS: Configuration dict.  Keys read here: ``log_dir``,
            ``model_dir``, ``dataset_name``, ``model_name``, ``target_size``,
            ``num_channels``, ``color_mode``, ``BATCH_SIZE``, ``splits``,
            ``augment_train``, ``aug_prob``, ``lr`` and ``num_epochs``.
            Keys written: ``num_classes``, ``splits_size``,
            ``base_learning_rate`` and ``input_shape``.

    Returns:
        The object returned by ``model.fit``.
    """
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    tf.random.set_seed(34)

    train_dataset, validation_dataset, data_files = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        batch_size=PARAMS['BATCH_SIZE'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augment_train=PARAMS['augment_train'],
        aug_prob=PARAMS['aug_prob'])

    PARAMS['num_classes'] = data_files.num_classes
    # The split sizes are products of a sample count and a split value; cast
    # them to int so the step counts handed to Keras are integers (this
    # mirrors train_pyleaves_dataset in this file).
    PARAMS['splits_size'] = {
        'train': int(data_files.num_samples * PARAMS['splits']['train']),
        'validation': int(data_files.num_samples *
                          PARAMS['splits']['validation']),
    }

    steps_per_epoch = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    validation_steps = PARAMS['splits_size']['validation'] // PARAMS[
        'BATCH_SIZE']

    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    encoder = base_dataset.LabelEncoder(data_files.classes)

    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=25, verbose=1)
    ]

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])
    model = build_model(PARAMS)

    # Send the textual model summary to Neptune line by line.
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)
    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)

    # Record the final configuration (including the derived keys) on the run.
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))

    return history
Example #25
0
        Epoch:\t{}\n\
        Rounds:\t{}\n\
        Total Number of Users:\t{}\n\
        Selected Users:\t{}\n\
        Server Pure Data: {}\n\
        Mode:\t{}\n\
        Attack:\t{}\n\
        Attackers:\t{}\n\
        Output folder:\t{}".format(args.epochs, args.rounds,
                                   args.total_users_num,
                                   args.selected_users_num, args.server_pure,
                                   args.mode, args.attack_type,
                                   args.attackers_num, args.log_dir))

    # Neptune logging initialization
    if args.neptune_log:
        neptune.init(project_qualified_name=configs['log']['neptune_init'],
                     api_token=utils.get_neptune_token())
        neptune.create_experiment(name=configs['log']['neptune_exp'],
                                  upload_stdout=False,
                                  upload_stderr=False)
        neptune.append_tag(args.log_dir.split("/")[1])
    last_round = main(arguments['--start-round'])
    if args.neptune_log:
        neptune.stop()

    if last_round < args.rounds:
        exit(1)
    else:
        exit(0)
Example #26
0
def Eval_phase(params, which_files='test', model=None):
    """Evaluate a BERT classifier on one split and report macro-F1 and accuracy.

    Args:
        params: Configuration dict.  Keys read here: ``language``,
            ``csv_file``, ``files``, ``path_files``, ``is_model``,
            ``max_length``, ``batch_size`` and ``logging``; ``what_bert`` and
            ``weights`` are read only when no model is passed.
            ``params['csv_file']`` is overwritten with ``'*_full.csv'`` when
            the language is ``'English'``.
        which_files: Split to evaluate: ``'train'``, ``'val'`` or ``'test'``.
        model: Model to evaluate.  Used only when ``params['is_model']`` is
            ``True``; otherwise a model is built with ``select_model``.

    Returns:
        Tuple ``(testf1, testacc)``: macro-averaged F1 and accuracy.

    Raises:
        ValueError: If ``which_files`` is not a known split, or if
            ``params['csv_file']`` is not one of the two supported patterns.
    """
    if which_files not in ('train', 'val', 'test'):
        raise ValueError(
            "which_files must be 'train', 'val' or 'test', got {!r}".format(
                which_files))

    # For English there is no translation, hence use the full dataset.
    if params['language'] == 'English':
        params['csv_file'] = '*_full.csv'

    # The file pattern decides which attribute of the loaded data holds the text.
    if params['csv_file'] == '*_translated.csv':
        text_column = 'translated'
    elif params['csv_file'] == '*_full.csv':
        text_column = 'text'
    else:
        raise ValueError(
            "unsupported params['csv_file']: {!r}".format(params['csv_file']))

    # Collect the files of the requested split.
    test_files = glob.glob(params['files'] + '/' + which_files + '/' +
                           params['csv_file'])

    print('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained(params['path_files'],
                                              do_lower_case=False)

    # Same comparison as the original code (strictly `== True`).
    model_was_passed = params['is_model'] == True  # noqa: E712
    if model_was_passed:
        print("model previously passed")
    else:
        model = select_model(params['what_bert'], params['path_files'],
                             params['weights'])
        model.cuda()
    # Evaluation mode: dropout layers behave differently than in training.
    model.eval()

    # Load and encode the dataset.
    df_test = data_collector(test_files, params, False)
    sentences_test = getattr(df_test, text_column).values
    labels_test = df_test.label.values
    input_test_ids, att_masks_test = combine_features(sentences_test,
                                                      tokenizer,
                                                      params['max_length'])
    test_dataloader = return_dataloader(input_test_ids,
                                        labels_test,
                                        att_masks_test,
                                        batch_size=params['batch_size'],
                                        is_train=False)
    print("Running eval on ", which_files, "...")
    t0 = time.time()

    true_labels = []
    pred_labels = []
    for batch in test_dataloader:
        # NOTE(review): `device` is not a parameter of this function; it has
        # to exist as a module-level name -- confirm.
        b_input_ids, b_input_mask, b_labels = tuple(
            t.to(device) for t in batch)
        # No gradients are needed for evaluation.
        with torch.no_grad():
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask)

        logits = outputs[0].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        pred_labels.extend(np.argmax(logits, axis=1).flatten())
        true_labels.extend(label_ids.flatten())

    testf1 = f1_score(true_labels, pred_labels, average='macro')
    testacc = accuracy_score(true_labels, pred_labels)

    if params['logging'] != 'neptune' or model_was_passed:
        # Report the final scores for this evaluation run.
        print(" Accuracy: {0:.2f}".format(testacc))
        print(" Fscore: {0:.2f}".format(testf1))
        print(" Test took: {:}".format(format_time(time.time() - t0)))
    else:
        # Stand-alone evaluation: record it as its own Neptune experiment.
        bert_model = params['path_files'][:-1]
        language = params['language']
        name_one = bert_model + "_" + language
        neptune.create_experiment(name_one,
                                  params=params,
                                  send_hardware_metrics=False,
                                  run_monitoring_thread=False)
        neptune.append_tag(bert_model)
        neptune.append_tag(language)
        neptune.append_tag('test')
        neptune.log_metric('test_f1score', testf1)
        neptune.log_metric('test_accuracy', testacc)
        neptune.stop()

    return testf1, testacc
def _common_dataset_kwargs(PARAMS):
    """Return the create_dataset keyword arguments shared by both stages."""
    shared_keys = ('threshold', 'buffer_size', 'exclude_classes',
                   'target_size', 'num_channels', 'color_mode', 'splits',
                   'augmentations', 'seed')
    kwargs = {key: PARAMS[key] for key in shared_keys}
    kwargs['batch_size'] = PARAMS['BATCH_SIZE']
    return kwargs


def _store_split_sizes(PARAMS, data_files):
    """Write class count, per-split sample counts and step counts to PARAMS."""
    PARAMS['num_classes'] = data_files.num_classes
    PARAMS['splits_size'] = {
        'train': int(data_files.num_samples * PARAMS['splits']['train']),
        'validation': int(data_files.num_samples *
                          PARAMS['splits']['validation']),
    }
    PARAMS['steps_per_epoch'] = (PARAMS['splits_size']['train'] //
                                 PARAMS['BATCH_SIZE'])
    PARAMS['validation_steps'] = (PARAMS['splits_size']['validation'] //
                                  PARAMS['BATCH_SIZE'])


def _stage_callbacks(PARAMS, model):
    """Build the callback list used for one training stage."""
    backup_callback = BackupAndRestore(PARAMS['checkpoints_path'])
    backup_callback.set_model(model)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=25,
                                   verbose=1,
                                   restore_best_weights=True)
    return [neptune_logger, backup_callback, early_stopping]


def train_pyleaves_dataset(PARAMS):
    """Train a model on the configured dataset, optionally followed by a second stage.

    Stage 1 builds the dataset named by ``PARAMS['dataset_name']``, builds
    the model, fits it, saves it under ``PARAMS['saved_model_path'] +
    '-stage 1'`` and stores every ``PARAMS`` entry as a Neptune property.
    When ``PARAMS['transfer_to_PNAS']`` or ``PARAMS['transfer_to_Fossil']``
    is truthy, the same model is then fitted again on the dataset named by
    ``PARAMS['stage_2']['dataset_name']``, restricted to the stage-1 classes.

    Args:
        PARAMS: Configuration dict; it is read and updated in place
            (``num_classes``, ``splits_size``, ``steps_per_epoch``,
            ``validation_steps``, ``base_learning_rate``, ``input_shape``
            and, for stage 2, ``include_classes``).

    Returns:
        The object returned by the last ``model.fit`` call.
    """
    for directory_key in ('log_dir', 'model_dir'):
        ensure_dir_exists(PARAMS[directory_key])

    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    tf.random.set_seed(PARAMS['seed'])

    # ---- Stage 1 -----------------------------------------------------------
    train_dataset, validation_dataset, STAGE1_data_files, excluded = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        use_tfrecords=PARAMS['use_tfrecords'],
        tfrecord_dir=PARAMS['tfrecord_dir'],
        samples_per_shard=PARAMS['samples_per_shard'],
        **_common_dataset_kwargs(PARAMS))

    _store_split_sizes(PARAMS, STAGE1_data_files)

    for property_name in ('num_classes', 'steps_per_epoch',
                          'validation_steps'):
        neptune.set_property(property_name, PARAMS[property_name])

    # TODO: log encoder contents as dict
    encoder = base_dataset.LabelEncoder(STAGE1_data_files.classes)

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])

    model = build_model(PARAMS)

    # Forward each line of the model summary to Neptune.
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=_stage_callbacks(PARAMS, model),
                        validation_data=validation_dataset,
                        shuffle=True,
                        steps_per_epoch=PARAMS['steps_per_epoch'],
                        validation_steps=PARAMS['validation_steps'])

    # TODO: Change build_model to build_or_load_model
    model.save(PARAMS['saved_model_path'] + '-stage 1')
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))

    if not (PARAMS['transfer_to_PNAS'] or PARAMS['transfer_to_Fossil']):
        return history

    # ---- Stage 2 -----------------------------------------------------------
    PARAMS['include_classes'] = STAGE1_data_files.classes
    train_dataset, validation_dataset, STAGE2_data_files, STAGE2_excluded = create_dataset(
        dataset_name=PARAMS['stage_2']['dataset_name'],
        include_classes=PARAMS['include_classes'],
        **_common_dataset_kwargs(PARAMS))

    _store_split_sizes(PARAMS, STAGE2_data_files)

    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=_stage_callbacks(PARAMS, model),
                        validation_data=validation_dataset,
                        shuffle=True,
                        steps_per_epoch=PARAMS['steps_per_epoch'],
                        validation_steps=PARAMS['validation_steps'])
    return history
    parser.add_argument("--num_jobs", type=int, default=8)
    parser.add_argument("--record_filtered", action="store_true")
    parser.add_argument("--use_atomrings", action="store_true")
    args = parser.parse_args()

    args.algorithm = "gegl_constrained"

    random.seed(0)
    device = torch.device(0)

    neptune.init(
        project_qualified_name="sungsoo.ahn/deep-molecular-optimization")
    experiment = neptune.create_experiment(name=args.algorithm,
                                           params=vars(args))
    neptune.append_tag(
        f"{args.smi_id_min:03d}_{args.smi_id_max:03d}_{args.similarity_threshold}"
        .replace(".", ""))

    char_dict = SmilesCharDictionary(dataset=args.dataset,
                                     max_smi_len=args.max_smiles_length)
    dataset = load_dataset(char_dict=char_dict, smi_path=args.dataset_path)

    if args.use_atomrings:
        similarity_constrained_penalized_logp = similarity_constrained_penalized_logp_atomrings
    else:
        similarity_constrained_penalized_logp = similarity_constrained_penalized_logp_cyclebasis

    for smi_id in range(args.smi_id_min, args.smi_id_max):
        print(f"ID: {smi_id}")
        reference_smi = dataset[smi_id]
        benchmark = similarity_constrained_penalized_logp(
Example #29
0
import neptune

# Onboarding example: connect to a Neptune project, create an experiment,
# tag it, log two metrics, then verify that exactly those two logs exist.
# NOTE(review): `params`, `train_f1` and `test_f1` are not defined anywhere in
# this excerpt -- they must be provided by earlier steps before this runs.
neptune.init(
    project_qualified_name=
    'shared/onboarding',  # change this to your `workspace_name/project_name`
    api_token='ANONYMOUS',  # change this to your api token
)

# Step 3: Create an experiment and save parameters

neptune.create_experiment(name='great-idea', params=params)

# Step 4. Add tags to organize things

# Both tags are passed in a single call as a list.
neptune.append_tag(['experiment-organization', 'me'])

# Step 5. Add logging of train and evaluation metrics

neptune.log_metric('train_f1', train_f1)
neptune.log_metric('test_f1', test_f1)

# Step 6. Run a few experiments with different parameters

# tests
# Sanity check: the log names of the current experiment must be exactly the
# two metric names logged above; anything else raises a bare ValueError.
current_exp = neptune.get_experiment()

correct_logs = ['train_f1', 'test_f1']

if set(current_exp.get_logs().keys()) != set(correct_logs):
    raise ValueError()
# Timeseries example: create an experiment, log a metric series, a text
# series, one property, one tag and a few random images, then stop the run.
# NOTE(review): `np` (and `neptune`) are used below but no import for them is
# visible next to this snippet -- confirm they are imported earlier.

# select project
neptune.init('USERNAME/example-project')

# define parameters
PARAMS = {'timeseries_factor': 1.7, 'n_iterations': 200, 'n_images': 7}

# create experiment
neptune.create_experiment(name='timeseries_example', params=PARAMS)

# log some metrics
# The range starts at 1, so n_iterations - 1 points are logged per series.
for i in range(1, PARAMS['n_iterations']):
    neptune.log_metric('iteration', i)
    neptune.log_metric('timeseries',
                       PARAMS['timeseries_factor'] * np.cos(i / 10))
    neptune.log_text('text_info', 'some value {}'.format(0.95 * i**2))

# log property (key:value pair)
neptune.set_property('timeseries_data_hash', '123e4567')

# add tag to the experiment
neptune.append_tag('timeseries_modeling')

# log some images
for j in range(PARAMS['n_images']):
    # Random 10x10x3 array scaled by 255; each cell is then repeated 30 times
    # along both spatial axes, giving a 300x300x3 mosaic.
    array = np.random.rand(10, 10, 3) * 255
    array = np.repeat(array, 30, 0)
    array = np.repeat(array, 30, 1)
    neptune.log_image('mosaics', array)

neptune.stop()