def save_experiment(self):
    """Pickle the agent to ``./output/<experiment id>.ckp`` and register it with Neptune.

    The checkpoint is logged as a Neptune artifact, then the agent and
    environment names taken from ``self._args`` are appended as tags on the
    current experiment.
    """
    checkpoint_path = f"./output/{self._experiment._id}.ckp"
    with open(checkpoint_path, "wb") as checkpoint_file:
        pickle.dump(self._agent, checkpoint_file)

    # NOTE(review): the upload is placed after the ``with`` so the file is
    # closed before Neptune reads it; the flattened source does not show the
    # original nesting -- confirm.
    neptune.log_artifact(checkpoint_path)
    for tag in (self._args.agent, self._args.environment):
        neptune.append_tag(tag)
def main(args):
    """Train and/or evaluate the paragraph selector, optionally logging to Neptune.

    When ``args.logger`` is truthy a Neptune experiment is created before any
    work starts and stopped at the very end. ``args.do_train`` triggers
    training followed by ``save_model()``; ``args.do_eval`` reloads the model
    and evaluates it on the "dev" split.
    """
    init_logger()
    set_seed(args)

    if args.logger:
        neptune.init("wjdghks950/NumericHGN")
        experiment_name = "({}) NumHGN_{}_{}_{}".format(
            args.task,
            args.train_batch_size,
            args.max_seq_len,
            args.train_file,
        )
        neptune.create_experiment(name=experiment_name)
        neptune.append_tag(
            "BertForSequenceClassification", "finetuning", "num_augmented_HGN"
        )

    tok = load_tokenizer(args)

    # The training split is only materialised when we are going to train.
    train_split = None
    if args.do_train:
        train_split = load_and_cache_examples(args, tok, mode="train")
    # NOTE(review): nesting reconstructed from a flattened source; the dev
    # split is loaded unconditionally here because evaluation uses it even
    # without training -- confirm against the original layout.
    dev_split = load_and_cache_examples(args, tok, mode="dev")
    # test_dataset = load_and_cache_examples(args, tokenizer, mode="test")

    selector_trainer = ParaSelectorTrainer(args, train_split, dev_split)

    if args.do_train:
        selector_trainer.train()
        selector_trainer.save_model()

    if args.do_eval:
        selector_trainer.load_model()
        selector_trainer.evaluate("dev")

    if args.logger:
        neptune.stop()
def write_results(config: configure_finetuning.FinetuningConfig, results):
    """Append evaluation metrics to the results text file and pickle them.

    ``results`` is iterated as a sequence of per-trial mappings from task name
    to a mapping of metric name -> value. The "time" and "global_step" entries
    are skipped. For every remaining task one text line is produced and the
    task's metrics are also sent to the current Neptune experiment.
    """
    utils.log("Writing results to", config.results_txt)
    utils.mkdir(config.results_txt.rsplit("/", 1)[0])
    utils.write_pickle(results, config.results_pkl)

    bookkeeping_keys = ("time", "global_step")
    with tf.io.gfile.GFile(config.results_txt, "a") as out_file:
        report_lines = []
        for trial in results:
            for task_name, task_metrics in trial.items():
                if task_name in bookkeeping_keys:
                    continue
                formatted_metrics = " - ".join(
                    "{:}: {:.2f}".format(metric, value)
                    for metric, value in task_metrics.items()
                )
                report_lines.append(task_name + ": " + formatted_metrics + "\n")

                # Neptune Metric Logging
                neptune.append_tag('ft')
                neptune.append_tag('tensorflow')
                neptune.set_property('task', task_name)
                for metric, value in task_metrics.items():
                    neptune.log_metric(metric, value)
        # Single write of the whole report, as in the original.
        out_file.write("".join(report_lines))

    utils.write_pickle(results, config.results_pkl)
def modify_tags(self):
    """Append and remove tags, then assert the experiment's resulting tag set.

    Exercises both ``append_tags`` with a single string and ``append_tag``
    with a list, and removes one existing and one non-existing tag.
    """
    neptune.append_tags("tag1")
    neptune.append_tag(["tag2_to_remove", "tag3"])

    for stale_tag in ("tag2_to_remove", "tag4_remove_non_existing"):
        neptune.remove_tag(stale_tag)

    expected_tags = {"initial tag 1", "initial tag 2", "tag1", "tag3"}
    actual_tags = set(neptune.get_experiment().get_tags())
    assert actual_tags == expected_tags
def train(cfg, network):
    """Run the training loop for ``network`` and return it.

    Builds the trainer, optimizer, LR scheduler, recorder and data loaders
    from ``cfg``, resumes from ``cfg.model_dir`` via ``load_model``, then
    trains from the returned epoch up to ``cfg.train.epoch``. Checkpoints are
    saved every ``cfg.save_ep`` epochs and validation runs every
    ``cfg.eval_ep`` epochs. When ``cfg.neptune`` is truthy a Neptune
    experiment wraps the loop and step counters are passed to the trainer.
    """
    dataset_name = cfg.train.dataset
    if not dataset_name.startswith('City'):
        torch.multiprocessing.set_sharing_strategy('file_system')

    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    # An evaluator is only built for non-Coco datasets; the Coco path below
    # validates through ``trainer.val_coco`` instead.
    if 'Coco' not in cfg.train.dataset:
        evaluator = make_evaluator(cfg)

    first_epoch = load_model(
        network, optimizer, scheduler, recorder, cfg.model_dir, resume=cfg.resume
    )
    # set_lr_scheduler(cfg, scheduler)
    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)
    # train_loader = make_data_loader(cfg, is_train=True, max_iter=100)

    global_steps = None
    if cfg.neptune:
        global_steps = {
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }
        neptune.init('hccccccccc/clean-pvnet')
        # The experiment is named after the last path component of model_dir.
        neptune.create_experiment(cfg.model_dir.rsplit('/', 1)[-1])
        neptune.append_tag('pose')

    for epoch in range(first_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder, global_steps)
        scheduler.step()

        epochs_done = epoch + 1
        if epochs_done % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch, cfg.model_dir)

        if epochs_done % cfg.eval_ep != 0:
            continue
        if 'Coco' in cfg.train.dataset:
            trainer.val_coco(val_loader, global_steps)
        else:
            trainer.val(epoch, val_loader, evaluator, recorder)

    if cfg.neptune:
        neptune.stop()

    return network
def __init__(self, learning_rate):
    """Start a Neptune experiment and build the linear layer and its SGD optimizer.

    Parameters
    ----------
    learning_rate :
        Passed to ``torch.optim.SGD`` as the learning rate and recorded as the
        ``learning_rate`` experiment parameter.

    Returns
    -------
    None.
    """
    import os  # local import: the file's import block is not visible here

    # SECURITY(review): an API token is committed to source. The
    # NEPTUNE_API_TOKEN environment variable now takes precedence; the literal
    # is kept only as a fallback so existing runs keep working. Rotate this
    # token and delete the fallback.
    api_token = os.environ.get("NEPTUNE_API_TOKEN") or (
        "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiJjYjdhMGI5Ny02YTNmLTRlN2MtOTkyYi1jNDM0YjRmMjM5MDQifQ=="
    )
    neptune.init(
        api_token=api_token,
        project_qualified_name="martinjms/examples",
    )
    neptune.create_experiment(
        name="caisim-example", params=dict(learning_rate=learning_rate)
    )
    neptune.append_tag("minimal-example")

    self.learning_rate = learning_rate

    # Then pytorch stuff..
    self.lin = torch.nn.Linear(50, 10)
    self.opt = torch.optim.SGD(self.lin.parameters(), learning_rate)
    self.step = 0
    # ``device`` is resolved from the enclosing module scope.
    self.lin.to(device)
def add_tags(self, tags):
    '''
    Attaches tags to every enabled experiment tracker.

    Parameters
    ----------
    tags : list
        list of tags (strings) e.g.: ['tag1', 'tag2']

    Returns
    -------
    None.

    '''
    if self.neptune:
        neptune.append_tag(tags)
    if not self.comet:
        return
    self.comet_experiment.add_tags(tags)
def run_roshambo():
    """Train a linear readout on PCritical spike bins for the Roshambo dataset.

    Seeds the RNGs, builds a ``PCritical`` model on a small-world topology and
    a ``LinearWithBN`` readout, then runs 10 outer iterations. Each iteration
    trains the readout on at most 20 batches and measures accuracy on at most
    10 validation batches. Hyper-parameters and metrics are sent to the
    current Neptune experiment as they are set or computed.

    The dataset location is read from the
    ``ROSHAMBO_DATASET_LOCATION_500ms_subsamples`` environment variable.

    NOTE(review): block nesting was reconstructed from a flattened source;
    confirm the placement of the per-batch logging calls.
    """
    # --- Reproducibility -------------------------------------------------
    seed = 0x1B
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    neptune.set_property("seed", seed)
    neptune.append_tag("ROSHAMBO")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    _logger.info("Using device type %s", str(device))

    # --- Input geometry and timing ---------------------------------------
    reduction_factor = 5  # Reduce dimension axis by this factor
    neptune.set_property("reduction_factor", reduction_factor)

    width = 240 // reduction_factor
    height = 180 // reduction_factor
    # presumably the factor 2 is one channel per event polarity -- TODO confirm
    n_features = width * height * 2
    batch_size = 5
    neptune.set_property("batch_size", batch_size)
    dt = 1 * ms
    neptune.set_property("dt", dt)
    bin_size = 50 * ms
    neptune.set_property("bin_size", bin_size)
    # Number of simulation steps that make up one output bin.
    bin_steps = rescale(bin_size, dt, int)
    duration_per_sample = 500 * ms
    neptune.set_property("duration_per_sample", duration_per_sample)
    # Number of simulation steps per sample.
    number_of_steps = rescale(duration_per_sample, dt, int)

    # --- Reservoir topology, readout and optimiser -----------------------
    topology = SmallWorldTopology(
        SmallWorldTopology.Configuration(
            minicolumn_shape=(7, 7, 7),
            macrocolumn_shape=(3, 3, 3),
            minicolumn_spacing=300,
            p_max=0.025,
            sparse_init=True,
        )
    )
    n_neurons = topology.number_of_nodes()
    nb_of_bins = 1 + number_of_steps // bin_steps
    # The readout sees the flattened (neuron, bin) activity and emits 3 logits,
    # one per entry of ``label_dict`` below.
    linear_readout = LinearWithBN(n_neurons * nb_of_bins, 3).to(device)
    loss_fn = torch.nn.CrossEntropyLoss()
    # Only the readout's parameters are handed to the optimiser.
    optimizer = torch.optim.Adam(linear_readout.parameters(), lr=0.001)
    neptune.set_property("adam.lr", 0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    neptune.set_property("steplr.gamma", 0.1)
    neptune.set_property("steplr.step_size", 2)

    p_critical_configs = {
        "alpha": 0.0025,
        "beta": 0.00025,
        "tau_v": 50 * ms,
        "tau_i": 5 * ms,
        "v_th": 1.0,
    }

    # Record every PCritical hyper-parameter under its own property name.
    for k, v in p_critical_configs.items():
        neptune.set_property(k, v)

    model = PCritical(
        n_features, batch_size, topology, dt=dt, **p_critical_configs,
    ).to(device)

    # --- Data --------------------------------------------------------------
    all_transforms = Compose(
        [
            ScaleDown(240, 180, factor=reduction_factor),
            ToDense(width, height, duration_per_sample, dt=dt),
            Flatten(),
        ]
    )

    label_dict = {
        "scissors": 0,
        "paper": 1,
        "rock": 2,
    }

    data = INIRoshambo(
        os.getenv("ROSHAMBO_DATASET_LOCATION_500ms_subsamples"),
        transforms=all_transforms,
    )
    train_data, val_data = split_per_user(data, train_ratio=0.85)
    _logger.info(
        "Keeping %i samples for training and %i for validation",
        len(train_data),
        len(val_data),
    )

    def labels_to_tensor(labels):
        """Map string labels to their integer class ids from ``label_dict``."""
        return torch.tensor([label_dict[l] for l in labels])

    def run_batch(X, y):
        """Step the model through one batch and return binned output spikes.

        ``X`` is indexed as ``X[:, :, t]``, i.e. the simulation step is its
        last axis. The result has shape (len(y), n_neurons, nb_of_bins).
        """
        current_batch_size = len(y)
        # Keep the model's batch size in sync with the batch actually received.
        model.batch_size = current_batch_size
        bins = torch.zeros(current_batch_size, n_neurons, nb_of_bins, device=device)
        for t in range(number_of_steps):
            out_spikes = model.forward(X[:, :, t])
            # Accumulate this step's spikes into the bin that contains step t.
            bins[:, :, t // bin_steps] += out_spikes
        return bins

    # --- Train / validate --------------------------------------------------
    for iter_nb in range(10):
        train_generator = torch_data.DataLoader(
            train_data,
            batch_size=batch_size,
            shuffle=True,
            num_workers=2,
            pin_memory=True,
            timeout=120,
        )
        for i, (X, labels) in enumerate(tqdm(train_generator)):
            # Cap each outer iteration at 20 training batches.
            if i >= 20:
                break

            neptune.log_metric("iteration", i)
            X, y = X.to(device), labels_to_tensor(labels).to(device)

            # fig, axs = plt.subplots()
            # display_spike_train(axs, X[0])
            # plt.show()
            # print(X.shape)
            # exit(0)

            bins = run_batch(X, y)

            # fig, axs = plt.subplots()
            # activity = bins[0].sum(dim=0)
            # axs.plot(np.arange(nb_of_bins), activity.cpu().numpy())
            # plt.show()

            optimizer.zero_grad()
            # Flatten (neuron, bin) into one feature vector per sample.
            out = linear_readout(bins.view(len(y), -1))
            loss = loss_fn(out, y)
            loss.backward()
            optimizer.step()

            loss_val = loss.cpu().detach().item()
            _logger.info("Loss: %.3f", loss_val)
            neptune.log_metric("loss", loss_val)

        # Accuracy counters restart on every outer iteration.
        total_accurate = 0
        total_elems = 0
        val_generator = torch_data.DataLoader(
            val_data,
            batch_size=batch_size,
            shuffle=False,
            num_workers=2,
            pin_memory=True,
            timeout=120,
        )
        # NOTE(review): this loop runs without torch.no_grad() and without
        # switching the readout to eval mode -- confirm that is intended.
        for i, (X, labels) in enumerate(tqdm(val_generator)):
            # Cap validation at 10 batches.
            if i >= 10:
                break

            X, y = X.to(device), labels_to_tensor(labels).to(device)
            bins = run_batch(X, y)
            out = linear_readout(bins.view(len(y), -1))
            preds = torch.argmax(out, dim=1)
            total_accurate += torch.sum(preds == y).cpu().float().item()
            total_elems += len(y)
            # Running accuracy over the validation batches seen so far.
            _logger.info("Current accuracy: %.4f", total_accurate / total_elems)
            neptune.log_metric("current_accuracy", total_accurate / total_elems)

        # The LR schedule advances once per outer iteration.
        scheduler.step()

        _logger.info(
            "Final accuracy at iter %i: %.4f", iter_nb, total_accurate / total_elems
        )
        neptune.log_metric("final_accuracy", total_accurate / total_elems)
def record_eval_metric(neptune, metrics):
    """Forward every ``name -> value`` pair in ``metrics`` to ``neptune.log_metric``.

    ``neptune`` is whatever logger object the caller passes in; it only needs
    a ``log_metric(name, value)`` method.
    """
    for metric_name, metric_value in metrics.items():
        neptune.log_metric(metric_name, metric_value)


# %%
model_path = '/workspace/ml-workspace/thesis_git/thesis/models/'
best_eval_f1 = 0

# Measure the total training time for the whole run.
total_t0 = time.time()

# NOTE(review): nesting reconstructed from a flattened source; the epoch loop
# is assumed to run inside the experiment context -- confirm.
with neptune.create_experiment(
    name="HierarchicalSemanticGraphNetwork",
    params=PARAMS,
    upload_source_files=['HSGN_GAT.py'],
):
    neptune.append_tag(
        ["homogeneous_graph", "GATConv", "bidirectional_token_node_edge"]
    )
    neptune.set_property('server', 'IRGPU2')
    neptune.set_property('training_set_path', training_path)
    neptune.set_property('dev_set_path', dev_path)

    # For each epoch...
    for epoch_i in range(epochs):
        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')
def main(**kwargs): import sys for k, v in kwargs.items(): sys.argv += [k, v] from pprint import pprint import argparse import datetime import json import os parser = argparse.ArgumentParser() parser.add_argument('--neptune_project_name', default='jacobarose/sandbox', type=str, help='Neptune.ai project name to log under') parser.add_argument('--experiment_name', default='pnas_minimal_example', type=str, help='Neptune.ai experiment name to log under') parser.add_argument('--config_path', default=r'/home/jacob/projects/pyleaves/pyleaves/configs/example_configs/pnas_resnet_config.json', type=str, help='JSON config file') parser.add_argument('-gpu', '--gpu_id', default='1', type=str, help='integer number of gpu to train on', dest='gpu_id') parser.add_argument('-tags', '--add-tags', default=[], type=str, nargs='*', help='Add arbitrary list of tags to apply to this run in neptune', dest='tags') parser.add_argument('-f', default=None) args = parser.parse_args() with open(args.config_path, 'r') as config_file: PARAMS = json.load(config_file) # print(gpu) # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id) pprint(PARAMS) import tensorflow as tf import neptune # tf.debugging.set_log_device_placement(True) print(tf.__version__) import arrow import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import io from stuf import stuf from more_itertools import unzip from functools import partial # import tensorflow as tf # tf.compat.v1.enable_eager_execution() AUTOTUNE = tf.data.experimental.AUTOTUNE from pyleaves.leavesdb.tf_utils.tf_utils import set_random_seed, reset_keras_session import pyleaves from pyleaves.utils.img_utils import random_pad_image from pyleaves.utils.utils import ensure_dir_exists from pyleaves.datasets import leaves_dataset, fossil_dataset, pnas_dataset, base_dataset from pyleaves.models.vgg16 import VGG16, VGG16GrayScale from pyleaves.models import resnet, vgg16 from 
tensorflow.compat.v1.keras.callbacks import Callback, ModelCheckpoint, TensorBoard, LearningRateScheduler, EarlyStopping from tensorflow.keras import metrics from tensorflow.keras.preprocessing.image import load_img, img_to_array from tensorflow.keras import layers from tensorflow.keras import backend as K import tensorflow_datasets as tfds import neptune_tensorboard as neptune_tb seed = 346 # set_random_seed(seed) # reset_keras_session() def get_preprocessing_func(model_name): if model_name.startswith('resnet'): from tensorflow.keras.applications.resnet_v2 import preprocess_input elif model_name == 'vgg16': from tensorflow.keras.applications.vgg16 import preprocess_input elif model_name=='shallow': def preprocess_input(x): return x/255.0 # ((x/255.0)-0.5)*2.0 return preprocess_input #lambda x,y: (preprocess_input(x),y) def _load_img(image_path):#, img_size=(224,224)): img = tf.io.read_file(image_path) img = tf.image.decode_jpeg(img, channels=3) img = tf.image.convert_image_dtype(img, tf.float32) return img # return tf.compat.v1.image.resize_image_with_pad(img, *img_size) def _encode_label(label, num_classes=19): label = tf.cast(label, tf.int32) label = tf.one_hot(label, depth=num_classes) return label def _load_example(image_path, label, num_classes=19): img = _load_img(image_path) one_hot_label = _encode_label(label, num_classes=num_classes) return img, one_hot_label def _load_uint8_example(image_path, label, num_classes=19): img = tf.image.convert_image_dtype(_load_img(image_path)*255.0, dtype=tf.uint8) one_hot_label = _encode_label(label, num_classes=num_classes) return img, one_hot_label def rgb2gray_3channel(img, label): ''' Convert rgb image to grayscale, but keep num_channels=3 ''' img = tf.image.rgb_to_grayscale(img) img = tf.image.grayscale_to_rgb(img) return img, label def rgb2gray_1channel(img, label): ''' Convert rgb image to grayscale, num_channels from 3 to 1 ''' img = tf.image.rgb_to_grayscale(img) return img, label def log_data(logs): for k, v in 
logs.items(): neptune.log_metric(k, v) neptune_logger = tf.keras.callbacks.LambdaCallback(on_epoch_end=lambda epoch, logs: log_data(logs)) def focal_loss(gamma=2.0, alpha=4.0): gamma = float(gamma) alpha = float(alpha) def focal_loss_fixed(y_true, y_pred): """Focal loss for multi-classification FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t) Notice: y_pred is probability after softmax gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x) Focal Loss for Dense Object Detection https://arxiv.org/abs/1708.02002 Arguments: y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls] y_pred {tensor} -- model's output, shape of [batch_size, num_cls] Keyword Arguments: gamma {float} -- (default: {2.0}) alpha {float} -- (default: {4.0}) Returns: [tensor] -- loss. """ epsilon = 1.e-9 y_true = tf.convert_to_tensor(y_true, tf.float32) y_pred = tf.convert_to_tensor(y_pred, tf.float32) model_out = tf.add(y_pred, epsilon) ce = tf.multiply(y_true, -tf.log(model_out)) weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma)) fl = tf.multiply(alpha, tf.multiply(weight, ce)) reduced_fl = tf.reduce_max(fl, axis=1) return tf.reduce_mean(reduced_fl) return focal_loss_fixed def per_class_accuracy(y_true, y_pred): return tf.metrics.mean_per_class_accuracy(y_true, y_pred, num_classes=PARAMS['num_classes']) def build_model(model_params, optimizer, loss, METRICS): if model_params['name']=='vgg16': model_builder = vgg16.VGG16GrayScale(model_params) elif model_params['name'].startswith('resnet'): model_builder = resnet.ResNet(model_params) base = model_builder.build_base() model = model_builder.build_head(base) model.compile(optimizer=optimizer, loss=loss, metrics=METRICS) return model def build_shallow(input_shape=(224,224,3), num_classes=10, optimizer=None, loss=None, METRICS=None): model = tf.keras.models.Sequential() model.add(layers.Conv2D(64, (7, 7), activation='relu', input_shape=input_shape, 
kernel_initializer=tf.initializers.GlorotNormal())) model.add(layers.MaxPooling2D((2, 2))) model.add(layers.Conv2D(64, (7, 7), activation='relu', kernel_initializer=tf.initializers.GlorotNormal())) model.add(layers.MaxPooling2D((2, 2))) model.add(layers.Conv2D(64, (7, 7), activation='relu', kernel_initializer=tf.initializers.GlorotNormal())) model.add(layers.Flatten()) model.add(layers.Dense(64*2, activation='relu', kernel_initializer=tf.initializers.GlorotNormal())) model.add(layers.Dense(num_classes,activation='softmax', kernel_initializer=tf.initializers.GlorotNormal())) model.compile(optimizer=optimizer, loss=loss, metrics=METRICS) return model class ImageLogger: '''Tensorflow 2.0 version''' def __init__(self, log_dir: str, max_images: int, name: str): self.file_writer = tf.summary.create_file_writer(log_dir) self.log_dir = log_dir self.max_images = max_images self.name = name self._counter = tf.Variable(0, dtype=tf.int64) self.filepaths = [] def add_log(self, img, counter=None, name=None): ''' Intention is to generalize this to an abstract class for logging to any experiment management platform (e.g. neptune, mlflow, etc) Currently takes a filepath pointing to an image file and logs to current neptune experiment. 
''' # scaled_images = (img - tf.math.reduce_min(img))/(tf.math.reduce_max(img) - tf.math.reduce_min(img)) # keep = 0 # scaled_images = tf.image.convert_image_dtype(tf.squeeze(scaled_images[keep,:,:,:]), dtype=tf.uint8) # scaled_images = tf.expand_dims(scaled_images, 0) # tf.summary.image(name=self.name, data=scaled_images, step=self._counter, max_outputs=self.max_images) scaled_img = (img - np.min(img))/(np.max(img) - np.min(img)) * 255.0 scaled_img = scaled_img.astype(np.uint32) neptune.log_image(log_name= name or self.name, x=counter, y=scaled_img) return scaled_img def __call__(self, images, labels): with self.file_writer.as_default(): scaled_images = (images - tf.math.reduce_min(images))/(tf.math.reduce_max(images) - tf.math.reduce_min(images)) keep = 0 scaled_images = tf.image.convert_image_dtype(tf.squeeze(scaled_images[keep,:,:,:]), dtype=tf.uint8) scaled_images = tf.expand_dims(scaled_images, 0) labels = tf.argmax(labels[[keep], :],axis=1) tf.summary.image(name=self.name, data=scaled_images, step=self._counter, max_outputs=self.max_images) filepath = os.path.join(self.log_dir,'sample_images',f'{self.name}-{self._counter}.jpg') scaled_images = tf.image.encode_jpeg(tf.squeeze(scaled_images)) tf.io.write_file(filename=tf.constant(filepath), contents=scaled_images) # self.add_log(scaled_images) self._counter.assign_add(1) return images, labels def _cond_apply(x, y, func, prob): """Conditionally apply func to x and y with probability prob. Parameters ---------- x : type Input to conditionally pass through func y : type Label func : type Function to conditionally be applied to x and y prob : type Probability of applying function, within range [0.0,1.0] Returns ------- x, y """ return tf.cond((tf.random.uniform([], 0, 1) >= (1.0 - prob)), lambda: func(x,y), lambda: (x,y)) class ImageAugmentor: """Short summary. 
Parameters ---------- augmentations : dict Maps a sequence of named augmentations to a scalar probability, according to which they'll be conditionally applied in order. resize_w_pad : tuple, default=None Description of parameter `resize_w_pad`. random_crop : tuple, default=None Description of parameter `random_crop`. random_jitter : dict First applies resize_w_pad, then random_crop. If user desires only 1 of these, set this to None. Should be a dict with 2 keys: 'resize':(height, width) 'crop_size':(crop_height,crop_width, channels) Only 1 of these 3 kwargs should be provided to any given augmentor: {'resize_w_pad', 'random_crop', 'random_jitter'} Example values for each: resize_w_pad=(224,224) random_crop=(224,224,3) random_jitter={'resize':(338,338), 'crop_size':(224,224, 3)} seed : int, default=None Random seed to apply to all augmentations Examples ------- Examples should be written in doctest format, and should illustrate how to use the function/class. >>> Attributes ---------- augmentations """ def __init__(self, name='', augmentations={'rotate':1.0, 'flip':1.0, 'color':1.0, 'rgb2gray_3channel':1.0}, resize_w_pad=None, random_crop=None, random_jitter={'resize':(338,338), 'crop_size':(224,224,3)}, log_dir=None, seed=None): self.name = name self.augmentations = augmentations self.seed = seed if resize_w_pad: self.target_h = resize_w_pad[0] self.target_w = resize_w_pad[1] # self.resize = self.resize_w_pad elif random_crop: self.crop_size = random_crop self.target_h = self.crop_size[0] self.target_w = self.crop_size[1] # self.resize = self.random_crop elif random_jitter: # self.target_h = tf.random.uniform([], random_jitter['crop_size'][0], random_jitter['resize'][0], dtype=tf.int32, seed=self.seed) # self.target_w = tf.random.uniform([], random_jitter['crop_size'][1], random_jitter['resize'][1], dtype=tf.int32, seed=self.seed) self.crop_size = random_jitter['crop_size'] # self.resize = self.random_jitter self.target_h = random_jitter['crop_size'][0] 
self.target_w = random_jitter['crop_size'][1] self.resize = self.resize_w_pad self.maps = {'rotate':self.rotate, 'flip':self.flip, 'color':self.color, 'rgb2gray_3channel':self.rgb2gray_3channel, 'rgb2gray_1channel':self.rgb2gray_1channel} self.log_dir = log_dir def rotate(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: """Rotation augmentation Args: x, tf.Tensor: Image label, tf.Tensor: arbitrary tensor, passes through unchanged Returns: Augmented image, label """ # Rotate 0, 90, 180, 270 degrees return tf.image.rot90(x, tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32,seed=self.seed)), label def flip(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: """Flip augmentation Args: x, tf.Tensor: Image to flip label, tf.Tensor: arbitrary tensor, passes through unchanged Returns: Augmented image, label """ x = tf.image.random_flip_left_right(x, seed=self.seed) x = tf.image.random_flip_up_down(x, seed=self.seed) return x, label def color(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: """Color augmentation Args: x, tf.Tensor: Image label, tf.Tensor: arbitrary tensor, passes through unchanged Returns: Augmented image, label """ x = tf.image.random_hue(x, 0.08, seed=self.seed) x = tf.image.random_saturation(x, 0.6, 1.6, seed=self.seed) x = tf.image.random_brightness(x, 0.05, seed=self.seed) x = tf.image.random_contrast(x, 0.7, 1.3, seed=self.seed) return x, label def rgb2gray_3channel(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: """Convert RGB image -> grayscale image, maintain number of channels = 3 Args: x, tf.Tensor: Image label, tf.Tensor: arbitrary tensor, passes through unchanged Returns: Augmented image, label """ x = tf.image.rgb_to_grayscale(x) x = tf.image.grayscale_to_rgb(x) return x, label def rgb2gray_1channel(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: """Convert RGB image -> grayscale image, reduce number of channels from 3 -> 1 Args: x, tf.Tensor: Image label, tf.Tensor: arbitrary tensor, passes through unchanged 
Returns: Augmented image, label """ x = tf.image.rgb_to_grayscale(x) return x, label def resize_w_pad(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: # TODO Finish this # random_pad_image(x,min_image_size=None,max_image_size=None,pad_color=None,seed=self.seed) return tf.image.resize_with_pad(x, target_height=self.target_h, target_width=self.target_w), label def random_crop(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: return tf.image.random_crop(x, size=self.crop_size), label @tf.function def random_jitter(self, x: tf.Tensor, label: tf.Tensor) -> tf.Tensor: x, label = self.resize_w_pad(x, label) x, label = self.random_crop(x, label) return x, label def apply_augmentations(self, dataset: tf.data.Dataset): """ Call this function to apply all of the augmentation in the order of specification provided to the constructor __init__() of ImageAugmentor. Args: dataset, tf.data.Dataset: must yield individual examples of form (x, y) Returns: Augmented dataset """ dataset = dataset.map(self.resize, num_parallel_calls=AUTOTUNE) for aug_name, aug_p in self.augmentations.items(): aug = self.maps[aug_name] dataset = dataset.map(lambda x,y: _cond_apply(x, y, aug, prob=aug_p), num_parallel_calls=AUTOTUNE) # dataset = dataset.map(lambda x,y: _cond_apply(x, y, func=aug, prob=aug_p), num_parallel_calls=AUTOTUNE) return dataset class ImageLoggerCallback(Callback): '''Tensorflow 2.0 version Callback that keeps track of a tf.data.Dataset and logs the correct batch to neptune based on the current batch. 
''' def __init__(self, data :tf.data.Dataset, freq=1, max_images=-1, name='', encoder=None): self.data = data self.freq = freq self.max_images = max_images self.name = name self.encoder=encoder self.init_iterator() def init_iterator(self): self.data_iter = iter(self.data) self._batch = 0 self._count = 0 self.finished = False def yield_batch(self): batch_data = next(self.data_iter) self._batch += 1 self._count += batch_data[0].shape[0] return batch_data def add_log(self, img, counter=None, name=None): ''' Intention is to generalize this to an abstract class for logging to any experiment management platform (e.g. neptune, mlflow, etc) Currently takes a filepath pointing to an image file and logs to current neptune experiment. ''' scaled_img = (img - np.min(img))/(np.max(img) - np.min(img)) * 255.0 scaled_img = scaled_img.astype(np.uint32) neptune.log_image(log_name= name or self.name, x=counter, y=scaled_img) return scaled_img def on_train_batch_begin(self, batch, logs=None): if batch % self.freq or self.finished: return while batch >= self._batch: x, y = self.yield_batch() if self.max_images==-1: self.max_images=x.shape[0] if x.ndim==3: np.newaxis(x, axis=0) if x.shape[0]>self.max_images: x = x[:self.max_images,...] y = y[:self.max_images,...] 
x = x.numpy() y = np.argmax(y.numpy(),axis=1) if self.encoder: y = self.encoder.decode(y) for i in range(x.shape[0]): # self.add_log(x[i,...], counter=i, name = f'{self.name}-{y[i]}-batch_{str(self._batch).zfill(3)}') self.add_log(x[i,...], counter=self._count+i, name = f'{self.name}-{y[i]}') print(f'Batch {self._batch}: Logged {np.max([x.shape[0],self.max_images])} {self.name} images to neptune') def on_epoch_end(self, epoch, logs={}): self.finished = True class ConfusionMatrixCallback(Callback): '''Tensorflow 2.0 version''' def __init__(self, log_dir, imgs : dict, labels : dict, classes, freq=1, include_train=False, seed=None): self.file_writer = tf.summary.create_file_writer(log_dir) self.log_dir = log_dir self.seed = seed self._counter = 0 assert np.all(np.array(imgs.keys()) == np.array(labels.keys())) self.imgs = imgs for k,v in labels.items(): if v.ndim==2: labels[k] = tf.argmax(v,axis=-1) self.labels = labels self.num_samples = {k:l.numpy().shape[0] for k,l in labels.items()} self.classes = classes self.freq = freq self.include_train = include_train def log_confusion_matrix(self, model, imgs, labels, epoch, name='', norm_cm=False): pred_labels = model.predict_classes(imgs) # pred_labels = tf.argmax(pred_labels,axis=-1) pred_labels = pred_labels[:,None] con_mat = tf.math.confusion_matrix(labels=labels, predictions=pred_labels, num_classes=len(self.classes)).numpy() if norm_cm: con_mat = np.around(con_mat.astype('float') / con_mat.sum(axis=1)[:, np.newaxis], decimals=2) con_mat_df = pd.DataFrame(con_mat, index = self.classes, columns = self.classes) figure = plt.figure(figsize=(12, 12)) sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues) plt.tight_layout() plt.ylabel('True label') plt.xlabel('Predicted label') buf = io.BytesIO() plt.savefig(buf, format='png') buf.seek(0) image = tf.image.decode_png(buf.getvalue(), channels=4) image = tf.expand_dims(image, 0) with self.file_writer.as_default(): tf.summary.image(name=name+'_confusion_matrix', data=image, 
step=self._counter) neptune.log_image(log_name=name+'_confusion_matrix', x=self._counter, y=figure) plt.close(figure) self._counter += 1 return image def on_epoch_end(self, epoch, logs={}): if (not self.freq) or (epoch%self.freq != 0): return if self.include_train: cm_summary_image = self.log_confusion_matrix(self.model, self.imgs['train'], self.labels['train'], epoch=epoch, name='train') cm_summary_image = self.log_confusion_matrix(self.model, self.imgs['val'], self.labels['val'], epoch=epoch, name='val') #################################################################################### #################################################################################### #################################################################################### neptune.init(project_qualified_name=args.neptune_project_name) # neptune_tb.integrate_with_tensorflow() experiment_dir = '/media/data/jacob/sandbox_logs' experiment_name = args.experiment_name experiment_start_time = arrow.utcnow().format('YYYY-MM-DD_HH-mm-ss') log_dir =os.path.join(experiment_dir, experiment_name, 'log_dir',PARAMS['loss'], experiment_start_time) ensure_dir_exists(log_dir) print('Tensorboard log_dir: ', log_dir) # os.system(f'neptune tensorboard {log_dir} --project {args.neptune_project_name}') weights_best = os.path.join(log_dir, 'model_ckpt.h5') restore_best_weights=False histogram_freq=0 patience=25 num_epochs = PARAMS['num_epochs'] initial_epoch=0 src_db = pyleaves.DATABASE_PATH datasets = { 'PNAS': pnas_dataset.PNASDataset(src_db=src_db), 'Leaves': leaves_dataset.LeavesDataset(src_db=src_db), 'Fossil': fossil_dataset.FossilDataset(src_db=src_db) } # data = datasets[PARAMS['dataset_name']] data_config = stuf(threshold=PARAMS['data_threshold'], num_classes=PARAMS['num_classes'] , data_splits_meta={ 'train':PARAMS['train_size'], 'val':PARAMS['val_size'], 'test':PARAMS['test_size'] } ) preprocess_input = get_preprocessing_func(PARAMS['model_name']) preprocess_input(tf.zeros([4, 224, 224, 3])) 
# Decoder applied to every (path, label) pair by get_data_loader.
load_example = partial(_load_uint8_example, num_classes=data_config.num_classes)

# The colour augmentation key depends on how many channels the model consumes.
if PARAMS['num_channels'] == 3:
    color_aug = {'rgb2gray_3channel': 1.0}
elif PARAMS['num_channels'] == 1:
    color_aug = {'rgb2gray_1channel': 1.0}
else:
    # Previously this fell through and failed later with a NameError on color_aug.
    raise ValueError(f"Unsupported PARAMS['num_channels']: {PARAMS['num_channels']!r} (expected 1 or 3)")

# Either resize with padding to a fixed size, or hand the whole jitter config
# to the train augmentor -- never both.
resize_w_pad = None
random_jitter = None
if not PARAMS['random_jitter']['resize']:
    resize_w_pad = PARAMS['image_size']
else:
    random_jitter = PARAMS['random_jitter']

TRAIN_image_augmentor = ImageAugmentor(name='train',
                                       augmentations={**PARAMS["augmentations"], **color_aug},
                                       resize_w_pad=resize_w_pad,
                                       random_crop=None,
                                       random_jitter=random_jitter,
                                       log_dir=log_dir,
                                       seed=None)
VAL_image_augmentor = ImageAugmentor(name='val',
                                     augmentations={**color_aug},
                                     resize_w_pad=PARAMS['image_size'],
                                     random_crop=None,
                                     random_jitter=None,
                                     log_dir=log_dir,
                                     seed=None)
TEST_image_augmentor = ImageAugmentor(name='test',
                                      augmentations={**color_aug},
                                      resize_w_pad=PARAMS['image_size'],
                                      random_crop=None,
                                      random_jitter=None,
                                      log_dir=log_dir,
                                      seed=None)


def neptune_log_augmented_images(split_data, num_demo_samples=40, PARAMS=PARAMS):
    """Log randomly chosen original/augmented image pairs for 'train' and 'val'.

    Args:
        split_data: mapping with 'train' and 'val' entries accepted by
            get_data_loader.
        num_demo_samples: number of image pairs logged per subset. (The
            original body overwrote this argument with 40; it is honoured now.)
        PARAMS: unused; kept so existing keyword callers keep working.
    """
    demo_x = {}
    demo_y = {}
    for subset in ('train', 'val'):
        demo_x[subset], demo_y[subset] = next(iter(get_data_loader(data=split_data[subset],
                                                                   data_subset_mode=subset,
                                                                   batch_size=num_demo_samples,
                                                                   infinite=True,
                                                                   augment=False,
                                                                   seed=2836)))

    for k in demo_x:
        x = tf.data.Dataset.from_tensor_slices(demo_x[k])
        y = tf.data.Dataset.from_tensor_slices(demo_y[k])
        xy_data = tf.data.Dataset.zip((x, y))

        v = xy_data.map(VAL_image_augmentor.resize, num_parallel_calls=AUTOTUNE)
        v_aug = TRAIN_image_augmentor.apply_augmentations(xy_data)

        v_x, _ = [i.numpy() for i in next(iter(v.batch(10*num_demo_samples)))]
        v_x_aug, _ = [i.numpy() for i in next(iter(v_aug.batch(10*num_demo_samples)))]

        for i in range(num_demo_samples):
            print(f'Neptune: logging {k}_{i}')
            print(f'{v_x[i].shape}, {v_x_aug[i].shape}')
            # The logged pair is drawn at random; `i` only numbers the log entry.
            idx = np.random.randint(0, len(v_x))
            TRAIN_image_augmentor.logger.add_log(v_x[idx], counter=i, name=k)
            TRAIN_image_augmentor.logger.add_log(v_x_aug[idx], counter=i, name=k+'_aug')


def get_data_loader(data : tuple, data_subset_mode='train', batch_size=32, num_classes=None, infinite=True, augment=True, seed=2836):
    """Build a batched, prefetched tf.data pipeline from an (inputs, labels) pair.

    Args:
        data: sequence whose element 0 holds the inputs and element 1 the labels.
        data_subset_mode: 'train', 'val' or 'test'; selects the augmentor and
            enables shuffling for 'train'.
        batch_size: batch size; incomplete final batches are dropped.
        num_classes: unused here (load_example already carries it).
        infinite: repeat the dataset indefinitely when True.
        augment: apply the subset's augmentor when True.
        seed: seed of the second (post-repeat) shuffle in 'train' mode.

    Returns:
        The prefetched tf.data.Dataset.
    """
    num_samples = len(data[0])
    x = tf.data.Dataset.from_tensor_slices(data[0])
    labels = tf.data.Dataset.from_tensor_slices(data[1])
    data = tf.data.Dataset.zip((x, labels))

    data = data.cache()
    if data_subset_mode == 'train':
        # Full-buffer shuffle of the cached (input, label) pairs before decoding.
        data = data.shuffle(buffer_size=num_samples)

    data = data.map(load_example, num_parallel_calls=AUTOTUNE)
    data = data.map(lambda x, y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)

    if infinite:
        data = data.repeat()

    if data_subset_mode == 'train':
        data = data.shuffle(buffer_size=200, seed=seed)
        augmentor = TRAIN_image_augmentor
    elif data_subset_mode == 'val':
        augmentor = VAL_image_augmentor
    elif data_subset_mode == 'test':
        augmentor = TEST_image_augmentor

    if augment:
        data = augmentor.apply_augmentations(data)

    data = data.batch(batch_size, drop_remainder=True)
    return data.prefetch(AUTOTUNE)


def get_tfds_data_loader(data : tf.data.Dataset, data_subset_mode='train', batch_size=32, num_samples=100, num_classes=19, infinite=True, augment=True, seed=2836):
    """Build a batched, prefetched pipeline from a supervised TFDS dataset.

    Prints the min/max of one example's image after each stage (debug output
    kept from the original implementation).

    Args:
        data: dataset yielding (image, label) pairs.
        data_subset_mode: 'train', 'val' or 'test'; selects the augmentor.
        batch_size: batch size; incomplete final batches are dropped.
        num_samples: buffer size of the initial shuffle.
        num_classes: forwarded to _encode_label.
        infinite: repeat the batched dataset indefinitely when True.
        augment: apply the subset's augmentor when True.
        seed: seed of the extra shuffle in 'train' mode.

    Returns:
        The prefetched tf.data.Dataset.
    """
    def encode_example(x, y):
        x = tf.image.convert_image_dtype(x, tf.float32) * 255.0
        y = _encode_label(y, num_classes=num_classes)
        return x, y

    def print_value_range(dataset):
        # Debug aid: pull one element and show the range of its first component.
        sample = next(iter(dataset))
        print(sample[0].numpy().min())
        print(sample[0].numpy().max())

    print_value_range(data)

    data = data.shuffle(buffer_size=num_samples) \
               .cache() \
               .map(encode_example, num_parallel_calls=AUTOTUNE)
    print_value_range(data)

    # NOTE(review): get_data_loader wraps preprocess_input in a lambda that only
    # transforms x; here it is mapped directly over (x, y) elements -- confirm
    # preprocess_input accepts that call shape.
    data = data.map(preprocess_input, num_parallel_calls=AUTOTUNE)
    print_value_range(data)

    if data_subset_mode == 'train':
        data = data.shuffle(buffer_size=100, seed=seed)
        augmentor = TRAIN_image_augmentor
    elif data_subset_mode == 'val':
        augmentor = VAL_image_augmentor
    elif data_subset_mode == 'test':
        augmentor = TEST_image_augmentor

    if augment:
        data = augmentor.apply_augmentations(data)
    print_value_range(data)

    data = data.batch(batch_size, drop_remainder=True)
    if infinite:
        data = data.repeat()

    return data.prefetch(AUTOTUNE)


# Example inputs for the metric helpers below:
# y_true = [[0, 1, 0], [0, 0, 1]]
# y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

def accuracy(y_true, y_pred):
    """Fraction of rows whose argmax prediction equals the argmax label."""
    y_pred = tf.argmax(y_pred, axis=-1)
    y_true = tf.argmax(y_true, axis=-1)
    return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))


def true_pos(y_true, y_pred):
    """Count entries that are positive in both the labels and the rounded predictions."""
    return K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))


def false_pos(y_true, y_pred):
    """Count entries predicted positive whose label is not positive.

    The original returned (label positives - true positives), which is the
    number of false negatives.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return predicted_positives - true_positives


def true_neg(y_true, y_pred):
    """Count entries that are negative in both the labels and the rounded predictions.

    The original summed 1 - round(y_true * y_pred), i.e. every entry that was
    not a true positive.
    """
    return K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))


def recall(y_true, y_pred):
    """True positives divided by label positives (epsilon-stabilised)."""
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    all_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (all_positives + K.epsilon())


def precision(y_true, y_pred):
    """True positives divided by predicted positives (epsilon-stabilised).

    The original replaced y_true with ones before computing this, which made
    true positives equal predicted positives and pinned the result near 1.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())


def f1_score(y_true, y_pred):
    """Harmonic mean of precision() and recall() (epsilon-stabilised)."""
    m_precision = precision(y_true, y_pred)
    m_recall = recall(y_true, y_pred)
    return 2*((m_precision*m_recall)/(m_precision+m_recall+K.epsilon()))


METRICS = [
    f1_score,
    metrics.TruePositives(name='tp'),
    metrics.FalsePositives(name='fp'),
    metrics.CategoricalAccuracy(name='accuracy'),
    metrics.TopKCategoricalAccuracy(name='top_3_categorical_accuracy', k=3),
    metrics.TopKCategoricalAccuracy(name='top_5_categorical_accuracy', k=5)
]

# Record the exact command line alongside the other experiment parameters.
PARAMS['sys.argv'] = ' '.join(sys.argv)

with neptune.create_experiment(name=experiment_name, params=PARAMS, upload_source_files=[__file__]):

    print('Logging experiment tags:')
    for tag in args.tags:
        print(tag)
        neptune.append_tag(tag)

    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.log_artifact(args.config_path)

    # One fixed batch per subset, handed to the confusion-matrix callback.
    cm_data_x = {'train': [], 'val': []}
    cm_data_y = {'train': [], 'val': []}

    if PARAMS['dataset_name'] in tfds.list_builders():
        num_demo_samples = 40

        tfds_builder = tfds.builder(PARAMS['dataset_name'])
        tfds_builder.download_and_prepare()

        num_samples = tfds_builder.info.splits['train'].num_examples
        num_samples_dict = {'train': int(num_samples*PARAMS['train_size']),
                            'val': int(num_samples*PARAMS['val_size']),
                            'test': int(num_samples*PARAMS['test_size'])}

        classes = tfds_builder.info.features['label'].names
        num_classes = len(classes)

        # Percentage ranges carved out of the TFDS 'train' split.
        split_slices = {
            'train': [0, int(PARAMS['train_size']*100)],
            'val': [int(PARAMS['train_size']*100), int((PARAMS['train_size']+PARAMS['val_size'])*100)],
            'test': [100 - int(PARAMS['test_size']*100), 100],
        }
        split_seeds = {'train': 2836, 'val': 2837, 'test': 2838}

        tfds_raw = {
            subset: tfds.load(PARAMS['dataset_name'],
                              split=f"train[{bounds[0]}%:{bounds[1]}%]",
                              shuffle_files=True,
                              as_supervised=True)
            for subset, bounds in split_slices.items()
        }

        def _tfds_loader(subset, batch_size):
            # Same loader arguments as before; only subset and batch size vary.
            return get_tfds_data_loader(data=tfds_raw[subset],
                                        data_subset_mode=subset,
                                        batch_size=batch_size,
                                        num_samples=num_samples_dict[subset],
                                        num_classes=num_classes,
                                        infinite=True,
                                        augment=True,
                                        seed=split_seeds[subset])

        train_data = _tfds_loader('train', PARAMS['batch_size'])
        validation_data = _tfds_loader('val', PARAMS['batch_size'])
        test_data = _tfds_loader('test', PARAMS['batch_size'])

        split_data = {subset: _tfds_loader(subset, num_demo_samples)
                      for subset in ('train', 'val', 'test')}

        steps_per_epoch = num_samples_dict['train']//PARAMS['batch_size']
        validation_steps = num_samples_dict['val']//PARAMS['batch_size']
        # split_data['test'] is a Dataset here, so the step count must come from
        # the sample counts (the original indexed the Dataset and crashed).
        test_steps = max(1, num_samples_dict['test']//PARAMS['batch_size'])

        cm_data_x['train'], cm_data_y['train'] = next(iter(split_data['train']))
        cm_data_x['val'], cm_data_y['val'] = next(iter(split_data['val']))

        # NOTE(review): `encoder` is not assigned in this branch but is passed to
        # ImageLoggerCallback below -- confirm it is defined earlier in the file.
    else:
        data = datasets[PARAMS['dataset_name']]

        neptune.set_property('num_classes', data.num_classes)
        neptune.set_property('class_distribution', data.metadata.class_distribution)

        encoder = base_dataset.LabelEncoder(data.data.family)
        split_data = base_dataset.preprocess_data(data, encoder, data_config)

        for subset, subset_data in split_data.items():
            split_data[subset] = [list(i) for i in unzip(subset_data)]

        # NOTE(review): this overrides the configured batch size after PARAMS was
        # already sent to Neptune -- confirm it is intentional.
        PARAMS['batch_size'] = 32

        steps_per_epoch = len(split_data['train'][0])//PARAMS['batch_size']
        validation_steps = len(split_data['val'][0])//PARAMS['batch_size']

        split_datasets = {
            k: base_dataset.BaseDataset.from_dataframe(
                pd.DataFrame({
                    'path': v[0],
                    'family': v[1]
                }))
            for k, v in split_data.items()
        }

        for k, v in split_datasets.items():
            print(k, v.num_classes)

        classes = split_datasets['train'].classes

        train_data = get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2836)
        validation_data = get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2837)
        if 'test' in split_data.keys():
            test_data = get_data_loader(data=split_data['test'], data_subset_mode='test', batch_size=PARAMS['batch_size'], infinite=True, augment=True, seed=2838)
            # `steps` counts batches, not samples.
            test_steps = max(1, len(split_data['test'][0])//PARAMS['batch_size'])

        num_demo_samples = 150
        cm_data_x['train'], cm_data_y['train'] = next(iter(get_data_loader(data=split_data['train'], data_subset_mode='train', batch_size=num_demo_samples, infinite=True, augment=True, seed=2836)))
        cm_data_x['val'], cm_data_y['val'] = next(iter(get_data_loader(data=split_data['val'], data_subset_mode='val', batch_size=num_demo_samples, infinite=True, augment=True, seed=2836)))

    ########################################################################################
    train_image_logger_cb = ImageLoggerCallback(data=train_data, freq=20, max_images=-1, name='train', encoder=encoder)
    val_image_logger_cb = ImageLoggerCallback(data=validation_data, freq=20, max_images=-1, name='val', encoder=encoder)
    ########################################################################################
    cm_callback = ConfusionMatrixCallback(log_dir, cm_data_x, cm_data_y, classes=classes, seed=PARAMS['seed'], include_train=True)
    # NOTE(review): `restore_best_weights` is documented as an EarlyStopping
    # option, not a ModelCheckpoint one -- confirm which callback should get it.
    checkpoint = ModelCheckpoint(weights_best, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='min', restore_best_weights=restore_best_weights)
    tfboard = TensorBoard(log_dir=log_dir, histogram_freq=histogram_freq, write_images=True)
    early = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)
    callbacks = [checkpoint, tfboard, early, cm_callback, neptune_logger, train_image_logger_cb, val_image_logger_cb]
    ##########################
    if PARAMS['optimizer'] == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['lr'])
    elif PARAMS['optimizer'] == 'Nadam':
        optimizer = tf.keras.optimizers.Nadam(learning_rate=PARAMS['lr'])
    elif PARAMS['optimizer'] == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=PARAMS['lr'])
    else:
        # Previously an unknown name only failed later with a NameError.
        raise ValueError(f"Unsupported PARAMS['optimizer']: {PARAMS['optimizer']!r}")
    ##########################
    if PARAMS['loss'] == 'focal_loss':
        loss = focal_loss(gamma=2.0, alpha=4.0)
    elif PARAMS['loss'] == 'categorical_crossentropy':
        loss = 'categorical_crossentropy'
    else:
        raise ValueError(f"Unsupported PARAMS['loss']: {PARAMS['loss']!r}")
    ##########################
    model_params = stuf(name=PARAMS['model_name'],
                        model_dir=os.path.join(experiment_dir, experiment_name, 'models'),
                        num_classes=PARAMS['num_classes'],
                        frozen_layers=PARAMS['frozen_layers'],
                        input_shape=(*PARAMS['image_size'], PARAMS['num_channels']),
                        base_learning_rate=PARAMS['lr'],
                        regularization=PARAMS['regularization'])
    ####
    if PARAMS['model_name'] == 'shallow':
        model = build_shallow(input_shape=model_params.input_shape,
                              num_classes=PARAMS['num_classes'],
                              optimizer=optimizer,
                              loss=loss,
                              METRICS=METRICS)
    else:
        model = build_model(model_params, optimizer, loss, METRICS)

    print(f"TRAINING {PARAMS['model_name']}")

    # Send each line of the Keras summary to Neptune instead of stdout.
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))

    history = model.fit(train_data,
                        epochs=num_epochs,
                        callbacks=callbacks,
                        validation_data=validation_data,
                        shuffle=True,
                        initial_epoch=initial_epoch,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)

    if 'test' in split_data:
        results = model.evaluate(test_data, steps=test_steps)
    else:
        results = model.evaluate(validation_data, steps=validation_steps)
def main():
    """Train DCENet with early stopping, then evaluate the saved checkpoint.

    Reads ``--gpu`` and ``--config`` from the command line, loads the JSON
    file ``config/<--config>``, optionally opens a Neptune experiment, trains
    for at most ``config['max_epochs']`` epochs, reloads the checkpoint named
    after the config file from ``checkpoints/`` and prints the top-k and
    most-likely prediction errors on the test data.
    """
    # ================= Arguments ================ #
    parser = argparse.ArgumentParser(description='PyTorch Knowledge Distillation')
    parser.add_argument('--gpu', type=str, default="4", help='gpu id')
    parser.add_argument('--config', type=str, default="config", help='.json')
    args = parser.parse_args()

    # ================= Device Setup ================ #
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ================= Config Load ================ #
    with open('config/' + args.config) as config_file:
        config = json.load(config_file)

    # The checkpoint file shares its base name with the config file.
    checkpoint_name = args.config.split('/')[-1].replace('.json', '.pth')

    # ================= Neptune Setup ================ #
    if config['neptune']:
        neptune.init('seongjulee/DCENet', api_token=config["neptune_token"])  # username/project-name, api_token=token from neptune
        neptune.create_experiment(name='EXP', params=config)  # name=project name (anything is ok), params=parameter list (json format)
        neptune.append_tag(args.config)  # neptune tag (str or string list)

    # ================= Model Setup ================ #
    # Wrap in DataParallel only when several GPU ids were requested.
    if len(args.gpu.split(',')) > 1:
        model = nn.DataParallel(DCENet(config)).to(device)
    else:
        model = DCENet(config).to(device)

    # ================= Loss Function ================ #
    criterion = DCENetLoss(config)

    # ================= Optimizer Setup ================ #
    optimizer = optim.Adam(model.parameters(), lr=config['lr'], betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-6, amsgrad=False)

    # ================= Data Loader ================ #
    datalist = DataInfo()
    train_datalist = datalist.train_merged
    print('Train data list', train_datalist)
    test_datalist = datalist.train_biwi
    print('Test data list', test_datalist)

    # Fixed seed so the random train/validation mask is reproducible.
    np.random.seed(10)
    offsets, _, occupancy = load_data(config, train_datalist, datatype="train")
    trainval_split = np.random.rand(len(offsets)) < config['split']

    # Inputs use time steps [0, obs_len); targets use [obs_len, end).
    obs_len = config['obs_seq'] - 1
    env_dim = config['enviro_pdim'][-1]

    train_x = offsets[trainval_split, :obs_len, 4:6]
    train_occu = occupancy[trainval_split, :obs_len, ..., :env_dim]
    train_y = offsets[trainval_split, obs_len:, 4:6]
    train_y_occu = occupancy[trainval_split, obs_len:, ..., :env_dim]

    val_x = offsets[~trainval_split, :obs_len, 4:6]
    val_occu = occupancy[~trainval_split, :obs_len, ..., :env_dim]
    val_y = offsets[~trainval_split, obs_len:, 4:6]
    val_y_occu = occupancy[~trainval_split, obs_len:, ..., :env_dim]

    print("%.0f trajectories for training\n %.0f trajectories for valiadation" %(train_x.shape[0], val_x.shape[0]))

    test_offsets, test_trajs, test_occupancy = load_data(config, test_datalist, datatype="test")
    test_x = test_offsets[:, :obs_len, 4:6]
    test_occu = test_occupancy[:, :obs_len, ..., :env_dim]
    # Columns 2:4 at the last observed step seed the cumulative sum below.
    last_obs_test = test_offsets[:, obs_len - 1, 2:4]
    y_truth = test_offsets[:, obs_len:, :4]
    xy_truth = test_offsets[:, :, :4]

    print('test_trajs', test_trajs.shape)
    print("%.0f trajectories for testing" % (test_x.shape[0]))

    train_dataset = TrajDataset(x=train_x, x_occu=train_occu, y=train_y, y_occu=train_y_occu, mode='train')
    train_loader = DataLoader(dataset=train_dataset, batch_size=config["batch_size"], shuffle=True, num_workers=4)

    val_dataset = TrajDataset(x=val_x, x_occu=val_occu, y=val_y, y_occu=val_y_occu, mode='val')
    val_loader = DataLoader(dataset=val_dataset, batch_size=config["batch_size"], shuffle=False, num_workers=4)

    # ================= Training Loop ================ #
    early_stopping = EarlyStopping(patience=config['patience'], verbose=True, filename=checkpoint_name)
    for epoch in range(config['max_epochs']):
        train_one_epoch(config, epoch, device, model, optimizer, criterion, train_loader)
        val_loss = evaluate(config, device, model, optimizer, criterion, val_loader)
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # ================= Test ================ #
    # map_location lets a checkpoint written on a GPU be reloaded on a CPU-only host.
    model.load_state_dict(torch.load(os.path.join('checkpoints', checkpoint_name), map_location=device))
    model.eval()
    with torch.no_grad():
        test_x, test_occu = input2tensor(test_x, test_occu, device)
        x_latent = model.encoder_x(test_x, test_occu)
        predictions = []
        for sample_idx, x_ in enumerate(x_latent):
            last_pos = last_obs_test[sample_idx]
            x_ = x_.view(1, -1)
            # Decode num_pred times per test sample (the original reused `i`
            # for this loop, shadowing the sample index).
            for _ in range(config['num_pred']):
                y_p = model.decoder(x_, train=False)
                # Prepend the last observed position, then accumulate the
                # decoded steps on top of it.
                y_p_ = np.concatenate(([last_pos], np.squeeze(y_p.cpu().numpy())), axis=0)
                y_p_sum = np.cumsum(y_p_, axis=0)
                predictions.append(y_p_sum[1:, :])
        predictions = np.reshape(predictions, [-1, config['num_pred'], config['pred_seq'], 2])

    print('Predicting done!')
    print(predictions.shape)
    plot_pred(xy_truth, predictions)
    # Get the errors for ADE, DEF, Hausdorff distance, speed deviation, heading error
    print("\nEvaluation results @top%.0f" % config['num_pred'])
    get_errors(y_truth, predictions)
    check_collision(y_truth)

    # Keep, for every test sample, the prediction with the highest gauss_rank score.
    ranked_prediction = []
    for prediction in predictions:
        ranks = gauss_rank(prediction)
        ranked_prediction.append(prediction[np.argmax(ranks)])
    ranked_prediction = np.reshape(ranked_prediction, [-1, 1, config['pred_seq'], 2])
    print("\nEvaluation results for most-likely predictions")
    get_errors(y_truth, ranked_prediction)
import neptune
import numpy as np

# Minimal Neptune example: one experiment with metrics, text, a tag and images.

# Select project
neptune.init('neptune-workshops/AII-Optimali')

# Parameters stored with the experiment
PARAMS = {'decay_factor': 0.5, 'n_iterations': 117}

# Create experiment
neptune.create_experiment(name='minimal-extended', params=PARAMS)

# Log one point per iteration on three channels; iterations start at 1.
iteration = 1
while iteration < PARAMS['n_iterations']:
    loss_value = PARAMS['decay_factor'] / iteration**0.5
    text_value = 'some value {}'.format(0.95 * iteration**2)
    neptune.log_metric('iteration', iteration)
    neptune.log_metric('loss', loss_value)
    neptune.log_text('text_info', text_value)
    iteration += 1

# Add tag to the experiment
neptune.append_tag('quick_start')

# Log five random 10x10 RGB mosaics, each cell repeated 30 times along both axes.
for _ in range(5):
    tiles = np.random.rand(10, 10, 3) * 255
    mosaic = np.repeat(np.repeat(tiles, 30, 0), 30, 1)
    neptune.log_image('mosaics', mosaic)
def Eval_phase(params,which_files='test',model=None,test_dataloader=None,device=None):
    """Run `model` over `test_dataloader` and compute classification metrics.

    Args:
        params: dict; reads 'is_model', 'num_classes', 'logging' and, in the
            Neptune branch, 'path_files' and 'language'.
        which_files: label printed in the progress message.
        model: model to evaluate; called with input ids, attention values and
            attention mask.
        test_dataloader: iterable of batches indexed as
            [0] input ids, [1] attention vals, [2] attention mask, [3] labels.
        device: device the batch tensors are moved to.

    Returns:
        ``1`` when ``params['is_model']`` is not true (no model supplied);
        otherwise the tuple
        ``(f1, accuracy, precision, recall, roc_auc, softmaxed_logits)``.
        ROC-AUC is only computed when ``params['num_classes'] == 3`` and is 0
        otherwise.
    """
    if params['is_model'] != True:  # noqa: E712 -- keeps the original `== True` comparison
        return 1

    # Local import: only needed for the no_grad() context below.
    import torch

    print("model previously passed")
    # Evaluation mode: dropout layers behave differently than during training.
    model.eval()

    print("Running eval on ",which_files,"...")
    t0 = time.time()

    # Tracking variables
    true_labels=[]
    pred_labels=[]
    logits_all=[]

    # Inference only: no gradients are needed, so skip building the autograd
    # graph (the original called model.zero_grad() on every batch instead).
    with torch.no_grad():
        for _, batch in tqdm(enumerate(test_dataloader)):
            b_input_ids = batch[0].to(device)
            b_att_val = batch[1].to(device)
            b_input_mask = batch[2].to(device)
            b_labels = batch[3].to(device)

            outputs = model(b_input_ids,
                            attention_vals=b_att_val,
                            attention_mask=b_input_mask,
                            labels=None,device=device)
            logits = outputs[0]

            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            pred_labels+=list(np.argmax(logits, axis=1).flatten())
            true_labels+=list(label_ids.flatten())
            logits_all+=list(logits)

    # Per-example softmax over the raw logits.
    logits_all_final=[softmax(logits) for logits in logits_all]

    testf1=f1_score(true_labels, pred_labels, average='macro')
    testacc=accuracy_score(true_labels,pred_labels)
    if(params['num_classes']==3):
        testrocauc=roc_auc_score(true_labels, logits_all_final,multi_class='ovo',average='macro')
    else:
        testrocauc=0
    testprecision=precision_score(true_labels, pred_labels, average='macro')
    testrecall=recall_score(true_labels, pred_labels, average='macro')

    # NOTE(review): 'is_model' is always true at this point (see the early
    # return above), so the Neptune branch below can never run. Left unchanged
    # to preserve behaviour -- confirm whether it should be reachable.
    if(params['logging']!='neptune' or params['is_model'] == True):
        # Report the final metrics for this run.
        print(" Accuracy: {0:.2f}".format(testacc))
        print(" Fscore: {0:.2f}".format(testf1))
        print(" Precision: {0:.2f}".format(testprecision))
        print(" Recall: {0:.2f}".format(testrecall))
        print(" Roc Auc: {0:.2f}".format(testrocauc))
        print(" Test took: {:}".format(format_time(time.time() - t0)))
    else:
        bert_model = params['path_files']
        language = params['language']
        name_one=bert_model+"_"+language
        neptune.create_experiment(name_one,params=params,send_hardware_metrics=False,run_monitoring_thread=False)
        neptune.append_tag(bert_model)
        neptune.append_tag(language)
        neptune.append_tag('test')
        neptune.log_metric('test_f1score',testf1)
        neptune.log_metric('test_accuracy',testacc)
        neptune.log_metric('test_precision',testprecision)
        neptune.log_metric('test_recall',testrecall)
        neptune.log_metric('test_rocauc',testrocauc)
        neptune.stop()

    return testf1,testacc,testprecision,testrecall,testrocauc,logits_all_final
# The remaining command-line options all take an empty help string, so they
# are registered from a table. Order matches the original calls, which keeps
# the --help listing unchanged.
for _flag, _value_type in (
    ('--weight_decay', float),
    ('--max_epoch', int),
    ('--valid_every', int),
    ('--out_dir', str),
    ('--out_file', str),
    ('--patience', int),
    ('--is_train', int),
    ('--dim_input', int),
    ('--dim_out', int),
    ('--dim_lstm_hidden', int),
    ('--dim_fc_hidden', int),
    ('--rnn_len', int),
    ('--name', str),
    ('--tag', str),
    ('--n_cmt', int),
):
    parser.add_argument(_flag, type=_value_type, help='')

args = parser.parse_args()
# vars() returns the namespace's own dict, so `params` and `args` share state.
params = vars(args)

# Start the Neptune experiment, named and tagged from the command line.
neptune.init('cjlee/AnomalyDetection-Supervised-RNN')
experiment = neptune.create_experiment(name=args.name, params=params)
neptune.append_tag(args.tag)

# The output location is overridden after the experiment was created; the
# checkpoint file is named after the experiment id.
args.out_dir = './result'
args.out_file = experiment.id + '.pth'  # temporary code for testing

train_main(args, neptune)
# Directory to save the pretrained model parser.add_argument("--save_dir", default="./resource/checkpoint/zinc_daga/") args = parser.parse_args() # Initialize random seed and prepare CUDA device device = torch.device(0) random.seed(0) # Initialize neptune neptune.init( project_qualified_name="sungsoo.ahn/deep-molecular-optimization") neptune.create_experiment(name="pretrain", params=vars(args)) neptune.append_tag(args.dataset) # Load character dict and dataset char_dict = SmilesCharDictionary(dataset=args.dataset, max_smi_len=args.max_smiles_length) dataset = load_dataset(char_dict=char_dict, smi_path=args.dataset_path) # Prepare neural apprentice. We set max_sampling_batch_size=0 since we do not use sampling. input_size = max(char_dict.char_idx.values()) + 1 generator = SmilesGenerator( input_size=input_size, hidden_size=args.hidden_size, output_size=input_size, n_layers=args.n_layers, lstm_dropout=args.lstm_dropout, )
def training_pipeline(args):
    """Train and/or evaluate the LSTM classifier, tracking the run in Neptune.

    Args:
        args: parsed argument namespace. Reads, among others, ``seed``,
            ``cuda``, ``cuda_device``, ``data``, ``batch_size``,
            ``eval_batch_size``, the embedding and model options,
            ``train_lstm`` and ``eval_lstm``.

    Returns:
        ``(test_loss, test_accuracy)``; both are ``None`` when
        ``args.eval_lstm`` is false.
    """
    ###############################################################################
    # Environment setup
    ###############################################################################

    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Check if CUDA device is available and set training on CPUs or GPUs
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )

    device = torch.device(args.cuda_device if args.cuda else "cpu")

    ###############################################################################
    # Experiment tracking setup
    ###############################################################################

    neptune.init(project_qualified_name='karexar/GSW-dialect-classifier')
    args_dict = vars(args)
    neptune.create_experiment(params=args_dict)
    if hasattr(args, 'experiment_id'):
        neptune.append_tag(args.experiment_id)
    neptune.set_property('lm_algo', 'lstm')
    # Mirror every argument as an experiment property as well.
    for key, value in args_dict.items():
        neptune.set_property(key, value)

    ###############################################################################
    # Load data
    ###############################################################################

    print('Loading data')
    data_manager = DataManager(args.data, device, args.batch_size,
                               args.eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################

    print('Building model')
    num_tokens = data_manager.vocab_size
    num_labels = data_manager.num_labels

    embeddings_matrix = None
    if args.use_pretrained_embed:
        # Load pre-trained word embeddings model
        # and generate the embeddings weight matrix for the entire vocabulary
        assert args.embed_algo is not None
        print(f'Using {args.embed_algo} pre-trained word embeddings')

        # Both supported algorithms are loaded and queried the same way; any
        # other value leaves embeddings_matrix as None, as before.
        embedding_cls = None
        if args.embed_algo == 'word2vec':
            embedding_cls = Word2VecModel
        elif args.embed_algo == 'glove':
            embedding_cls = GloveModel

        if embedding_cls is not None:
            pretrained_embeddings = embedding_cls(args.model_path_embed,
                                                  args.model_name_embed,
                                                  load_from_disk=True)
            embeddings_matrix = pretrained_embeddings.get_vocabulary_embeddings(
                data_manager.idx2word, args.embed_size)

    model = LSTM(num_tokens, args.embed_size, args.num_hidden, args.num_layers,
                 args.dropout, num_labels, embeddings_matrix).to(device)

    print('Model architecture')
    print(model)

    criterion = nn.CrossEntropyLoss()

    ###############################################################################
    # Training code
    ###############################################################################

    print('Initialising model executor')
    model_executor = ModelExecutor(model, data_manager, device, criterion)

    if args.train_lstm:
        # Loop over epochs
        learning_rate = args.learning_rate
        best_val_accuracy = None
        last_val_accuracy = 0
        model_optimiser = optim.SGD(model.parameters(), lr=learning_rate)

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            print('Starting the training process')
            for epoch in range(1, args.epochs + 1):
                epoch_start_time = time.time()
                model_executor.train(epoch, args.batch_size, learning_rate,
                                     model_optimiser, args.clip,
                                     args.log_interval)

                val_loss, val_accuracy = model_executor.evaluate(
                    data_manager.val_iter, args.eval_batch_size)

                # Log result in Neptune ML
                neptune.send_metric('valid_loss', epoch, val_loss)
                neptune.send_metric('valid_accuracy', epoch, val_accuracy)
                neptune.send_metric('learning_rate', epoch, learning_rate)

                # NOTE(review): this decay changes only the local value that is
                # passed to train(); the optimiser's param groups are updated
                # only by the annealing step below -- confirm that is intended.
                if epoch % 3 == 0:
                    learning_rate *= 0.9  # correct the learning rate after some number of epochs

                print('-' * 89)
                print(
                    '| End of epoch {:3d} | Time: {:5.2f}s | Valid loss {:6.2f} | '
                    'Valid accuracy {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        val_accuracy))
                print('-' * 89)

                # Save the model if the validation accuracy is the best we've seen so far.
                # `is None` rather than truthiness: a best accuracy of 0.0 is a
                # real value and must not count as "nothing saved yet".
                if best_val_accuracy is None or val_accuracy > best_val_accuracy:
                    model_executor.model.export_model(args.model_path_lstm)
                    best_val_accuracy = val_accuracy

                if val_accuracy < last_val_accuracy:
                    # Anneal the learning rate if no improvement has been seen in the validation dataset.
                    learning_rate /= 2.0
                    for group in model_optimiser.param_groups:
                        group['lr'] = learning_rate

                last_val_accuracy = val_accuracy

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

    ###############################################################################
    # Evaluation code
    ###############################################################################

    test_loss = None
    test_accuracy = None

    if args.eval_lstm:
        print('Evaluating on the test set')

        # Load the best saved model.
        model_executor.load_pre_trained_model(args.model_path_lstm,
                                              device=device)

        # Run on test data.
        test_loss, test_accuracy = model_executor.evaluate(
            data_manager.test_iter, args.eval_batch_size)

        # Log result in Neptune ML
        neptune.send_metric('test_loss', test_loss)
        neptune.send_metric('test_accuracy', test_accuracy)

        print('-' * 89)
        print('| End of evaluation | Test loss {:6.2f}'.format(test_loss) +
              ' | Test accuracy {:8.2f}'.format(test_accuracy))
        print('-' * 89)

    ###############################################################################
    # Stop the experiment tracking
    ###############################################################################

    neptune.stop()

    return test_loss, test_accuracy
parameters['metrics_separately'] = args.metrics_separately parameters['random_val_neg_sampler'] = args.random_val_neg_sampler parameters['val_regenerate'] = args.val_regenerate if args.log: import neptune neptune.init(args.neptune_project) neptune_experiment_name = args.experiment_name neptune.create_experiment(name=neptune_experiment_name, params=parameters, upload_stdout=True, upload_stderr=True, send_hardware_metrics=True, upload_source_files='**/*.py') neptune.append_tag('pytorch') if args.gpu: neptune.append_tag('gpu') if args.use_proteins: neptune.append_tag('proteins') if args.reversed: neptune.append_tag('reversed') neptune.append_tag('real data') neptune.append_tag('trivec') use_cuda = args.gpu and torch.cuda.is_available() device = torch.device("cuda" if args.gpu else "cpu") print(f'Use device: {device}') kg = KnowledgeGraph(data_path=DATA_CONST['work_dir'], use_proteins=args.use_proteins,
def train_model(params, best_val_fscore):
    """Fine-tune a BERT classifier, saving it whenever validation F-score improves.

    Args:
        params: Run configuration dict. Keys read here: 'language',
            'csv_file', 'files', 'path_files', 'what_bert', 'weights',
            'max_length', 'batch_size', 'is_train', 'learning_rate',
            'epsilon', 'epochs', 'random_seed' and 'logging'. Note that
            'csv_file' is overwritten with '*_full.csv' when 'language' is
            'English'.
        best_val_fscore: Best validation F-score obtained so far (used for
            hyper-parameter tuning across runs); the model is saved only when
            an epoch beats it.

    Returns:
        A ``(val_fscore, best_val_fscore)`` tuple: the validation F-score of
        the last epoch (``None`` when no epoch ran) and the updated best score.
    """
    # For English there is no translation: the "translation" is the original
    # data itself, so always read the full files.
    if params['language'] == 'English':
        params['csv_file'] = '*_full.csv'

    # Locate the training and validation files.
    train_files = glob.glob(params['files'] + '/train/' + params['csv_file'])
    val_files = glob.glob(params['files'] + '/val/' + params['csv_file'])

    print('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained(params['path_files'],
                                              do_lower_case=False)

    df_train = data_collector(train_files, params, True)
    df_val = data_collector(val_files, params, False)

    # Pick the text column that matches the selected file pattern.
    if params['csv_file'] == '*_full.csv':
        sentences_train = df_train.text.values
        sentences_val = df_val.text.values
    elif params['csv_file'] == '*_translated.csv':
        sentences_train = df_train.translated.values
        sentences_val = df_val.translated.values
    labels_train = df_train.label.values
    labels_val = df_val.label.values

    label_counts = df_train['label'].value_counts()
    print(label_counts)
    label_weights = [len(df_train) / label_counts[0],
                     len(df_train) / label_counts[1]]
    print(label_weights)

    model = select_model(params['what_bert'], params['path_files'],
                         params['weights'])
    model.cuda()

    # Encode both splits with the BERT tokenizer and wrap them in dataloaders.
    input_train_ids, att_masks_train = combine_features(
        sentences_train, tokenizer, params['max_length'])
    input_val_ids, att_masks_val = combine_features(
        sentences_val, tokenizer, params['max_length'])
    train_dataloader = return_dataloader(input_train_ids,
                                         labels_train,
                                         att_masks_train,
                                         batch_size=params['batch_size'],
                                         is_train=params['is_train'])
    validation_dataloader = return_dataloader(input_val_ids,
                                              labels_val,
                                              att_masks_val,
                                              batch_size=params['batch_size'],
                                              is_train=False)

    optimizer = AdamW(model.parameters(),
                      lr=params['learning_rate'],
                      eps=params['epsilon'])

    # Linear schedule with warm-up over the first 10% of all training steps
    # (number of batches * number of epochs).
    total_steps = len(train_dataloader) * params['epochs']
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(total_steps / 10),
        num_training_steps=total_steps)

    # Seed everything to make the run reproducible.
    fix_the_random(seed_val=params['random_seed'])

    log_to_neptune = params['logging'] == 'neptune'
    bert_model = params['path_files']
    language = params['language']
    if log_to_neptune:
        neptune.create_experiment(bert_model + "_" + language,
                                  params=params,
                                  send_hardware_metrics=False,
                                  run_monitoring_thread=False)
        neptune.append_tag(bert_model)
        neptune.append_tag(language)

    # Validation F-score of the most recent epoch; stays None if epochs == 0.
    val_fscore = None

    for epoch_i in range(0, params['epochs']):
        print("")
        print('======== Epoch {:} / {:} ========'.format(
            epoch_i + 1, params['epochs']))
        print('Training...')

        total_loss = 0
        model.train()

        for batch in tqdm(train_dataloader):
            # `batch` holds three tensors: input ids, attention masks, labels.
            # NOTE(review): `device` is not defined in this function; it is
            # presumably a module-level global - confirm.
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Gradients accumulate by default, so clear them for every batch.
            model.zero_grad()

            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask,
                            labels=b_labels)
            # The model returns a tuple whose first element is the loss.
            loss = outputs[0]
            batch_loss = loss.item()

            if log_to_neptune:
                # Log the plain Python number, not the Tensor.
                neptune.log_metric('batch_loss', batch_loss)
            total_loss += batch_loss

            loss.backward()
            # Clip the gradient norm to 1.0 to guard against exploding gradients.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

        avg_train_loss = total_loss / len(train_dataloader)
        if log_to_neptune:
            neptune.log_metric('avg_train_loss', avg_train_loss)

        # Evaluate on the validation and test splits after every epoch.
        val_fscore, val_accuracy = Eval_phase(params, 'val', model)
        test_fscore, test_accuracy = Eval_phase(params, 'test', model)

        if log_to_neptune:
            neptune.log_metric('val_fscore', val_fscore)
            neptune.log_metric('val_acc', val_accuracy)
            neptune.log_metric('test_fscore', test_fscore)
            neptune.log_metric('test_accuracy', test_accuracy)

        # Save only on improvement, so the checkpoint on disk is always the
        # best model seen so far.
        if val_fscore > best_val_fscore:
            print(val_fscore, best_val_fscore)
            best_val_fscore = val_fscore
            save_model(model, tokenizer, params)

    if log_to_neptune:
        neptune.stop()

    # Release GPU memory before the next run.
    del model
    torch.cuda.empty_cache()
    return val_fscore, best_val_fscore
state = { 'net': net.state_dict(), 'acc': acc, 'epoch': epoch, } if not os.path.isdir('checkpoint'): os.mkdir('checkpoint') torch.save(state, './checkpoint/ckpt'+run_start_time+'.pth') best_acc = acc your_file.close() if __name__ == '__main__': # writer = SummaryWriter(log_dir="/home/dltdc/data/projects_logs/water_logs/", filename_suffix=run_start_time) trainloader, testloader = loaddata(load_water_data=True) net, criterion, optimizer = loadmodel(nb_class=1, img_HW=256, pretrain_model="resnet50") # trainloader, testloader = loaddata() # net, criterion, optimizer = loadmodel(nb_class=10, img_HW=8, pretrain_model='resnet18') with neptune.create_experiment(name='new-model'): neptune.append_tag('first') for epoch in range(start_epoch, start_epoch+args.nb_epoch): train(epoch, net, criterion, optimizer, trainloader) test(epoch, net, criterion, optimizer, testloader) # writer.close()
def main():
    """Build the run options from the command-line FLAGS and execute the chosen mode.

    The base options come from ``configs.config_<FLAGS.exp>``; individual
    flags then override or extend them. Afterwards the work directories are
    created, the parameters are dumped to ``params.txt``, the dataset is
    loaded and the action selected by ``opts['mode']`` is run ('train',
    'test', 'generate' or 'draw'). Training is tracked in Neptune when the
    ``NEPTUNE_API_TOKEN`` environment variable is set.

    Raises:
        AssertionError: On an unknown experiment name, a missing checkpoint in
            'test' mode, ``recalculate_size < batch_size``, ``mover_ratio``
            outside [0, 1], an incompatible encoder-noise/prior pair, or a
            training set smaller than one batch.
    """
    known_experiments = (
        'celebA', 'celebA_small', 'mnist', 'mnist_ord', 'mnist_small',
        'dsprites', 'grassli', 'grassli_small', 'syn_constant_uniform',
        'syn_2_constant_uniform', 'checkers', 'noise', 'noise_unif',
    )
    assert FLAGS.exp in known_experiments, 'Unknown experiment configuration'
    opts = getattr(configs, 'config_' + FLAGS.exp)

    opts['exp'] = FLAGS.exp
    opts['seed'] = FLAGS.seed
    opts['mode'] = FLAGS.mode
    if opts['mode'] == 'test':
        assert FLAGS.checkpoint is not None, 'Checkpoint must be provided'
        opts['checkpoint'] = FLAGS.checkpoint

    # batch_size must be settled before recalculate_size is validated.
    if FLAGS.batch_size is not None:
        opts['batch_size'] = FLAGS.batch_size
    if FLAGS.recalculate_size is not None:
        opts['recalculate_size'] = FLAGS.recalculate_size
        assert opts['recalculate_size'] >= opts['batch_size'], \
            "recalculate_size should be at least as large as batch_size"
    else:
        opts['recalculate_size'] = opts['batch_size']

    # Flags that override the config only when given: (flag name, opts key).
    optional_overrides = (
        ('zdim', 'zdim'),
        ('pz', 'pz'),
        ('lr', 'lr'),
        ('lr_schedule', 'lr_schedule'),
        ('w_aef', 'w_aef'),
        ('z_test', 'z_test'),
        ('lambda_schedule', 'lambda_schedule'),
        ('work_dir', 'work_dir'),
        ('wae_lambda', 'lambda'),
        ('enc_noise', 'e_noise'),
        ('z_test_scope', 'z_test_scope'),
        ('length_lambda', 'length_lambda'),
        ('rec_lambda', 'rec_lambda'),
        ('zxz_lambda', 'zxz_lambda'),
        ('train_size', 'train_size'),
        ('sinkhorn_iters', 'sinkhorn_iters'),
        ('sinkhorn_epsilon', 'sinkhorn_epsilon'),
        ('name', 'name'),
        ('tags', 'tags'),
        ('epoch_num', 'epoch_num'),
        ('e_pretrain', 'e_pretrain'),
        ('shuffle', 'shuffle'),
    )
    for flag_name, opts_key in optional_overrides:
        flag_value = getattr(FLAGS, flag_name)
        if flag_value is not None:
            opts[opts_key] = flag_value

    # grad_clip is always set; it is None when the flag is not given.
    opts['grad_clip'] = FLAGS.grad_clip
    # nat_size falls back to the train_size flag when not given explicitly.
    if FLAGS.nat_size is not None:
        opts['nat_size'] = FLAGS.nat_size
    else:
        opts['nat_size'] = FLAGS.train_size

    # Flags that are always copied into the options. 'recalculate_size' is
    # deliberately absent here: copying it unconditionally would discard the
    # batch_size fallback computed above whenever the flag is unset.
    for flag_name in ('nat_resampling', 'sinkhorn_sparse',
                      'sinkhorn_sparsifier', 'sparsifier_freq',
                      'sinkhorn_unbiased', 'feed_by_score_from_epoch',
                      'stay_lambda', 'mover_ratio'):
        opts[flag_name] = getattr(FLAGS, flag_name)
    assert 0 <= opts['mover_ratio'] <= 1, "mover_ratio must be in [0,1]"

    if opts['verbose']:
        pass  # DEBUG-level logging is currently disabled.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(message)s')

    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))

    if opts['e_noise'] == 'gaussian' and opts['pz'] != 'normal':
        assert False, 'Gaussian encoders compatible only with Gaussian prior'
        return  # Still bail out when assertions are stripped (python -O).

    # Dump all options to a text file inside the work directory.
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))

    # Load the dataset.
    data = DataHandler(opts)
    assert data.num_points >= opts['batch_size'], 'Training set too small'

    if 'train_size' in opts and opts['train_size'] is not None:
        train_size = opts['train_size']
    else:
        train_size = data.num_points
    print("Train size:", train_size)
    # -1 means "as many as the training set".
    if opts['nat_size'] == -1:
        opts['nat_size'] = train_size

    use_neptune = "NEPTUNE_API_TOKEN" in os.environ

    if opts['mode'] == 'train':
        if use_neptune:
            neptune.init(project_qualified_name="csadrian/global-sinkhorn")
            exp = neptune.create_experiment(
                params=opts,
                name=opts['name'],
                upload_source_files=['wae.py', 'run.py', 'models.py'])
            for tag in opts['tags'].split(','):
                neptune.append_tag(tag)

        # Create and train the WAE model on the (possibly reduced) train set.
        wae = WAE(opts, train_size)
        data.num_points = train_size
        wae.train(data)

        if use_neptune:
            exp.stop()
    elif opts['mode'] == 'test':
        improved_wae.improved_sampling(opts)
    elif opts['mode'] == 'generate':
        fideval.generate(opts)
    elif opts['mode'] == 'draw':
        picture_plot.createimgs(opts)
def __init__(self, tags):
    """Select the Neptune project, start an experiment and attach each tag.

    Args:
        tags: Iterable of tags; each one is appended to the new experiment.
    """
    neptune.set_project('pixelneo/whoosh')
    neptune.create_experiment()
    for label in tags:
        neptune.append_tag(label)
def train_imagenette(PARAMS):
    """Train a VGG16-based classifier on Imagenette and log the run to Neptune.

    Args:
        PARAMS: Run configuration dict. Keys read here: 'dataset_name',
            'model_name', 'target_size', 'BATCH_SIZE', 'augment_train' and
            'learning_rate'. 'num_classes' is written back into it.

    Returns:
        The history object returned by ``model.fit``.
    """
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])

    K.clear_session()
    tf.random.set_seed(34)
    target_size = PARAMS['target_size']
    BATCH_SIZE = PARAMS['BATCH_SIZE']

    train_dataset, validation_dataset, info = create_Imagenette_dataset(
        BATCH_SIZE,
        target_size=target_size,
        augment_train=PARAMS['augment_train'])
    num_classes = info.features['label'].num_classes
    encoder = base_dataset.LabelEncoder(info.features['label'].names)

    train_dataset = train_dataset.map(
        lambda x, y: apply_preprocess(x, y, num_classes),
        num_parallel_calls=-1)
    validation_dataset = validation_dataset.map(
        lambda x, y: apply_preprocess(x, y, num_classes),
        num_parallel_calls=-1)

    PARAMS['num_classes'] = num_classes
    steps_per_epoch = info.splits['train'].num_examples // BATCH_SIZE
    validation_steps = info.splits['validation'].num_examples // BATCH_SIZE

    neptune.set_property('num_classes', num_classes)
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['learning_rate'])
    loss = 'categorical_crossentropy'
    METRICS = ['accuracy']

    base = tf.keras.applications.vgg16.VGG16(
        weights='imagenet',
        include_top=False,
        input_tensor=Input(shape=(*target_size, 3)))

    # TODO try freezing weights for input_shape != (224,224)
    model = build_head(base, num_classes=num_classes)
    model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)

    # NOTE(review): `neptune_logger` is not defined in this function; it is
    # presumably a module-level Keras callback - confirm.
    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    ]

    # Send every line of the model summary to Neptune instead of stdout.
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    history = model.fit(train_dataset,
                        epochs=10,
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)
    # Hand the training history back to the caller, as train_pnas does.
    return history
def train_model(params, device):
    """Train the selected model and track the metrics of the best validation epoch.

    Args:
        params: Run configuration dict. Keys read here: 'bert_tokens',
            'auto_weights', 'device', 'learning_rate', 'epsilon', 'epochs',
            'random_seed', 'logging', 'best_params', plus 'path_files' (BERT
            models) or 'model_name' (other models). 'embed_size',
            'vocab_size' and 'weights' may be written back into it.
        device: Device the batches are moved to; it is also forwarded to the
            model and to ``Eval_phase``.

    Returns:
        The model as it stands after the last epoch. The checkpoint written
        by the save helpers is the one with the best validation F-score.
    """
    embeddings = None
    if params['bert_tokens']:
        train, val, test = createDatasetSplit(params)
    else:
        train, val, test, vocab_own = createDatasetSplit(params)
        params['embed_size'] = vocab_own.embeddings.shape[1]
        params['vocab_size'] = vocab_own.embeddings.shape[0]
        embeddings = vocab_own.embeddings

    if params['auto_weights']:
        # NOTE(review): the class weights are derived from the *test* split
        # labels - confirm this is intended rather than the train split.
        y_test = [ele[2] for ele in test]
        # `classes` and `y` are keyword-only in current scikit-learn; passing
        # them by keyword also works on older releases.
        params['weights'] = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(y_test),
            y=y_test).astype('float32')
        print(params['weights'])

    train_dataloader = combine_features(train, params, is_train=True)
    validation_dataloader = combine_features(val, params, is_train=False)
    test_dataloader = combine_features(test, params, is_train=False)

    model = select_model(params, embeddings)
    if params["device"] == 'cuda':
        model.cuda()

    optimizer = AdamW(model.parameters(),
                      lr=params['learning_rate'],
                      eps=params['epsilon'])

    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * params['epochs']

    # Only BERT models use the linear warm-up schedule (10% warm-up steps).
    if params['bert_tokens']:
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(total_steps / 10),
            num_training_steps=total_steps)

    # Seed everything to make the run reproducible.
    fix_the_random(seed_val=params['random_seed'])

    if params['bert_tokens']:
        name_one = params['path_files']
    else:
        name_one = params['model_name']

    log_to_neptune = params['logging'] == 'neptune'
    if log_to_neptune:
        neptune.create_experiment(name_one,
                                  params=params,
                                  send_hardware_metrics=False,
                                  run_monitoring_thread=False)
        neptune.append_tag(name_one)
        if params['best_params']:
            neptune.append_tag('AAAI final best')
        else:
            neptune.append_tag('AAAI final')

    # Metrics of the epoch with the best validation F-score, kept in the
    # order in which they are reported at the end.
    best = {
        'best_val_fscore': 0,
        'best_test_fscore': 0,
        'best_val_rocauc': 0,
        'best_test_rocauc': 0,
        'best_val_precision': 0,
        'best_test_precision': 0,
        'best_val_recall': 0,
        'best_test_recall': 0,
    }
    metric_names = ('fscore', 'accuracy', 'precision', 'recall', 'rocauc')

    for epoch_i in range(0, params['epochs']):
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1,
                                                         params['epochs']))
        print('Training...')

        total_loss = 0
        model.train()

        for batch in tqdm(train_dataloader):
            # `batch` holds four tensors: input ids, attention values,
            # attention mask and labels.
            b_input_ids = batch[0].to(device)
            b_att_val = batch[1].to(device)
            b_input_mask = batch[2].to(device)
            b_labels = batch[3].to(device)

            # Gradients accumulate by default, so clear them for every batch.
            model.zero_grad()
            outputs = model(b_input_ids,
                            attention_vals=b_att_val,
                            attention_mask=b_input_mask,
                            labels=b_labels,
                            device=device)
            # The model returns a tuple whose first element is the loss.
            loss = outputs[0]
            batch_loss = loss.item()

            if log_to_neptune:
                neptune.log_metric('batch_loss', batch_loss)
            total_loss += batch_loss

            loss.backward()
            # Clip the gradient norm to 1.0 to guard against exploding gradients.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            if params['bert_tokens']:
                scheduler.step()

        avg_train_loss = total_loss / len(train_dataloader)
        if log_to_neptune:
            neptune.log_metric('avg_train_loss', avg_train_loss)
        else:
            print('avg_train_loss', avg_train_loss)

        (train_fscore, train_accuracy, train_precision, train_recall,
         train_roc_auc, _) = Eval_phase(params, 'train', model,
                                        train_dataloader, device)
        (val_fscore, val_accuracy, val_precision, val_recall,
         val_roc_auc, _) = Eval_phase(params, 'val', model,
                                      validation_dataloader, device)
        (test_fscore, test_accuracy, test_precision, test_recall,
         test_roc_auc, _) = Eval_phase(params, 'test', model,
                                       test_dataloader, device)

        # Report this epoch's metrics for every split.
        if log_to_neptune:
            per_split = (
                ('test', (test_fscore, test_accuracy, test_precision,
                          test_recall, test_roc_auc)),
                ('val', (val_fscore, val_accuracy, val_precision,
                         val_recall, val_roc_auc)),
                ('train', (train_fscore, train_accuracy, train_precision,
                           train_recall, train_roc_auc)),
            )
            for split, scores in per_split:
                for metric, value in zip(metric_names, scores):
                    neptune.log_metric(split + '_' + metric, value)

        # A new best validation F-score: remember the metrics and save.
        if val_fscore > best['best_val_fscore']:
            print(val_fscore, best['best_val_fscore'])
            best['best_val_fscore'] = val_fscore
            best['best_test_fscore'] = test_fscore
            best['best_val_rocauc'] = val_roc_auc
            best['best_test_rocauc'] = test_roc_auc
            best['best_val_precision'] = val_precision
            best['best_test_precision'] = test_precision
            best['best_val_recall'] = val_recall
            best['best_test_recall'] = test_recall
            if params['bert_tokens']:
                print('Loading BERT tokenizer...')
                tokenizer = BertTokenizer.from_pretrained(
                    'bert-base-uncased', do_lower_case=False)
                save_bert_model(model, tokenizer, params)
            else:
                print("Saving model")
                save_normal_model(model, params)

    # Report the best-epoch metrics once training is over.
    if log_to_neptune:
        for name, value in best.items():
            neptune.log_metric(name, value)
        neptune.stop()
    else:
        for name, value in best.items():
            print(name, value)

    return model
def train_pnas(PARAMS):
    """Build and train a model on the configured dataset, logging the run to Neptune.

    Args:
        PARAMS: Run configuration dict. Keys read here: 'log_dir',
            'model_dir', 'dataset_name', 'model_name', 'target_size',
            'num_channels', 'color_mode', 'BATCH_SIZE', 'splits',
            'augment_train', 'aug_prob', 'lr' and 'num_epochs'. The keys
            'num_classes', 'splits_size', 'base_learning_rate' and
            'input_shape' are written back into it.

    Returns:
        The history object returned by ``model.fit``.
    """
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])

    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])

    K.clear_session()
    tf.random.set_seed(34)

    train_dataset, validation_dataset, data_files = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        batch_size=PARAMS['BATCH_SIZE'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augment_train=PARAMS['augment_train'],
        aug_prob=PARAMS['aug_prob'])

    PARAMS['num_classes'] = data_files.num_classes
    # Cast to int so the step counts below are integers even when the split
    # entries are fractional (same convention as train_pyleaves_dataset).
    PARAMS['splits_size'] = {
        'train': int(data_files.num_samples * PARAMS['splits']['train']),
        'validation': int(data_files.num_samples *
                          PARAMS['splits']['validation']),
    }

    steps_per_epoch = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    validation_steps = (PARAMS['splits_size']['validation'] //
                        PARAMS['BATCH_SIZE'])

    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    encoder = base_dataset.LabelEncoder(data_files.classes)

    # NOTE(review): `neptune_logger` is not defined in this function; it is
    # presumably a module-level Keras callback - confirm.
    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=25, verbose=1)
    ]

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])

    model = build_model(PARAMS)

    # Send every line of the model summary to Neptune instead of stdout.
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)

    # Record the final (augmented) parameters on the experiment as strings.
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))

    return history
Epoch:\t{}\n\ Rounds:\t{}\n\ Total Number of Users:\t{}\n\ Selected Users:\t{}\n\ Server Pure Data: {}\n\ Mode:\t{}\n\ Attack:\t{}\n\ Attackers:\t{}\n\ Output folder:\t{}".format(args.epochs, args.rounds, args.total_users_num, args.selected_users_num, args.server_pure, args.mode, args.attack_type, args.attackers_num, args.log_dir)) # Neptune logging initialization if args.neptune_log: neptune.init(project_qualified_name=configs['log']['neptune_init'], api_token=utils.get_neptune_token()) neptune.create_experiment(name=configs['log']['neptune_exp'], upload_stdout=False, upload_stderr=False) neptune.append_tag(args.log_dir.split("/")[1]) last_round = main(arguments['--start-round']) if args.neptune_log: neptune.stop() if last_round < args.rounds: exit(1) else: exit(0)
def Eval_phase(params, which_files='test', model=None):
    """Evaluate a BERT classifier on one data split.

    Args:
        params: Run configuration dict. Keys read here: 'language',
            'csv_file', 'files', 'path_files', 'is_model', 'what_bert',
            'weights', 'max_length', 'batch_size' and 'logging'. Note that
            'csv_file' is overwritten with '*_full.csv' when 'language' is
            'English'.
        which_files: Split to evaluate on: 'train', 'val' or 'test'.
        model: Model to evaluate; used only when ``params['is_model']`` is
            True, otherwise a model is obtained from ``select_model``.

    Returns:
        A ``(macro_f1, accuracy)`` tuple for the chosen split.

    Raises:
        ValueError: If `which_files` is not 'train', 'val' or 'test'.
    """
    # Fail fast, before the tokenizer and model are loaded.
    if which_files not in ('train', 'val', 'test'):
        raise ValueError(
            "which_files must be 'train', 'val' or 'test', got {!r}".format(
                which_files))

    # For English there is no translation, hence use the full dataset.
    if params['language'] == 'English':
        params['csv_file'] = '*_full.csv'

    test_files = glob.glob(params['files'] + '/' + which_files + '/' +
                           params['csv_file'])

    print('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained(params['path_files'],
                                              do_lower_case=False)

    # Use the given model if one was passed, otherwise build one. Either way
    # switch to evaluation mode: dropout layers behave differently there.
    if params['is_model'] == True:
        print("model previously passed")
        model.eval()
    else:
        model = select_model(params['what_bert'], params['path_files'],
                             params['weights'])
        model.cuda()
        model.eval()

    # Load the split and pick the text column matching the file pattern.
    df_test = data_collector(test_files, params, False)
    if params['csv_file'] == '*_translated.csv':
        sentences_test = df_test.translated.values
    elif params['csv_file'] == '*_full.csv':
        sentences_test = df_test.text.values
    labels_test = df_test.label.values

    # Encode the dataset using the tokenizer.
    input_test_ids, att_masks_test = combine_features(sentences_test,
                                                      tokenizer,
                                                      params['max_length'])
    test_dataloader = return_dataloader(input_test_ids,
                                        labels_test,
                                        att_masks_test,
                                        batch_size=params['batch_size'],
                                        is_train=False)

    print("Running eval on ", which_files, "...")
    t0 = time.time()

    true_labels = []
    pred_labels = []
    for batch in test_dataloader:
        # NOTE(review): `device` is not defined in this function; it is
        # presumably a module-level global - confirm.
        b_input_ids, b_input_mask, b_labels = tuple(
            t.to(device) for t in batch)

        # No gradients are needed for evaluation; this saves memory and time.
        with torch.no_grad():
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask)
        # Move logits and labels to the CPU for the metric computation.
        logits = outputs[0].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        pred_labels += list(np.argmax(logits, axis=1).flatten())
        true_labels += list(label_ids.flatten())

    # Macro F1 and accuracy over the whole split.
    testf1 = f1_score(true_labels, pred_labels, average='macro')
    testacc = accuracy_score(true_labels, pred_labels)

    if params['logging'] != 'neptune' or params['is_model'] == True:
        # Called from within a training run (or without Neptune): just print.
        print(" Accuracy: {0:.2f}".format(testacc))
        print(" Fscore: {0:.2f}".format(testf1))
        print(" Test took: {:}".format(format_time(time.time() - t0)))
    else:
        # Stand-alone evaluation: record the result as its own experiment.
        bert_model = params['path_files'][:-1]
        language = params['language']
        neptune.create_experiment(bert_model + "_" + language,
                                  params=params,
                                  send_hardware_metrics=False,
                                  run_monitoring_thread=False)
        neptune.append_tag(bert_model)
        neptune.append_tag(language)
        neptune.append_tag('test')
        neptune.log_metric('test_f1score', testf1)
        neptune.log_metric('test_accuracy', testacc)
        neptune.stop()

    return testf1, testacc
def _record_split_sizes(PARAMS, data_files):
    """Store class count, per-split sample counts and step counts in PARAMS."""
    PARAMS['num_classes'] = data_files.num_classes
    PARAMS['splits_size'] = {
        'train': int(data_files.num_samples * PARAMS['splits']['train']),
        'validation': int(data_files.num_samples * PARAMS['splits']['validation']),
    }
    PARAMS['steps_per_epoch'] = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    PARAMS['validation_steps'] = PARAMS['splits_size']['validation'] // PARAMS['BATCH_SIZE']


def _fit_stage(model, PARAMS, train_dataset, validation_dataset):
    """Fit `model` with the Neptune, backup and early-stopping callbacks."""
    backup_callback = BackupAndRestore(PARAMS['checkpoints_path'])
    backup_callback.set_model(model)
    callbacks = [
        neptune_logger,
        backup_callback,
        EarlyStopping(monitor='val_loss',
                      patience=25,
                      verbose=1,
                      restore_best_weights=True),
    ]
    return model.fit(train_dataset,
                     epochs=PARAMS['num_epochs'],
                     callbacks=callbacks,
                     validation_data=validation_dataset,
                     shuffle=True,
                     steps_per_epoch=PARAMS['steps_per_epoch'],
                     validation_steps=PARAMS['validation_steps'])


def train_pyleaves_dataset(PARAMS):
    """Train a model on the configured dataset, optionally followed by a second stage.

    Stage 1 builds the dataset named by ``PARAMS['dataset_name']``, builds a
    model with ``build_model`` and fits it. The model is then saved to
    ``PARAMS['saved_model_path'] + '-stage 1'`` and every PARAMS entry is
    written to Neptune as a string property.

    Stage 2 runs only when ``PARAMS['transfer_to_PNAS']`` or
    ``PARAMS['transfer_to_Fossil']`` is truthy. It builds the dataset named by
    ``PARAMS['stage_2']['dataset_name']``, restricted to the stage-1 classes,
    and continues fitting the same model instance.

    Args:
        PARAMS: Mutable configuration dict. Besides the keys read here, the
            following are written: 'num_classes', 'splits_size',
            'steps_per_epoch', 'validation_steps', 'base_learning_rate',
            'input_shape' and, for stage 2, 'include_classes'.

    Returns:
        The object returned by the last ``model.fit`` call (stage 2 if it ran,
        otherwise stage 1).
    """
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])

    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    # num_channels is used as a number in input_shape below, so convert it to
    # a string tag the same way target_size is.
    neptune.append_tag(str(PARAMS['num_channels']))
    neptune.append_tag(PARAMS['color_mode'])

    K.clear_session()
    tf.random.set_seed(PARAMS['seed'])

    train_dataset, validation_dataset, STAGE1_data_files, _ = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        threshold=PARAMS['threshold'],
        batch_size=PARAMS['BATCH_SIZE'],
        buffer_size=PARAMS['buffer_size'],
        exclude_classes=PARAMS['exclude_classes'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augmentations=PARAMS['augmentations'],
        seed=PARAMS['seed'],
        use_tfrecords=PARAMS['use_tfrecords'],
        tfrecord_dir=PARAMS['tfrecord_dir'],
        samples_per_shard=PARAMS['samples_per_shard'])

    _record_split_sizes(PARAMS, STAGE1_data_files)

    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', PARAMS['steps_per_epoch'])
    neptune.set_property('validation_steps', PARAMS['validation_steps'])

    # TODO: log encoder contents as dict
    # NOTE(review): `encoder` is not used below; the construction is kept in
    # case LabelEncoder has side effects -- confirm before removing.
    encoder = base_dataset.LabelEncoder(STAGE1_data_files.classes)

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])

    # TODO: Change build_model to build_or_load_model
    model = build_model(PARAMS)

    # Send each line of the model summary to Neptune instead of stdout.
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    history = _fit_stage(model, PARAMS, train_dataset, validation_dataset)

    model.save(PARAMS['saved_model_path'] + '-stage 1')

    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))

    if PARAMS['transfer_to_PNAS'] or PARAMS['transfer_to_Fossil']:
        # Restrict the second dataset to the classes seen in stage 1.
        PARAMS['include_classes'] = STAGE1_data_files.classes

        train_dataset, validation_dataset, STAGE2_data_files, _ = create_dataset(
            dataset_name=PARAMS['stage_2']['dataset_name'],
            threshold=PARAMS['threshold'],
            batch_size=PARAMS['BATCH_SIZE'],
            buffer_size=PARAMS['buffer_size'],
            exclude_classes=PARAMS['exclude_classes'],
            include_classes=PARAMS['include_classes'],
            target_size=PARAMS['target_size'],
            num_channels=PARAMS['num_channels'],
            color_mode=PARAMS['color_mode'],
            splits=PARAMS['splits'],
            augmentations=PARAMS['augmentations'],
            seed=PARAMS['seed'])

        _record_split_sizes(PARAMS, STAGE2_data_files)

        history = _fit_stage(model, PARAMS, train_dataset, validation_dataset)

    return history
# Remaining command-line options; `parser` is created before this point.
parser.add_argument("--num_jobs", type=int, default=8)
parser.add_argument("--record_filtered", action="store_true")
parser.add_argument("--use_atomrings", action="store_true")
args = parser.parse_args()

# The algorithm name is fixed here; it is reused below as the experiment name
# and is part of the parameters sent to Neptune via vars(args).
args.algorithm = "gegl_constrained"

# Seed Python's `random` module with a fixed value.
random.seed(0)
device = torch.device(0)

# Start a Neptune experiment that records every parsed argument as a parameter.
neptune.init(
    project_qualified_name="sungsoo.ahn/deep-molecular-optimization")
experiment = neptune.create_experiment(name=args.algorithm,
                                       params=vars(args))
# Tag format: zero-padded min id, zero-padded max id and the similarity
# threshold joined by "_", with every "." removed from the result.
neptune.append_tag(
    f"{args.smi_id_min:03d}_{args.smi_id_max:03d}_{args.similarity_threshold}"
    .replace(".", ""))

char_dict = SmilesCharDictionary(dataset=args.dataset,
                                 max_smi_len=args.max_smiles_length)
dataset = load_dataset(char_dict=char_dict, smi_path=args.dataset_path)

# Select which implementation is called in the loop below, based on
# --use_atomrings.
if args.use_atomrings:
    similarity_constrained_penalized_logp = similarity_constrained_penalized_logp_atomrings
else:
    similarity_constrained_penalized_logp = similarity_constrained_penalized_logp_cyclebasis

# Iterate over the half-open id range [smi_id_min, smi_id_max).
for smi_id in range(args.smi_id_min, args.smi_id_max):
    print(f"ID: {smi_id}")
    reference_smi = dataset[smi_id]
    benchmark = similarity_constrained_penalized_logp(
import neptune

# `params`, `train_f1` and `test_f1` are used below but not defined in this
# part of the script; they are expected to exist before this point.
neptune.init(
    project_qualified_name='shared/onboarding',  # change this to your `workspace_name/project_name`
    api_token='ANONYMOUS',  # change this to your api token
)

# Step 3: Create an experiment and save parameters
neptune.create_experiment(name='great-idea', params=params)

# Step 4. Add tags to organize things
neptune.append_tag(['experiment-organization', 'me'])

# Step 5. Add logging of train and evaluation metrics
neptune.log_metric('train_f1', train_f1)
neptune.log_metric('test_f1', test_f1)

# Step 6. Run a few experiments with different parameters

# tests: the experiment must contain exactly the two metric logs written above.
current_exp = neptune.get_experiment()
correct_logs = ['train_f1', 'test_f1']
logged_names = set(current_exp.get_logs().keys())
if logged_names != set(correct_logs):
    # Report both sides so a failing check can be diagnosed from the message.
    raise ValueError(
        f"Unexpected experiment logs: expected {sorted(correct_logs)}, "
        f"got {sorted(logged_names)}")
# Choose the Neptune project that receives the experiment.
neptune.init('USERNAME/example-project')

# Parameters of this example run.
PARAMS = {
    'timeseries_factor': 1.7,
    'n_iterations': 200,
    'n_images': 7,
}

# Open the experiment and attach the parameters to it.
neptune.create_experiment(name='timeseries_example', params=PARAMS)

# One metric pair and one text entry per step, for steps 1 .. n_iterations - 1.
for step in range(1, PARAMS['n_iterations']):
    wave = PARAMS['timeseries_factor'] * np.cos(step / 10)
    message = 'some value {}'.format(0.95 * step**2)
    neptune.log_metric('iteration', step)
    neptune.log_metric('timeseries', wave)
    neptune.log_text('text_info', message)

# Key/value property stored on the experiment.
neptune.set_property('timeseries_data_hash', '123e4567')

# Tag the experiment.
neptune.append_tag('timeseries_modeling')

# Log n_images random mosaics: a 10x10x3 array scaled to [0, 255) whose
# entries are repeated 30 times along the first two axes (300x300x3).
for _ in range(PARAMS['n_images']):
    tile = np.random.rand(10, 10, 3) * 255
    mosaic = tile.repeat(30, axis=0).repeat(30, axis=1)
    neptune.log_image('mosaics', mosaic)

neptune.stop()