import io
from typing import Dict

import matplotlib.pyplot as plt
from comet_ml import Experiment
from PIL import Image


class CometMLLogger:
    def __init__(self):
        self.experiment = Experiment(api_key="iU4f44llKnowZwmrEo9wfR2ch",
                                     project_name="general",
                                     workspace="yahyaalaamassoud",
                                     log_code=False,
                                     log_graph=False)

    def log_params(self, params: Dict[str, int]):
        self.experiment.log_parameters(params)

    def log_metric(self, metric_name, metric_val, step=None):
        self.experiment.log_metric(metric_name, metric_val, step=step)

    def log_metrics(self, metrics: Dict[str, float], step=None):
        self.experiment.log_metrics(metrics, step=step)

    def log_figure(self, figure_name: str, step: str):
        self.experiment.log_image(image_data=self.__savefig(),
                                  name=figure_name,
                                  step=step,
                                  overwrite=False)

    def __savefig(self):
        # Render the current matplotlib figure into an in-memory PNG buffer
        # and return it as a PIL image.
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        return Image.open(io.BytesIO(buf.getvalue()))
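# A minimal usage sketch for the logger above (the figure contents and metric
# values are illustrative, not from the original project):
import numpy as np

logger = CometMLLogger()
logger.log_params({"batch_size": 32, "epochs": 10})
plt.plot(np.arange(10), np.arange(10) ** 2)   # draw any matplotlib figure
logger.log_figure("quadratic_curve", step=0)  # captures the current figure via plt.savefig
logger.log_metric("train_loss", 0.42, step=0)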
from os.path import join

import torch
from comet_ml import Experiment


def main():
    hyper_params = {
        "learning_rate": 1,
        "style_weight": 10000,
        "content_weight": 1,
        "n_steps": 300
    }
    experiment = Experiment(api_key="a604AfX0S9Bmt6HdpMHxg9MCI",
                            project_name="style-transfer",
                            workspace="polmonroig")
    experiment.log_parameters(hyper_params)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        print("Currently using cuda device.")

    # define file paths and directories
    content_images_dir = "content_images/"
    style_images_dir = "style_images/"
    output_images_dir = "output_images/"
    content_image_name = "content_01.png"
    style_image_name = "style_01.jpg"
    output_image_path = join(
        output_images_dir,
        content_image_name.split('.')[0] + "_" +
        style_image_name.split('.')[0] + ".jpg")
    content_image_path = join(content_images_dir, content_image_name)
    style_image_path = join(style_images_dir, style_image_name)

    # define image file manager (FileManager and ArtNet are project-local classes)
    max_shape = (800, 800)
    fileManager = FileManager(content_image_path, style_image_path, device,
                              max_shape)

    # read images
    content_image, style_image = fileManager.read_images()
    input_image = content_image.clone()

    model = ArtNet(device=device)
    output_image = model.train(hyper_params['content_weight'],
                               hyper_params['style_weight'],
                               hyper_params['n_steps'], content_image,
                               style_image, input_image,
                               hyper_params['learning_rate'], experiment)
    fileManager.save_image(output_image, output_image_path)
    experiment.log_image(
        output_image_path,
        content_image_name.split('.')[0] + "_" +
        style_image_name.split('.')[0])
import time

from comet_ml import Experiment


def upload_images_to_exp(path,
                         exp=None,
                         project_name="climategan-eval",
                         sleep=-1,
                         verbose=0):
    ims = find_images(path)
    end = None
    c = cols()
    if verbose == 1:
        end = "\r"
    if verbose > 1:
        end = "\n"
    if exp is None:
        exp = Experiment(project_name=project_name)
    for im in ims:
        exp.log_image(str(im))
        if verbose > 0:
            if verbose == 1:
                print(" " * (c - 1), end="\r", flush=True)
            print(str(im), end=end, flush=True)
        if sleep > 0:
            time.sleep(sleep)
    return exp
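# A sketch of how this helper might be called; the directory path is
# hypothetical, and `find_images`/`cols` are assumed to be project helpers:
exp = upload_images_to_exp("outputs/eval_images", verbose=1, sleep=0.5)
exp.end()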
policy_loss_sum = torch.cat(policy_loss).sum() / len(policy_loss)
if i_episode >= params["KLD_DELAY"]:
    policy_loss_sum += params["KLD_WEIGHT"] * KLD
    policy_loss_sum += params["VARIANCE_WEIGHT"] * torch.norm(latent_variance)
loss_copy = policy_loss_sum.detach().cpu().numpy().copy()
policy_loss_sum.backward()
optimizer.step()

if i_episode % params["CHKP_FREQ"] == 0:
    torch.save(
        policy.state_dict(),
        os.path.join(exp_dir, 'reinforce-' + str(i_episode) + '.pkl'))

    # Build a side-by-side image: current state | next state | scaled diff.
    img = np.zeros((params["HEIGHT"], params["WIDTH"] * 3, 3), dtype=np.uint8)
    img[:, :params["WIDTH"], :] = np.around(state_raw * 255, 0)
    img[:, params["WIDTH"]:params["WIDTH"] * 2, :] = np.around(
        next_state * 255, 0)
    # Note: the original wrote `np.sum(...) + 2 / 4`, which just adds 0.5;
    # rescaling the difference into [0, 1] needs the parentheses below.
    diff = (np.sum(state_raw - next_state, axis=2) + 2) / 4
    diff = np.dstack((diff, diff, diff))
    img[:, params["WIDTH"] * 2:, :] = np.around(diff * 255, 0)
    experiment.log_image(img, name="{:04d}".format(i_episode))

experiment.log_metric("rewards", np.mean(rewards_raw))
experiment.log_metric("loss", float(loss_copy))
experiment.log_metric("kld", KLD.item())
print("Creating model...") # Build model with supplied parameters model = models.get_model(model_name, (1, ), len(labels), model_params) print("Built model using parameters:") for key, value in model_params.items(): print("{}: {}".format(key, value)) # Display a model summary and create/save a model graph definition and image model.summary() model_image_file = os.path.join(output_dir, experiment_name + '_model.png') tf.keras.utils.plot_model(model, to_file=model_image_file, show_layer_names=False, show_shapes=True) experiment.log_image(model_image_file) # Initialise variables sess.run(tf.local_variables_initializer()) sess.run(tf.global_variables_initializer()) sess.run(tf.tables_initializer()) tf.keras.backend.set_session(sess) # Load initialisation weights if set if load_model and init_ckpt_file and os.path.exists( os.path.join(checkpoint_dir, init_ckpt_file)): model.load_weights(os.path.join(checkpoint_dir, init_ckpt_file)) print("Loaded model weights from: " + init_ckpt_file) # Initialise model checkpointer and early stopping monitor checkpointer = check_pointer.Checkpointer(checkpoint_dir,
class CometMLMonitor(MonitorBase):
    """
    Send scalar data and the graph to https://www.comet.ml.

    Note:
        1. comet_ml requires you to `import comet_ml` before importing tensorflow or tensorpack.
        2. The "automatic output logging" feature of comet_ml will make the training progress bar
           appear to freeze. Therefore the feature is disabled by default.
    """
    def __init__(self, experiment=None, tags=None, **kwargs):
        """
        Args:
            experiment (comet_ml.Experiment): if provided, invalidate all other arguments
            tags (list[str]): experiment tags
            kwargs: arguments used to initialize :class:`comet_ml.Experiment`,
                such as project name, API key, etc.
                Refer to its documentation for details.
        """
        if experiment is not None:
            self._exp = experiment
            assert tags is None and len(kwargs) == 0
        else:
            from comet_ml import Experiment
            kwargs.setdefault(
                'log_code', True
            )  # though it's not functioning, git patch logging requires it
            kwargs.setdefault('auto_output_logging', None)
            self._exp = Experiment(**kwargs)
            if tags is not None:
                self._exp.add_tags(tags)

        self._exp.set_code("Code logging is impossible ...")
        self._exp.log_dependency('tensorpack', __git_version__)

    @property
    def experiment(self):
        """
        The :class:`comet_ml.Experiment` instance.
        """
        return self._exp

    def _before_train(self):
        self._exp.set_model_graph(tf.get_default_graph())

    @HIDE_DOC
    def process_scalar(self, name, val):
        self._exp.log_metric(name, val, step=self.global_step)

    @HIDE_DOC
    def process_image(self, name, val):
        self._exp.set_step(self.global_step)
        for idx, v in enumerate(val):
            log_name = "{}_step{}{}".format(
                name, self.global_step, "_" + str(idx) if len(val) > 1 else "")
            self._exp.log_image(v,
                                image_format="jpeg",
                                name=log_name,
                                image_minmax=(0, 255))

    def _after_train(self):
        self._exp.end()

    def _after_epoch(self):
        self._exp.log_epoch_end(self.epoch_num)
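# As the docstring above warns, comet_ml must be imported before TensorFlow.
# A minimal setup sketch (the project name is a placeholder); in tensorpack,
# monitors like this one are typically handed to the trainer's monitor list:
from comet_ml import Experiment  # noqa: F401 -- must precede any tensorflow import
import tensorflow as tf

monitor = CometMLMonitor(project_name="my-tensorpack-run", tags=["baseline"])
print(monitor.experiment)  # the underlying comet_ml.Experiment instance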
experiment.log_metric("train loss", avg_train_loss, step=step) experiment.log_metric("train perplexity", avg_train_perplexity, step=step) experiment.log_metric("val loss", avg_val_loss, step=step) experiment.log_metric("val perplexity", avg_val_perplexity, step=step) outfile_loss_plot = "model/loss_plot.png" performance_plot( train_loss_list, val_loss_list, outfile=outfile_loss_plot, title="Loss vs Epoch", ylab="Avg. Batch Loss", ) experiment.log_image(outfile_loss_plot) outfile_ppl_plot = "model/ppl_plot.png" performance_plot( train_ppl_list, val_ppl_list, outfile=outfile_ppl_plot, title="Perplpexity vs Epoch", ylab="Avg. Batch Perplexity", ) experiment.log_image(outfile_ppl_plot) print("done")
# (continuation of a loop over label indices i)
tmp = df[df['class'] == labels[i]][:1].reset_index()
path = 'UrbanSound8K/audio/fold{}/{}'.format(tmp['fold'][0],
                                             tmp['slice_file_name'][0])
files[labels[i]] = path

fig = plt.figure(figsize=(15, 15))
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i, label in enumerate(labels):
    fn = files[label]
    fig.add_subplot(5, 2, i + 1)
    plt.title(label)
    data, sample_rate = librosa.load(fn)
    _ = librosa.display.waveplot(data, sr=sample_rate)
plt.savefig('class_examples.png')

# Log graphic of waveforms to Comet
experiment.log_image('class_examples.png')

# Log audio files to Comet for debugging
for label in labels:
    fn = files[label]
    experiment.log_audio(fn, metadata={'name': label})

audiodata = []
for index, row in df.iterrows():
    fn = 'UrbanSound8K/audio/fold{}/{}'.format(row['fold'],
                                               row['slice_file_name'])
    data = read_file_properties(fn)
    audiodata.append(data)

# Convert to dataframe
    opt)  # regular setup: load and print networks; create schedulers

# create a website
web_dir = os.path.join(opt.results_dir, opt.name,
                       '%s_%s' % (opt.phase, opt.epoch))  # define the website directory
webpage = html.HTML(
    web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' %
    (opt.name, opt.phase, opt.epoch))

# test with eval mode. This only affects layers like batchnorm and dropout.
# For [pix2pix]: we use batchnorm and dropout in the original pix2pix. You can experiment it with and without eval() mode.
# For [CycleGAN]: It should not affect CycleGAN as CycleGAN uses instancenorm without dropout.
if opt.eval:
    model.eval()
for i, data in enumerate(dataset):
    if i >= opt.num_test:  # only apply our model to opt.num_test images.
        break
    model.set_input(data)  # unpack data from data loader
    model.test()  # run inference
    visuals = model.get_current_visuals()  # get image results
    img_path = model.get_image_paths()  # get image paths
    comet_exp.log_image(img_path[0])
    if i % 5 == 0:
        print('processing (%04d)-th image... %s' % (i, img_path))
    # save images to an HTML file
    save_images(webpage, visuals, img_path,
                aspect_ratio=opt.aspect_ratio,
                width=opt.display_winsize)
webpage.save()  # save the HTML
comet_exp.log_asset_folder(webpage.get_image_dir())
    top_n_largest]
top_n_best_eigen_papers_for_rejection = dim_reduction.components_[
    top_n_smallest]

shape = (args.width, args.height)
if args.mode == Mode.RGBBigImage:
    shape = (args.width * 2, args.height * 4, 3)

acceptance_eigen = [
    paper.reshape(shape)
    for paper in top_n_best_eigen_papers_for_acceptance
]
rejection_eigen = [
    paper.reshape(shape)
    for paper in top_n_best_eigen_papers_for_rejection
]

# Make them into images by rescaling each eigen-paper to [0, 1].
# Note: the original divided by `paper.max()` after subtracting the minimum;
# min-max normalisation needs the full range in the denominator.
acceptance_eigen = [(paper - paper.min()) / (paper.max() - paper.min())
                    for paper in acceptance_eigen]
rejection_eigen = [(paper - paper.min()) / (paper.max() - paper.min())
                   for paper in rejection_eigen]

for i, acceptance in enumerate(reversed(acceptance_eigen), 1):
    experiment.log_image(acceptance, name=f"{i} - acceptance")
for i, rejection in enumerate(rejection_eigen, 1):
    experiment.log_image(rejection, name=f"{i} - rejection")

logger.info(f"Execution time was {time.time() - timestamp} seconds")
class Visualizer():
    """This class includes several functions that can display/save images and print/save logging information.

    It logs images and metrics to Comet.ml, and uses a Python library 'dominate'
    (wrapped in 'HTML') for creating HTML files with images.
    """

    def __init__(self, opt):
        self.opt = opt  # cache the option
        self.display_id = opt.display_id
        self.name = opt.name
        self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt')
        self.experiment = Experiment(api_key="PdU8dZ4I54Nlre5Gz4pW6cY0l",
                                     project_name="sketch-pix2pix",
                                     workspace="edolev89")
        self.experiment.set_name('%s_%d' % (opt.name, time.time()))
        self.experiment.log_parameters(vars(opt))

    def display_current_results(self, visuals, epoch):
        """Concatenate the current results into one grid and log it to Comet.

        Parameters:
            visuals (OrderedDict) - - dictionary of images to display or save
            epoch (int) - - the current epoch
        """
        if self.display_id > 0:  # log images to comet
            print('logging images...')
            images = [
                util.tensor2im(image_tensor).transpose([2, 0, 1])
                for image_tensor in visuals.values()
            ]
            image_grid_arr = np.concatenate(images, axis=2)
            image_grid_arr_channels_last = np.moveaxis(image_grid_arr, 0, -1)
            image_grid = Image.fromarray(
                np.uint8(image_grid_arr_channels_last), 'RGB')
            self.experiment.log_image(image_grid, name='epoch_%d' % epoch)

    def plot_current_losses(self, epoch, counter_ratio, losses):
        """Log the current losses to Comet at the end of each epoch.

        Parameters:
            epoch (int)           -- current epoch
            counter_ratio (float) -- progress (percentage) in the current epoch, between 0 to 1
            losses (OrderedDict)  -- training losses stored in the format of (name, float) pairs
        """
        if counter_ratio == 1.0:
            self.experiment.log_metrics(losses)

    # losses: same format as |losses| of plot_current_losses
    def print_current_losses(self, epoch, iters, losses, t_comp, t_data):
        """print current losses on console; also save the losses to the disk

        Parameters:
            epoch (int)          -- current epoch
            iters (int)          -- current training iteration during this epoch (reset to 0 at the end of every epoch)
            losses (OrderedDict) -- training losses stored in the format of (name, float) pairs
            t_comp (float)       -- computational time per data point (normalized by batch_size)
            t_data (float)       -- data loading time per data point (normalized by batch_size)
        """
        message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (
            epoch, iters, t_comp, t_data)
        for k, v in losses.items():
            message += '%s: %.3f ' % (k, v)

        print(message)  # print the message
        with open(self.log_name, "a") as log_file:
            log_file.write('%s\n' % message)  # save the message
if args.plot:
    # Plot prediction images
    fig_filename = plot_dir / imgs_paths[idx].name
    plot_images(
        fig_filename,
        img,
        label,
        pred,
        metrics_dict,
        maps_dict,
        edge_coherence,
        pred_edge,
        label_edge,
    )
    exp.log_image(fig_filename)

if not args.no_paint:
    masked = img * (1 - pred[..., None])
    flooded = img_as_ubyte(
        (painted[idx].permute(1, 2, 0).cpu().numpy() + 1) / 2)
    combined = np.concatenate([img, masked, flooded], 1)
    exp.log_image(combined, imgs_paths[idx].name)

if args.write_metrics:
    pred_out = model_metrics_path / "pred"
    pred_out.mkdir(exist_ok=True)
    imsave(
        pred_out / f"{imgs_paths[idx].stem}_pred.png",
        pred.astype(np.uint8),
    )
    for k, v in maps_dict.items():
experiment.log_confusion_matrix(y_true=y_true,
                                y_predicted=y_pred,
                                labels=list(class_labels.values()),
                                title="Confusion Matrix")

# Predict
predict_tfrecords = glob.glob(
    "/orange/ewhite/b.weinstein/Houston2018/tfrecords/predict/*.tfrecord")
results = model.predict_raster(predict_tfrecords, batch_size=512)

# predicted classes
print(results.label.unique())

predicted_raster = visualize.create_raster(results)
print(np.unique(predicted_raster))
experiment.log_image(name="Prediction",
                     image_data=predicted_raster,
                     image_colormap=visualize.discrete_cmap(20, base_cmap="jet"))

# Save as tif for resampling
prediction_path = os.path.join(save_dir, "prediction.tif")
predicted_raster = np.expand_dims(predicted_raster, 0)
resample.create_tif(
    "/home/b.weinstein/DeepTreeAttention/data/processed/20170218_UH_CASI_S4_NAD83.tif",
    filename=prediction_path,
    numpy_array=predicted_raster)
filename = resample.resample(prediction_path)
experiment.log_image(name="Resampled Prediction",
                     image_data=filename,
                     image_colormap=visualize.discrete_cmap(20, base_cmap="jet"))
print('Test Accuracy : {} %, Loss : {:.4f}'.format(test_acc,
                                                   meanLoss / (i + 1)))

# Logging results
experiment.log_metric("test_loss", meanLoss / (i + 1), step=epoch)
experiment.log_metric("test_accuracy", acc, step=epoch)

# # plotting graphs (not needed if using comet ml)
# plt.figure()
# x = np.linspace(0, hyper_params["num_epochs"], hyper_params["num_epochs"])
# plt.subplot(1, 2, 1)
# plt.plot(x, trainLoss)
# plt.plot(x, validLoss)
#
# plt.subplot(1, 2, 2)
# plt.plot(x, validAcc)
# plt.savefig(path + '/learning_curve.png')
# plt.show()

# Plotting confusion matrix
plt.figure()
cm = confusion_matrix(ground_truth, predictions)
plot_confusion_matrix(cm.astype(np.int64),
                      classes=["None", "w", "q", "e", "w+q", "w+e"],
                      path=".")
experiment.log_image("./confusion_matrix.png")

details = {"test_acc": test_acc}  # renamed from `dict` to avoid shadowing the builtin
experiment.send_notification("finished", "ok tamere", details)
                                               subset='validation')

# Don't shuffle test set generator so you can index the batches right with predict_gen
test_set = val_set
print("train data directory set to", str(train_dir))

print("Uploading sample images from image data generator to Comet")
sample_x, sample_y = next(training_set)
for i in range(4):
    img_example = image.array_to_img(sample_x[i])
    img_name = './img_output/example_img_augmentation' + str(i) + '.jpg'
    img_example.save(img_name)
    experiment.log_image(img_name)

model = get_architecture(model_name=model_name,
                         input_dim_length=input_dim_length,
                         input_dim_width=input_dim_width,
                         num_dense_layers=num_dense_layers,
                         num_dense_nodes=num_dense_nodes,
                         num_class=num_class,
                         dropout_pct=dropout_pct,
                         weights="imagenet")
# model = get_architecture(model_name=model_name, input_dim_width=224, input_dim_length=224,
#                          num_dense_layers=0, num_dense_nodes=0, num_class=16,
#                          dropout_pct=0.2, weights="imagenet")
# model.summary()
print(str(model_name), "loaded as initial model.")

# optimizer
if optimizer_choice == 'adam':
    optim = keras.optimizers.Adam(lr=initial_lr, decay=decay_choice)
elif optimizer_choice == 'rmsprop':
    results_dir + 'predictions/predictions_result.txt',
    'evaluation loss:' + str(evaluation_loss) + ' evaluation accuracy:' +
    str(evaluation_accuracy) + ' evaluation dice coef:' +
    str(evaluation_dice_coef))
make_file_and_write(results_dir + 'description.txt', description)

predicted_masks = model.predict(test_images, 1, verbose=1)

converted_test_images = convert_one_class_images_to_pixel_images_and_save(
    results_dir + 'predictions/images/', test_images, shape=input_shape)
converted_test_masks = convert_multiclass_matirx_masks_to_pixel_masks_and_save(
    results_dir + 'predictions/masks/', test_masks,
    mask_pixel_values_aka_classes)
converted_predicted_masks = convert_multiclass_matirx_masks_to_pixel_masks_and_save(
    results_dir + 'predictions/results/', predicted_masks,
    mask_pixel_values_aka_classes)

plot_model(model,
           to_file=results_dir + 'model_architecture.png',
           show_shapes=True,
           show_layer_names=True,
           rankdir='TB')
experiment.log_image(results_dir + 'model_architecture.png',
                     name='model_architecture.png')
experiment.log_asset(results_dir + 'unet.hdf5', file_name='unet.hdf5')

for index in range(len(test_images)):
    experiment.log_image(converted_test_images[index],
                         name=str(index) + '_test_image')
    experiment.log_image(converted_test_masks[index],
                         name=str(index) + '_test_mask')
    experiment.log_image(converted_predicted_masks[index],
                         name=str(index) + '_predicted_mask')
cv2_img = cv2.imread("[filename]")
cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
demo_img = Image.open("[filename]")
shape = np.array(demo_img)
shape = shape.shape[:2]

detections = detect_image(demo_img, opt.img_size, model, device,
                          conf_thres=0.5, nms_thres=0.5)
if detections is not None:
    # Rescale boxes to original image
    detections = rescale_boxes(detections, opt.img_size, shape)
    unique_labels = detections[:, -1].cpu().unique()
    n_cls_preds = len(unique_labels)
    # Bounding-box colors
    cmap = plt.get_cmap("tab20b")
    colors = [cmap(i) for i in np.linspace(0, 1, 20)]
    bbox_colors = random.sample(colors, n_cls_preds)
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        classes = ["person", "ball"]
        print("\t+ Label: %s, Conf: %.5f" %
              (classes[int(cls_pred)], cls_conf.item()))
        box_w = x2 - x1
        box_h = y2 - y1
        color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
        # Draw bbox
        cv2.rectangle(cv2_img, (int(x1), int(y1)),
                      (int(x1 + box_w), int(y1 + box_h)), color, 4)
        # Add label
        cv2.putText(cv2_img, classes[int(cls_pred)], (int(x1), int(y1)),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

experiment.log_image(cv2_img, name="test_img_{}".format(epoch))
s_array = np.array(df1)

# %%
def get_class_frequencies():
    positive_freq = s_array.sum(axis=0) / s_array.shape[0]
    negative_freq = np.ones(positive_freq.shape) - positive_freq
    return positive_freq, negative_freq

p, n = get_class_frequencies()

# %%
data = pd.DataFrame({"Class": df1.columns, "Label": "Positive", "Value": p})
data = data.append([{"Class": df1.columns[l], "Label": "Negative", "Value": v}
                    for l, v in enumerate(n)],
                   ignore_index=True)
plt.xticks(rotation=90)
f = sns.barplot(x="Class", y="Value", hue="Label", data=data)
plt.savefig("skewness.png")
experiment.log_image(image_data='skewness.png')

# %%
# Weight each class by the frequency of the opposite label so that the
# positive and negative contributions to the loss balance out.
pos_weights = n
neg_weights = p
pos_contribution = p * pos_weights
neg_contribution = n * neg_weights

print(p)
print(n)
print("Weight to be added: ", pos_contribution)

# %%
data = pd.DataFrame({"Class": df1.columns, "Label": "Positive", "Value": pos_contribution})
data = data.append([{"Class": df1.columns[l], "Label": "Negative", "Value": v}
                    for l, v in enumerate(neg_contribution)],
                   ignore_index=True)
plt.xticks(rotation=90)
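# A small worked example of the weighting above (values are illustrative):
# with positive frequency p, the positive weight is n = 1 - p, so both
# label contributions to the loss come out equal per class.
import numpy as np

p = np.array([0.1, 0.4])      # positive frequency per class
n = 1.0 - p                   # negative frequency per class
pos_contribution = p * n      # 0.1 * 0.9 = 0.09, 0.4 * 0.6 = 0.24
neg_contribution = n * p      # identical by commutativity
assert np.allclose(pos_contribution, neg_contribution)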
if args.resume:
    # Resume from a snapshot
    chainer.serializers.load_npz(args.resume, trainer)

# Get confusion matrix picture before training:
log_confusion_matrix(experiment, model, trainer, 0, 0)

# Run the training
trainer.run()

# Report created images to comet.ml:
## If you want to include a graph made by chainer, you can:
# if args.plot and extensions.PlotReport.available():
#     experiment.log_image('result/loss.png')
#     experiment.log_image('result/accuracy.png')

# Report the graph, as dot language:
(graph, ) = pydot.graph_from_dot_file('result/cg.dot')
graph.write_png('result/cg.png')
experiment.log_image('result/cg.png')
with open("result/cg.dot") as fp:
    desc = fp.readlines()
    experiment.set_model_graph("\n".join(desc))

# Report a URL:
experiment.log_html_url(
    "https://github.com/chainer/chainer/"
    "blob/master/examples/mnist/train_mnist.py",
    label="This MNIST example is based on")
print('Model loaded.')
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
log_dict = helpers.flatten_dict(config)
log_dict.update({'trainable_params': n_params})
exp.log_parameters(log_dict)

test_dataset = data.CSVDatasetsMerger(helpers.get_datasets_paths(config, 'test'))
test_dataloader = DataLoader(test_dataset,
                             batch_size=config['evaluation']['eval_batch_size'],
                             shuffle=False,
                             drop_last=False,
                             num_workers=config['evaluation']['n_eval_workers'],
                             collate_fn=text_proc)

evaluator = Evaluation(test_dataloader, config)

print('Testing ...')
results, assets, image_fns = evaluator.eval_model(model, finished_training=True)

print('Finished testing. Uploading ...')
exp.log_metrics(results, step=0, epoch=0)
# Plain loops instead of list comprehensions used only for side effects.
for asset in assets:
    exp.log_asset_data(asset, step=0)
for fn in image_fns:
    exp.log_image(fn, step=0)
print('Finished uploading.')
    'discount_factor': discount_factor,
    'random_process_theta': random_process_args['theta'],
    'log_interval_steps': log_interval_steps,
    'train_data_shape': train_data_df.shape,
    'test_data_shape': test_data_df.shape,
    'dataset_name': dataset_name,
    'device_type': device_type
}
print('Running with params: %s' % str(params))

if log_comet:
    experiment.log_parameters(params)
    experiment.add_tags(comet_tags)
    if plot_stocks:
        experiment.log_image('train_stocks_plot.png', 'train_window_stocks')
    if test_stocks_plot_fig is not None:
        experiment.log_image('test_stocks_plot.png', 'test_window_stocks')

num_stocks = train_data_df.shape[1]
num_states_and_actions = num_stocks

# init DDPG agent
agent = DDPG(num_states_and_actions,
             num_states_and_actions,
             minibatch_size,
             random_process_args,
             learning_rate=learning_rate,
             discount_factor=discount_factor,
             device_type=device_type,
             is_training=True)
    'maker_has_website'
]].corr()
corr.style.background_gradient(cmap='coolwarm', axis=None).set_precision(2)

# Drop self-correlations
dropSelf = numpy.zeros_like(corr)
dropSelf[numpy.triu_indices_from(dropSelf)] = True

# Generate Color Map
colormap = seaborn.diverging_palette(220, 10, as_cmap=True)

# Generate Heat Map, allow annotations and place floats in map
seaborn.heatmap(corr, cmap=colormap, annot=True, fmt=".2f", mask=dropSelf)

# Apply ticks
pyplot.xticks(range(len(corr.columns)), corr.columns)
pyplot.yticks(range(len(corr.columns)), corr.columns)
pyplot.savefig('corrmatrix.png')

experiment.log_image(name='correlation matrix',
                     image_data='corrmatrix.png',
                     image_format='png')

# build train and test sets
cols = [
    'hunter_followers', 'hunter_has_website', 'maker_followers',
    'maker_has_website'
]
X = df[cols]
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    df.is_featured,
                                                    test_size=0.25,
                                                    random_state=seed,
                                                    stratify=df.is_featured)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
def train(self):
    # comet_ml: create an experiment
    experiment = Experiment(api_key="B6hzNydshIpZSG2Xi9BDG9gdG",
                            project_name="glow-mnist", workspace="voletiv")
    hparams_dict = self.hparams_dict()
    experiment.log_parameters(hparams_dict)

    # set to training state
    self.graph.train()
    self.global_step = self.loaded_step

    # begin to train
    for epoch in range(self.n_epoches):
        print("epoch", epoch)
        progress = tqdm(self.data_loader)
        for i_batch, batch in enumerate(progress):
            experiment.set_step(self.global_step)

            # update learning rate
            lr = self.lrschedule["func"](global_step=self.global_step,
                                         **self.lrschedule["args"])
            for param_group in self.optim.param_groups:
                param_group['lr'] = lr
            self.optim.zero_grad()

            # log
            if self.global_step % self.scalar_log_gaps == 0:
                # self.writer.add_scalar("lr/lr", lr, self.global_step)
                experiment.log_metrics({"lr": lr,
                                        "epoch": epoch + i_batch / len(self.data_loader)})

            # get batch data
            for k in batch:
                batch[k] = batch[k].to(self.data_device)
            x = batch["x"]
            y = None
            y_onehot = None
            if self.y_condition:
                if self.y_criterion == "multi-classes":
                    assert "y_onehot" in batch, "multi-classes ask for `y_onehot` (torch.FloatTensor onehot)"
                    y_onehot = batch["y_onehot"]
                elif self.y_criterion == "single-class":
                    assert "y" in batch, "single-class ask for `y` (torch.LongTensor indexes)"
                    y = batch["y"]
                    y_onehot = thops.onehot(y, num_classes=self.y_classes)

            # at first time, initialize ActNorm
            if self.global_step == 0:
                self.graph(x[:self.batch_size // len(self.devices), ...],
                           y_onehot[:self.batch_size // len(self.devices), ...]
                           if y_onehot is not None else None)

            # parallel
            if len(self.devices) > 1 and not hasattr(self.graph, "module"):
                print("[Parallel] move to {}".format(self.devices))
                self.graph = torch.nn.parallel.DataParallel(self.graph,
                                                            self.devices,
                                                            self.devices[0])

            # forward phase
            z, nll, y_logits = self.graph(x=x, y_onehot=y_onehot)

            # loss_generative
            loss_generative = Glow.loss_generative(nll)

            # loss_classes
            loss_classes = 0
            if self.y_condition:
                loss_classes = (Glow.loss_multi_classes(y_logits, y_onehot)
                                if self.y_criterion == "multi-classes" else
                                Glow.loss_class(y_logits, y))

            # total loss
            loss = loss_generative + loss_classes * self.weight_y

            # log
            if self.global_step % self.scalar_log_gaps == 0:
                # self.writer.add_scalar("loss/loss_generative", loss_generative, self.global_step)
                experiment.log_metrics({"loss_generative": loss_generative})
                if self.y_condition:
                    # self.writer.add_scalar("loss/loss_classes", loss_classes, self.global_step)
                    experiment.log_metrics({"loss_classes": loss_classes,
                                            "total_loss": loss})

            # backward
            self.graph.zero_grad()
            self.optim.zero_grad()
            loss.backward()

            # operate grad
            if self.max_grad_clip is not None and self.max_grad_clip > 0:
                torch.nn.utils.clip_grad_value_(self.graph.parameters(),
                                                self.max_grad_clip)
            if self.max_grad_norm is not None and self.max_grad_norm > 0:
                grad_norm = torch.nn.utils.clip_grad_norm_(self.graph.parameters(),
                                                           self.max_grad_norm)
                if self.global_step % self.scalar_log_gaps == 0:
                    # self.writer.add_scalar("grad_norm/grad_norm", grad_norm, self.global_step)
                    experiment.log_metrics({"grad_norm": grad_norm})

            # step
            self.optim.step()

            # checkpoints
            if self.global_step % self.checkpoints_gap == 0 and self.global_step > 0:
                save(global_step=self.global_step,
                     graph=self.graph,
                     optim=self.optim,
                     pkg_dir=self.checkpoints_dir,
                     is_best=True,
                     max_checkpoints=self.max_checkpoints)

            # plot images
            if self.global_step % self.plot_gaps == 0:
                img = self.graph(z=z, y_onehot=y_onehot, reverse=True)
                # img = torch.clamp(img, min=0, max=1.0)

                if self.y_condition:
                    if self.y_criterion == "multi-classes":
                        y_pred = torch.sigmoid(y_logits)
                    elif self.y_criterion == "single-class":
                        y_pred = thops.onehot(torch.argmax(F.softmax(y_logits, dim=1),
                                                           dim=1, keepdim=True),
                                              self.y_classes)
                    y_true = y_onehot

                # plot images
                # self.writer.add_image("0_reverse/{}".format(bi), torch.cat((img[bi], batch["x"][bi]), dim=1), self.global_step)
                # Note: the original saved to '/tmp/vikramvoleti.png' but logged
                # '/tmp/vikramvoleti_rev.png'; the two paths are aligned here.
                vutils.save_image(
                    torch.stack([torch.cat((img[bi], batch["x"][bi]), dim=1)
                                 for bi in range(min([len(img), self.n_image_samples]))]),
                    '/tmp/vikramvoleti_rev.png', nrow=10)
                experiment.log_image('/tmp/vikramvoleti_rev.png', file_name="0_reverse")

                # plot preds
                # for bi in range(min([len(img), self.n_image_samples])):
                #     # wandb.log({"0_reverse_{}".format(bi): [wandb.Image(torch.cat((img[bi], batch["x"][bi]), dim=1), caption="0_reverse/{}".format(bi))]}, step=self.global_step)
                #     if self.y_condition:
                #         # self.writer.add_image("1_prob/{}".format(bi), plot_prob([y_pred[bi], y_true[bi]], ["pred", "true"]), self.global_step)
                #         wandb.log({"1_prob_{}".format(bi): [wandb.Image(plot_prob([y_pred[bi], y_true[bi]], ["pred", "true"]))]}, step=self.global_step)

            # inference
            if hasattr(self, "inference_gap"):
                if self.global_step % self.inference_gap == 0:
                    try:
                        img = self.graph(z=None, y_onehot=inference_y_onehot,
                                         eps_std=0.5, reverse=True)
                    except NameError:
                        inference_y_onehot = torch.zeros_like(y_onehot,
                                                              device=torch.device('cpu'))
                        for i in range(inference_y_onehot.size(0)):
                            inference_y_onehot[i, (i % inference_y_onehot.size(1))] = 1.
                        # now
                        inference_y_onehot = inference_y_onehot.to(y_onehot.device)
                        img = self.graph(z=None, y_onehot=inference_y_onehot,
                                         eps_std=0.5, reverse=True)
                    # grid
                    vutils.save_image(img[:min([len(img), self.n_image_samples])],
                                      '/tmp/vikramvoleti_sam.png', nrow=10)
                    experiment.log_image('/tmp/vikramvoleti_sam.png', file_name="1_samples")
                    # img = torch.clamp(img, min=0, max=1.0)
                    # for bi in range(min([len(img), n_images])):
                    #     # self.writer.add_image("2_sample/{}".format(bi), img[bi], self.global_step)
                    #     wandb.log({"2_sample_{}".format(bi): [wandb.Image(img[bi])]}, step=self.global_step)

            if self.global_step == 0:
                subprocess.run('nvidia-smi')

            # global step
            self.global_step += 1
def training_loop(
    G_args                  = {},       # Options for generator network.
    D_args                  = {},       # Options for discriminator network.
    G_opt_args              = {},       # Options for generator optimizer.
    D_opt_args              = {},       # Options for discriminator optimizer.
    G_loss_args             = {},       # Options for generator loss.
    D_loss_args             = {},       # Options for discriminator loss.
    dataset_args            = {},       # Options for dataset.load_dataset().
    sched_args              = {},       # Options for train.TrainingSchedule.
    grid_args               = {},       # Options for train.setup_snapshot_image_grid().
    metric_arg_list         = [],       # Options for MetricGroup.
    tf_config               = {},       # Options for tflib.init_tf().
    data_dir                = None,     # Directory to load datasets from.
    G_smoothing_kimg        = 10.0,     # Half-life of the running average of generator weights.
    minibatch_repeats       = 4,        # Number of minibatches to run before adjusting training parameters.
    lazy_regularization     = True,     # Perform regularization as a separate training step?
    G_reg_interval          = 4,        # How often to perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval          = 16,       # How often to perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod   = True,     # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg              = 25000,    # Total length of the training, measured in thousands of real images.
    mirror_augment          = False,    # Enable mirror augment?
    drange_net              = [-1, 1],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks    = 50,       # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks  = 50,       # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph           = False,    # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms  = False,    # Include weight histograms in the tfevents file?
    resume_pkl              = None,     # Network pickle to resume training from, None = train from scratch.
    resume_kimg             = 0.0,      # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time             = 0.0,      # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets    = False):   # Construct new networks according to G_args and D_args before resuming training?

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    e = Experiment("Your API Key")
    e.log_parameters(params)  # `params` is assumed to hold the run's hyperparameter dict

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args)
    misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size)
    for i in range(len(training_set)):
        e.log_image(training_set[i])

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)
            D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)
            Gs = G.clone('Gs')
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            rG, rD, rGs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs)
            else:
                G = rG; D = rD; Gs = rGs

    # Print layers and generate initial image snapshot.
    G.print_layers(); D.print_layers()
    sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in               = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in             = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in    = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[])
        minibatch_gpu_in     = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus)
        Gs_beta              = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
    D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)

    # Log the initial fakes snapshot. Note: comet's log_image returns None, so
    # the original `images = e.log_image(...); for i in range(len(image)): ...`
    # loop could not work; the snapshot file is logged once instead.
    e.log_image(dnnlib.make_run_dir_path('fakes_init.png'), 'Initial Fakes')

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args)
                reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape))
                labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net)
                reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('G_loss'):
                    G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args)
                with tf.name_scope('D_loss'):
                    D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args)

            # Register gradients.
            if not lazy_regularization:
                if G_reg is not None: G_loss += G_reg
                if D_reg is not None: D_loss += D_reg
            else:
                if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables)
                if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables)
            G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)
            D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms(); D.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, sched.lod)
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu}
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus)
            run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0)
            run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op, data_fetch_op], feed_dict)
                if run_G_reg:
                    tflib.run(G_reg_op, feed_dict)
                tflib.run([D_train_op, Gs_update_op], feed_dict)
                if run_D_reg:
                    tflib.run(D_reg_op, feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(G_train_op, feed_dict)
                if run_G_reg:
                    for _round in rounds:
                        tflib.run(G_reg_op, feed_dict)
                tflib.run(Gs_update_op, feed_dict)
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    tflib.run(D_train_op, feed_dict)
                if run_D_reg:
                    for _round in rounds:
                        tflib.run(D_reg_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time

            # Report progress.
            print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % (
                autosummary('Progress/tick', cur_tick),
                autosummary('Progress/kimg', cur_nimg / 1000.0),
                autosummary('Progress/lod', sched.lod),
                autosummary('Progress/minibatch', sched.minibatch_size),
                dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)),
                autosummary('Timing/sec_per_tick', tick_time),
                autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                autosummary('Timing/maintenance_sec', maintenance_time),
                autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))
            e.log_dataset_hash(training_set)

            # Save snapshots.
            if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size)
            if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl)
                metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
logger.info("Computing Mean Square Error") save_imgs_path = _path_sav_fold("completions") mse = Database.mean_square_error(mpe_assignment, unorm_eval_data, save_imgs_path=save_imgs_path) logger.info("MSE: {}".format(mse)) logger.info("--> Duration: {:.4f}".format(timers.tac())) print('{"metric": "MSE", "value": %f}' % (mse)) # Comet.ml if use_comet: experiment.log_metric("mse", mse) for img_idx in range(unorm_eval_data.shape[0]): experiment.log_image("{}/{}.png".format(save_imgs_path, img_idx)) # SAMPLING elif inference_type == "sampling": timers.tic() logger.info("Sampling inference.") spn_input = spn.inputs_marg if backward_masks is None: spn_input = spn.inputs backward_masks = spn.build_backward_masks(forward, sampling_amt=valid_amount) sampling_leaf = spn.build_sampling_leaf(backward_masks) feed_means_stds = {
if "--ground" not in m_path else "ground" ) names.append(name) is_ground = name == "ground" print("#" * 100) print("\n>>> Processing", name) print() outputs = get_or_load_inferences( m_path, device, xs, is_ground, im_paths, ground_model, load ) nps = numpify(outputs) np_outs[name] = nps exp = Experiment(project_name="climategan-inferences", display_summary_level=0) exp.log_parameter("names", names) exp.add_tags(tags) for i in tqdm(range(len(xs))): all_models_for_image = [] for name in names: xpmds = concat_npy_for_model(np_outs[name][i], tasks) all_models_for_image.append(xpmds) full_im = np.concatenate(all_models_for_image, axis=0) pil_im = Image.fromarray(full_im) exp.log_image(pil_im, name=im_paths[i].stem.replace(".", "_"), step=i)
# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=hyper_params['batch_size'],
                                           shuffle=False)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=hyper_params['batch_size'],
                                          shuffle=False)

# Log dataset sample images to Comet
num_samples = len(train_dataset)
for i in range(10):
    # random.randint is inclusive on both ends, so sample up to num_samples - 1
    value = random.randint(0, num_samples - 1)
    tmp, _ = train_dataset[value]
    img = tmp.numpy()[0]
    experiment.log_image(img, name="groundtruth:{}".format(i))

# Log hyperparameters to Comet
experiment.log_parameters(hyper_params)


# RNN Model (Many-to-One)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers,
                            batch_first=True)
          experiment, attrs, save=args.save)
if args.test:
    test(model, disc, test_loader, experiment, val=False)

# save every epoch during training instead
# if args.save:
#     torch.save(model.state_dict(), './models/model.pt')
#     torch.save(disc.state_dict(), './models/disc.pt')

if args.gen:
    generate_image(model, val_loader, experiment, attrs,
                   log=False, show=True, save=True)

if args.smooth:
    model.eval()
    with torch.no_grad():
        z1 = torch.zeros((hyperparameters["latent_size"])).to(device)
        z2 = 0.01 * torch.ones((hyperparameters["latent_size"])).to(device)
        z3 = 1. * torch.ones((hyperparameters["latent_size"])).to(device)
        z4 = 10. * torch.ones((hyperparameters["latent_size"])).to(device)
        vecs = torch.vstack([z1, z2, z3, z4]).to(device)
        ys = model.decode(vecs)
        experiment.log_image(ys[0].cpu().numpy(), "z_zero")
        experiment.log_image(ys[1].cpu().numpy(), "z_hundredth")
        experiment.log_image(ys[2].cpu().numpy(), "z_one")
        experiment.log_image(ys[3].cpu().numpy(), "z_ten")
def eval(
    model: LiSSModel,
    dataset: UnalignedDataset,
    exp: Experiment,
    total_iters: int = 0,
    nb_ims: int = 30,
):
    liss = model.opt.model == "liss"
    metrics = {}
    print(f"----------- Evaluation {total_iters} ----------")

    with torch.no_grad():
        data = {
            "translation": {
                "A": {"rec": None, "idt": None, "real": None, "fake": None},
                "B": {"rec": None, "idt": None, "real": None, "fake": None},
            }
        }
        force = set(["identity", "translation"])
        if liss:
            for t in model.tasks:
                tmp = {}
                if t.eval_visuals_pred or t.log_type == "acc":
                    tmp["pred"] = None
                if t.eval_visuals_target or t.log_type == "acc":
                    tmp["target"] = None
                data[t.key] = {domain: deepcopy(tmp) for domain in "AB"}
            force |= set(model.tasks.keys)

        losses = {
            k: []
            for k in dir(model)
            if k.startswith("loss_") and isinstance(getattr(model, k), torch.Tensor)
        }
        for i, b in enumerate(dataset):
            # print(f"\rEval batch {i}", end="")

            model.set_input(b)
            model.forward(force=force)
            model.backward_G(losses_only=True, force=force)

            for k in dir(model):
                if k.startswith("loss_") and isinstance(getattr(model, k),
                                                        torch.Tensor):
                    if k not in losses:
                        losses[k] = []
                    losses[k].append(getattr(model, k).detach().cpu().item())

            if liss:
                for t in model.tasks:
                    for domain in "AB":
                        for dtype in data[t.key][domain]:
                            if (t.log_type != "acc"
                                    and data[t.key][domain][dtype] is not None
                                    and len(data[t.key][domain][dtype]) >= nb_ims):
                                continue
                            v = model.get(f"{domain}_{t.key}_{dtype}").detach().cpu()
                            if data[t.key][domain][dtype] is None:
                                data[t.key][domain][dtype] = v
                            else:
                                data[t.key][domain][dtype] = torch.cat(
                                    [data[t.key][domain][dtype], v], dim=0,
                                )

            # -------------------------
            # -----  Translation  -----
            # -------------------------
            if (data["translation"]["A"]["real"] is None
                    or len(data["translation"]["A"]["real"]) < nb_ims):
                for domain in "AB":
                    for dtype in ["real", "fake", "rec", "idt"]:
                        dom = domain
                        if dtype in {"fake", "idt"}:
                            dom = swap_domain(domain)
                        v = model.get(f"{dom}_{dtype}").detach().cpu()
                        if data["translation"][domain][dtype] is None:
                            data["translation"][domain][dtype] = v
                        else:
                            data["translation"][domain][dtype] = torch.cat(
                                [data["translation"][domain][dtype], v], dim=0)
                        # print(
                        #     f"{domain} {dtype} {len(data['translation'][domain][dtype])}"
                        # )

    for task in data:
        if task != "translation" and model.tasks[task].log_type != "vis":
            continue
        for domain in data[task]:
            for i, v in data[task][domain].items():
                data[task][domain][i] = torch.cat(
                    list(v[:nb_ims].permute(0, 2, 3, 1)), axis=1)

    log_images = int(data["translation"]["A"]["real"].shape[1]
                     / data["translation"]["A"]["real"].shape[0])
    im_size = data["translation"]["A"]["real"].shape[0]

    ims = {"A": None, "B": None}
    data_keys = ["translation"]
    translation_keys = ["real", "fake", "rec", "idt"]
    data_keys += [task for task in data if task not in data_keys]

    for task in data_keys:
        if task != "translation" and model.tasks[task].log_type != "vis":
            continue
        for domain in "AB":
            im_types = (translation_keys if task == "translation"
                        else list(data[task][domain].keys()))
            for im_type in im_types:
                v = data[task][domain][im_type].float()
                if task == "depth":
                    v = to_min1_1(v)
                    v = v.repeat((1, 1, 3))
                v = v + 1
                v = v / 2
                if ims[domain] is None:
                    ims[domain] = v
                else:
                    ims[domain] = torch.cat([ims[domain], v], dim=0)

    # ------------------------
    # -----  Comet Logs  -----
    # ------------------------
    for i in range(0, log_images, 5):
        k = i + 5
        exp.log_image(
            ims["A"][:, i * im_size:k * im_size, :].numpy(),
            "test_A_{}_{}_rfcidg".format(i * 5, (i + 1) * 5 - 1),
            step=total_iters,
        )
        exp.log_image(
            ims["B"][:, i * im_size:k * im_size, :].numpy(),
            "test_B_{}_{}_rfcidg".format(i * 5, (i + 1) * 5 - 1),
            step=total_iters,
        )
    if liss:
        test_losses = {
            "test_" + ln: np.mean(losses["loss_" + ln])
            for t in model.tasks
            for ln in t.loss_names
        }
        test_accs = {
            f"test_G_{domain}_{t.key}_acc": np.mean(
                data[t.key][domain]["pred"].max(-1)[1].numpy()
                == data[t.key][domain]["target"].numpy())
            for domain in "AB"
            for t in model.tasks
            if t.log_type == "acc"
        }

        exp.log_metrics(test_losses, step=total_iters)
        exp.log_metrics(test_accs, step=total_iters)

        for t in model.tasks:
            if t.log_type != "acc":
                continue
            for domain in "AB":
                target = data[t.key][domain]["target"].numpy()
                pred = data[t.key][domain]["pred"].numpy()
                exp.log_confusion_matrix(
                    get_one_hot(target, t.output_dim),
                    pred,
                    file_name=f"confusion_{domain}_{t.key}_{total_iters}.json",
                    title=f"confusion_{domain}_{t.key}_{total_iters}.json",
                )

        metrics = {k + "_loss": v for k, v in test_losses.items()}
        metrics.update(test_accs)

    print("----------- End Evaluation----------")
    return metrics
    )

    # Add the transformation in blue overlay
    overlay_flood_mask(
        opt_test.results_dir + "epoch" + str(epoch) + "/val_set/",
        opt_test.results_dir + "epoch" + str(epoch) + "/overlay/",
        prefix="0_",
    )
    print("overlay is saved")

    # Comet ML part: take the img_paths and overlays, and log them
    if comet_exp is not None:
        fake_im_list = fake_img(
            opt_test.results_dir + "epoch" + str(epoch) + "/val_set/"
        )
        for img_path in fake_im_list:
            comet_exp.log_image(img_path)

        list_img = os.listdir(
            opt_test.results_dir + "epoch" + str(epoch) + "/overlay/"
        )
        for img_path in list_img:
            comet_exp.log_image(
                opt_test.results_dir + "epoch" + str(epoch) + "/overlay/" + img_path
            )
    print("Inference is done, on validation set")
    # INFERENCE CODE END

    model.update_learning_rate()