def logits_latency_tensordict(self, features, mode, params, ignore_latency=False, log_searchableblock_tensor='min'):
    """Build the NAS supergraph and return (logits, latency, tensorboard dict).

    Args:
        features: input batch fed to the network builder.
        mode: tf.estimator mode key; TRAIN enables training-specific behavior.
        params: dict; reads 'supergraph_train_steps' and 'use_bfloat16'.
        ignore_latency: when True the builder skips the latency estimate.
        log_searchableblock_tensor: reduction mode forwarded to the builder
            for logging searchable-block tensors.

    Returns:
        (logits, latency_val, tensordict_to_write_on_tensorboard).
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    override_params = self._get_override_params_from_FLAGS()
    # Scalar tensor telling the supergraph whether it is still in its
    # warm-up ("supergraph training") phase at the current global step.
    is_supergraph_training_tensor = nas_utils.build_is_supergraph_training_tensor(
        self.global_step, params['supergraph_train_steps'], is_training)
    override_params['is_supergraph_training_tensor'] = is_supergraph_training_tensor
    logits, latency_val, tensordict_to_write_on_tensorboard = \
        net_builder.build_logits_latency_tensordict(
            features,
            model_json_path=FLAGS.model_json_path,
            training=is_training,
            override_params=override_params,
            model_dir=FLAGS.model_dir,
            ignore_latency=ignore_latency,
            log_searchableblock_tensor=log_searchableblock_tensor)
    if params['use_bfloat16']:
        # Cast back to float32 so downstream loss/metric ops run in full
        # precision even when the network body ran under bfloat16.
        with tf.contrib.tpu.bfloat16_scope():
            logits = tf.cast(logits, tf.float32)
    # Parameter count is computed from static variable shapes.
    num_params = np.sum([np.prod(v.shape) for v in tf.trainable_variables()])
    tf.logging.info('number of trainable parameters: {}'.format(num_params))
    neptune.log_text("#Params", str(num_params))
    return logits, latency_val, tensordict_to_write_on_tensorboard
def log_final(self):
    """Flush all recorded SMILES to Neptune: every element to the
    'unfiltered_smis' channel, and — when filtering was enabled — the
    filtered elements to 'filtered_smis'."""
    for entry in self.elems:
        neptune.log_text("unfiltered_smis", entry.smi)
    if not self.record_filtered:
        return
    for entry in self.filtered_elems:
        neptune.log_text("filtered_smis", entry.smi)
def main(arguments):
    """Replay a JSON-serialized experiment dump into a new Neptune experiment.

    Reads the dump from `arguments.filepath`, recreates the experiment with
    the stored metadata, then replays all metric/text/image channels and
    re-uploads the listed artifacts.
    """
    with open(arguments.filepath, 'r') as fp:
        json_exp = json.load(fp)

    neptune.init(api_token=arguments.api_token,
                 project_qualified_name=arguments.project_name)

    with neptune.create_experiment(
            name=json_exp['name'],
            description=json_exp['description'],
            params=json_exp['params'],
            properties=json_exp['properties'],
            tags=json_exp['tags'],
            upload_source_files=json_exp['upload_source_files']):
        # Each channel section maps channel-name -> {'x': [...], 'y': [...]};
        # replay them in the same order as the original dump.
        channel_loggers = (
            ('log_metric', neptune.log_metric),
            ('log_text', neptune.log_text),
            ('log_image', neptune.log_image),
        )
        for section, log_fn in channel_loggers:
            for name, channel_xy in json_exp[section].items():
                for x, y in zip(channel_xy['x'], channel_xy['y']):
                    log_fn(name, x=x, y=y)
        for filename in json_exp['log_artifact']:
            neptune.log_artifact(filename)
def train_evaluate(search_params):
    """Run one trading simulation for a hyperparameter sample and return its
    (clamped) Sharpe ratio as the optimization objective.

    Args:
        search_params: flat dict from the hyperparameter search; picker-weight
            keys are routed into `pick_kwargs`, everything else goes to `Sim`.

    Returns:
        float Sharpe ratio; degenerate values are clamped to -5.
    """
    hyperparameters = {}
    pick_kwargs = {}
    # Split the flat search space: stock-picker weights vs. Sim kwargs.
    for k in list(search_params.keys()):
        if k in ['w_dfh', 'w_sharpe', 'w_100d', 'v_100d', 'v_dfh', 'v_rfl']:
            pick_kwargs[k] = search_params[k]
        else:
            hyperparameters[k] = search_params[k]
    hyperparameters['pick_kwargs'] = pick_kwargs
    print('------------')
    print(json.dumps(hyperparameters, indent=2, sort_keys=True))
    sim = Sim(neptune=neptune, period='2y', timedelay=100, window=100,
              timestep=1, budget=5000, stockPicks=5, avoidDowntrends=True,
              sellAllOnCrash=False, **hyperparameters)
    stats = sim.run()
    analysis = Analysis(neptune=neptune, stats=stats,
                        positions=sim.portfolio.holdings,
                        prices=sim.downloader.prices)
    #analysis.chart()
    output, advanced_stats, obj_stats = analysis.positionStats()
    for k in list(obj_stats.keys()):
        neptune.log_metric(k, obj_stats[k])
    print(output)
    #neptune.log_artifact('data/output_1y.pkl')
    sharpe = analysis.sharpe()
    # NOTE: `stats` is deliberately rebound here from run() results to the
    # portfolio summary used by the report below.
    stats = sim.portfolio.summary()
    # Clamp NaN/inf/out-of-range Sharpe so the optimizer treats the run as bad.
    if math.isnan(sharpe) or math.isinf(sharpe) or sharpe <= -2 or sharpe >= 5:
        sharpe = -5
    #neptune.log_metric('sharpe', sharpe)
    #neptune.log_metric('start_value', 5000)
    #neptune.log_metric('end_value', stats['total_value'])
    report = {
        'hyperparameters': hyperparameters,
        'sharpe': sharpe,
        'end_value': stats['total_value'],
        # gains relative to the fixed 5000 starting budget
        'gains': (stats['total_value'] - 5000.0) / 5000.0
    }
    neptune.log_text('report', json.dumps(report, indent=2, sort_keys=True))
    return sharpe
def train_and_eval(self):
    """Alternate train/eval until `train_steps`, then return top-1 accuracy.

    Trains in chunks of `epochs_per_eval` epochs, evaluating after each
    chunk; resumes from the checkpoint found in FLAGS.model_dir.
    """
    assert FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(
        FLAGS.model_dir)
    train_epochs = self.params['train_steps'] / \
        self.params['steps_per_epoch']
    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current step %d.',
        self.params['train_steps'], train_epochs, current_step)
    # neptune.log_text('Train INFO', f"Training for {self.params['train_steps']} steps {train_epochs} epochs in total)\n Current step {current_step}")
    start_timestamp = time.time()  # This time will include compilation time
    eval_results = None
    while current_step < self.params['train_steps']:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        steps_per_eval = int(FLAGS.epochs_per_eval *
                             self.params['steps_per_epoch'])
        # Next multiple of steps_per_eval strictly beyond current_step.
        next_eval = (current_step // steps_per_eval) * \
            steps_per_eval + steps_per_eval
        print("next eval point : ", next_eval)
        next_checkpoint = min(next_eval, self.params['train_steps'])
        self.est.train(input_fn=self.imagenet_train.input_fn,
                       max_steps=int(next_checkpoint))
        current_step = next_checkpoint
        tf.logging.info(
            'Finished training up to step %d. Elapsed seconds %d.',
            next_checkpoint, int(time.time() - start_timestamp))
        neptune.log_text(
            'train INFO',
            'Finished training up to step {}. Elapsed seconds {}'.format(
                next_checkpoint, int(time.time() - start_timestamp)))
        eval_results = self.eval()
    # Guard: if the loop never ran (already past train_steps), still evaluate.
    if eval_results is None:
        eval_results = self.eval()
    elapsed_time = int(time.time() - start_timestamp)
    tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                    self.params['train_steps'], elapsed_time)
    neptune.log_text(
        'train INFO',
        'Finished training up to step {} . Elapsed seconds {}'.format(
            self.params["train_steps"], elapsed_time))
    # Tear down the graph so repeated calls start clean.
    tf.keras.backend.clear_session()
    tf.reset_default_graph()
    return eval_results['top_1_accuracy'].item()
def build_models(self, build_encoder, build_decoder):
    """Construct encoder and decoder via the supplied factories and push
    each model's summary to its Neptune text channel."""
    self.encoder, volume_size = build_encoder(z_size=self.z_dim,
                                              img_size=self.img_size)
    self.decoder = build_decoder(z_size=self.z_dim, volume_size=volume_size)
    # log model summary (channel name bound as a default to avoid the
    # late-binding closure pitfall)
    for channel, net in (('encoder_summary', self.encoder),
                         ('decoder_summary', self.decoder)):
        net.summary(print_fn=lambda line, channel=channel: neptune.log_text(channel, line))
def log_dict(self, dict_name, input_dict, task_name="", recursion_level=0):
    """Pretty-print a possibly nested dict, either to Neptune (when the
    logger is active) or to stdout.

    Args:
        dict_name: Neptune channel suffix the entries are logged under.
        input_dict: dict to render; nested dicts recurse with extra indent.
        task_name: channel prefix, combined as "{task_name} {dict_name}".
        recursion_level: current nesting depth; controls indentation.
    """
    if self.logger_active:
        # Add spaces so that dict prints prettily in logger
        spacing_str = '|' + ' - - - ' * recursion_level
        for key, value in input_dict.items():
            # FIX: use isinstance instead of `type(value) == dict` so dict
            # subclasses (OrderedDict, defaultdict, ...) recurse correctly.
            if isinstance(value, dict):
                neptune.log_text(f"{task_name} {dict_name}",
                                 f"{spacing_str}{str(key)}")
                self.log_dict(dict_name, value, task_name, recursion_level + 1)
            else:
                neptune.log_text(f"{task_name} {dict_name}",
                                 f"{spacing_str}{str(key)}: {str(value)}")
    else:
        print(f"{task_name} {dict_name}: {str(input_dict)}")
def initialize(self, pauli_strings: Set[cirq.PauliString] = None):
    """Build the WGAN (discriminator + generator) and train it on
    precomputed Pauli-string expectation values.

    Args:
        pauli_strings: optional set of Pauli strings to restrict the batch;
            None lets the provider pick a random batch.
    """
    self.initialized = True
    expectations, pauli_strings = self.precomputed_expectations_provider.get_expectations_for_random_batch(
        pauli_strings, filter_small_expectations=self.filter_small_expectations)
    self.used_pauli_strings = pauli_strings
    self.input_dim = len(self.used_pauli_strings)
    if self.use_convolutions:
        # Zero-pad each expectation vector to the fixed conv input width.
        # NOTE(review): assumes len(pauli_strings) <= 108 — if it is larger,
        # the pad loop is a no-op and input_dim is silently wrong; verify.
        conv_size = 108
        real_size = len(pauli_strings)
        extra_dims = conv_size - real_size
        for exps in expectations:
            for _ in range(extra_dims):
                exps.append(0)
        self.input_dim = conv_size
    self.discriminator = self._discriminator()
    self.generator = self._generator()
    # Rescale expectations from [-1, 1] to [0, 1] before batching.
    dataset = tf.data.Dataset.from_tensor_slices(
        (np.array(expectations, dtype='float32') + 1) / 2)
    # dataset = tf.data.Dataset.from_tensor_slices(np.array(expectations, dtype='float32'))
    dataset = dataset.shuffle(buffer_size=len(self.precomputed_expectations_provider.real_state_parameters)) \
        .batch(self.batch_size)
    start = time.time()
    for epoch in range(self.epochs):
        epoch_gen_losses = []
        epoch_disc_losses = []
        epoch_pen_losses = []
        for batch in dataset:
            gen_loss, disc_loss, pen_loss = self.train_step(batch)
            epoch_gen_losses.append(gen_loss)
            epoch_disc_losses.append(disc_loss)
            epoch_pen_losses.append(pen_loss)
        average_epoch_gen_loss = st.mean(epoch_gen_losses)
        average_epoch_disc_loss = st.mean(epoch_disc_losses)
        average_epoch_pen_loss = st.mean(epoch_pen_losses)
        if self.use_neptune:
            neptune.log_metric("wgan_gen_loss", average_epoch_gen_loss)
            neptune.log_metric("wgan_disc_loss", average_epoch_disc_loss)
            # FIX: originally logged the discriminator loss under
            # "wgan_pen_loss" and discarded pen_loss entirely.
            neptune.log_metric("wgan_pen_loss", average_epoch_pen_loss)
            neptune.log_text("wgan_seed", str(self.seed))
        if epoch % self.report_interval_epochs == 0:
            print(
                f"Epoch: {epoch}, time for last {self.report_interval_epochs} epochs {time.time() - start}"
            )
            print(
                f"Last epoch gen loss: {average_epoch_gen_loss}, disc loss: {average_epoch_disc_loss}"
            )
            start = time.time()
def save_roc(data_loader, epoch, net_g, net_d, len_train_dataset, test=False):
    """Compute real-vs-fake ROC AUC from discriminator scores and log it.

    Logs one overall AUC point (x = images seen so far) plus a per-batch
    AUC text line for each loader batch.

    Args:
        data_loader: loader whose batches were scored by get_gan_metric.
        epoch: 1-based epoch index used for the metric x-axis.
        net_g, net_d: generator / discriminator networks.
        len_train_dataset: training-set size, scales the x-axis.
        test: when True, metric names get a 'val_' prefix.
            NOTE(review): prefix says 'val_' but the flag is called `test` —
            confirm which split this actually labels.
    """
    _, _, pred_real, pred_fake, names = get_gan_metric(data_loader, net_g, net_d)
    # Labels: 1 for real predictions, 0 for fake, stacked in the same order
    # as the concatenated predictions below.
    true = np.concatenate((np.ones(
        (len(pred_real), 1)), np.zeros((len(pred_fake), 1))), axis=0)
    pred = np.concatenate((pred_real, pred_fake), axis=0)
    auc_total = roc_auc_score(true, pred)
    name_metric = 'val_' if test else ''
    # 3-arg form: (channel, x, y) — x is the global sample counter.
    neptune.log_metric(name_metric + 'auc_total',
                       (epoch - 1) * len_train_dataset, auc_total)
    # Per-batch slice size; assumes equal-sized batches.
    size = int(len(pred_real) / len(data_loader))
    assert true.shape[0] == pred.shape[0]
    for i in range(len(data_loader)):
        sample = np.concatenate([
            pred_real[i * size:(i + 1) * size],
            pred_fake[i * size:(i + 1) * size]
        ])
        # `true` is deliberately rebound to per-batch labels here; the
        # overall AUC above was already computed.
        true = np.concatenate([np.ones((size, )), np.zeros((size, ))])
        tmp = f'{epoch}\t{names[i]}:\n{roc_auc_score(true, sample)}'
        neptune.log_text(name_metric + 'auc_sample', tmp)
def train(self):
    """Run `num_steps` training iterations over the synthetic-route dataset,
    periodically sampling from the generator and checkpointing the model."""
    data_loader = DataLoader(
        self.dataset,
        batch_size=self.batch_size,
        shuffle=True,
        num_workers=0,
        collate_fn=synthetic_route_collate_fn
    )
    # FIX: the original created `data_iter` lazily inside a bare `except:`,
    # which relied on the first-iteration NameError and silently swallowed
    # any unrelated error raised by the loader.
    data_iter = iter(data_loader)
    for step in tqdm(range(self.num_steps)):
        try:
            graph, node2smi, node2molgraph, prop_traj = next(data_iter)
        except StopIteration:
            # Loader exhausted: restart it for another pass.
            data_iter = iter(data_loader)
            graph, node2smi, node2molgraph, prop_traj = next(data_iter)
        loss = self.train_batch(graph, node2molgraph, prop_traj)
        if not self.disable_neptune:
            neptune.log_metric("loss", loss.item())
        if (step + 1) % self.eval_freq == 0:
            # Sample routes without gradients, then restore train mode.
            self.model.eval()
            with torch.no_grad():
                graphs, node2smis, node2molgraphs = self.generator.generate(
                    num_samples=self.num_eval_samples)
            self.model.train()
            if not self.disable_neptune:
                for graph, node2smi in zip(graphs, node2smis):
                    edges = [
                        (u, v) for u, v in zip(
                            graph.edges(etype=PARENT_EDGE_TYPE)[0].tolist(),
                            graph.edges(etype=PARENT_EDGE_TYPE)[1].tolist(),
                        )
                    ]
                    neptune.log_text("edges", str(edges))
                    neptune.log_text("node2smi", str(node2smi))
        if (step + 1) % self.checkpoint_freq == 0:
            state_dict = self.model.state_dict()
            with open(self.checkpoint_path, "wb") as f:
                torch.save(state_dict, f)
def main(argv):
    """Parse gin config, set up Neptune (online when NEPTUNE_API_TOKEN is
    present, offline otherwise), and run the experiment."""
    gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param,
                                        skip_unknown=True)
    print("Gin parameter bindings:\n{}".format(gin.config_str()))
    use_neptune = "NEPTUNE_API_TOKEN" in os.environ
    exp_id = ''
    if use_neptune:
        neptune.init(project_qualified_name='bbeatrix/curl')
        # Experiment name is the first gin file's basename minus '.gin'.
        exp = neptune.create_experiment(
            params=gin_config_to_dict(gin.config_str()),
            name=FLAGS.gin_file[0].split('/')[-1][:-4],
            upload_source_files=['./*.py'])
        exp_id = exp.id
    else:
        # Offline backend: calls are accepted but nothing is uploaded.
        neptune.init('shared/onboarding', 'ANONYMOUS',
                     backend=neptune.OfflineBackend())
    neptune.log_text('gin_config', gin.config_str())
    # NOTE(review): the star-unpack assumes FLAGS.gin_file holds exactly one
    # path; with more than one, log_artifact would get extra positional
    # arguments — confirm the flag's cardinality.
    neptune.log_artifact(*FLAGS.gin_file, 'gin_config_{}.gin'.format(exp_id))
    exp_manager = ExperimentManager(prefix=exp_id)
    exp_manager.run_experiment()
    neptune.stop()
    print("Fin")
def evaluate_single():
    """Run the grid-search scorer over every classification CSV in
    `datasets/classification`, one Neptune experiment per dataset.

    Failures are logged ('failed' = 'yes') and do not stop the sweep.
    """
    path = r'datasets/classification'
    names = os.listdir(path)
    names = sorted(names)
    datasets = [{"name": x, "target_column": "class"} for x in names]
    # NOTE(review): `[1:]` skips the first (alphabetically) dataset —
    # presumably a deliberate resume/skip; confirm.
    for dataset in tqdm.tqdm(datasets[1:]):
        try:
            neptune.create_experiment(name=dataset['name'])
            print('Training ' + dataset['name'])
            data = pd.read_csv(path + '/' + dataset["name"])
            # Normalize the target column name to 'class'.
            change_df_column(data, dataset['target_column'], 'class')
            X, y = data.drop(columns=['class']), data['class']
            X, y = preproces_data(X, y)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, stratify=y, random_state=42, test_size=0.3)
            # print('cat')
            # get_cat_score(X_train, y_train, X_test, y_test)
            # print('lgbm')
            # get_lgbm_score(X_train,y_train,X_test,y_test)
            # print('xgb')
            # get_xgb_score(X_train, y_train, X_test, y_test)
            print('grid')
            get_grid_score(X_train, y_train, X_test, y_test)
            # print('bayes-cv')
            # get_bayes_scikit_score_cv(X_train, y_train, X_test, y_test, folds = 5, max_evals = 30)
            # print('bayes-10')
            # get_bayes_scikit_score(X_train, y_train, X_test, y_test, X_val, y_val, max_evals = 10)
            # print('bayes-15')
            # get_bayes_scikit_score(X_train, y_train, X_test, y_test, X_val, y_val, max_evals = 15)
            # print('bayes-25')
            # get_bayes_scikit_score(X_train, y_train, X_test, y_test, X_val, y_val, max_evals = 25)
        except Exception as ex:
            # Best-effort sweep: record the failure and move on.
            print(ex)
            neptune.log_text('failed', 'yes')
def get_model_args_and_gparams(model_json_path, override_params):
    """Load model_args from a JSON spec and derive GlobalParams.

    Supports both tensorflow-style stages_args and the more human-readable
    form; `override_params` (if truthy) replaces fields on the derived
    GlobalParams. Logs both structures to TF logging and Neptune.
    """
    raw_json = json.load(tf_open_file_in_path("", model_json_path, "r"),
                         object_pairs_hook=AttrDict)
    model_args = AttrDict(raw_json)

    # Normalize stages_args into the canonical decoded representation.
    args_decoder = BlockArgsDecoder()
    model_args.stages_args = args_decoder.decode_to_stages_args(model_args.stages_args)

    gparam_values = parse_gparams_from_model_args(model_args)
    global_params = GlobalParams(**gparam_values)
    if override_params:
        global_params = global_params._replace(**override_params)

    tf.logging.info('global_params= %s', global_params)
    neptune.log_text("global_params", str(global_params))
    tf.logging.info('stages_args= %s', model_args.stages_args)
    neptune.log_text("stages_args", str(model_args.stages_args))
    return model_args, global_params
def start_training(self, env, render=False, load=False):
    """Run `training_episodes` DQN episodes (optionally one process per
    episode), logging CPU stats, rewards and timings to Neptune, and save
    the collected rewards to ./dqn/results."""
    if not os.path.exists("./dqn/results"):
        os.makedirs("./dqn/results")
    if not os.path.exists("./dqn/models"):
        os.makedirs("./dqn/models")
    file_name = "dqn_result"
    evaluations = []
    neptune.log_text('cpu_count', str(psutil.cpu_count()))
    neptune.log_text('count_non_logical', str(psutil.cpu_count(logical=False)))
    procs = []
    tic_training = time.perf_counter()
    for episode_idx in range(self.training_episodes):
        neptune.log_text('avg_cpu_load', str(psutil.getloadavg()))
        neptune.log_text('cpu_percent',
                         str(psutil.cpu_percent(interval=1, percpu=True)))
        tic_episode = time.perf_counter()
        if self.multi_processing:
            # One worker process per episode; rewards are not collected here.
            worker = Process(target=self.episode, args=[env, episode_idx, render])
            worker.start()
            procs.append(worker)
        else:
            reward = self.episode(env, episode_idx, render)
            evaluations.append(reward)
            neptune.log_metric('reward', reward)
        toc_episode = time.perf_counter()
        neptune.log_metric('episode_duration', toc_episode - tic_episode)
    if self.multi_processing:
        for worker in procs:
            worker.join()
    toc_training = time.perf_counter()
    neptune.log_metric('training_duration', toc_training - tic_training)
    np.save(f"./dqn/results/{file_name}", evaluations)
def log_text(self, string, key=None, epoch=None):
    '''
    Logs text strings to the enabled backends (Neptune and/or Comet).

    Parameters
    ----------
    string : STR
        text to log
    key: STR
        log_name needed for Neptune strings; falls back to 'text' if omitted
    epoch: INT
        epoch or any other index

    Returns
    -------
    None.
    '''
    if self.neptune:
        if isinstance(string, str):
            if key is None:
                # FIX: the original logged the string under 'text' here and
                # then FELL THROUGH and logged it AGAIN with key=None.
                # Substituting the dummy name and logging once preserves
                # the intent without the duplicate/None-channel log.
                print('Neptune log_name needed for logging text')
                print('Using a dummy name: text')
                key = 'text'
            if epoch is None:
                neptune.log_text(key, string)
            else:
                neptune.log_text(key, epoch, y=string)
        else:
            print("Wrong type: logging text must be a string")
    if self.comet:
        if isinstance(string, str):
            if key is not None:
                print(
                    "Commet text logging does not support keys, prepending it to text"
                )
                string = key + ', ' + string
            if epoch is None:
                self.comet_experiment.log_text(string)
            else:
                self.comet_experiment.log_text(string, step=epoch)
        else:
            print("Wrong type: logging text must be a string")
def log_series(self):
    """Exercise Neptune series logging: numeric series (flat and nested),
    a text series, an image series, then print everything logged so far."""
    # floats
    for value in (1, 2, 3, 2):
        neptune.log_metric("m1", value)
    neptune.log_metric("nested/m1", 1)
    # texts
    for letter in "abc":
        neptune.log_text("m2", letter)
    # images
    # `image_name` and `description` will be lost
    neptune.log_image("g_img", self.img_path, image_name="name", description="desc")
    neptune.log_image("g_img", self.img_path)
    # see what we've logged
    logs = neptune.get_experiment().get_logs()
    print(f"Logs: {logs}")
import neptune
import numpy as np

# Select project
neptune.init('neptune-workshops/AII-Optimali')

# Define parameters
PARAMS = {'decay_factor': 0.5, 'n_iterations': 117}

# Create experiment
neptune.create_experiment(name='minimal-extended', params=PARAMS)

# Log some metrics (start at 1: `step` appears in denominators)
for step in range(1, PARAMS['n_iterations']):
    neptune.log_metric('iteration', step)
    neptune.log_metric('loss', PARAMS['decay_factor'] / step**0.5)
    neptune.log_text('text_info', 'some value {}'.format(0.95 * step**2))

# Add tag to the experiment
neptune.append_tag('quick_start')

# Log some images: random 10x10 RGB mosaics upscaled 30x in both axes
for _ in range(5):
    mosaic = np.random.rand(10, 10, 3) * 255
    mosaic = np.repeat(mosaic, 30, 0)
    mosaic = np.repeat(mosaic, 30, 1)
    neptune.log_image('mosaics', mosaic)
if PARAMS['optimizer'] == 'Adam': optimizer = tf.keras.optimizers.Adam( learning_rate=PARAMS['learning_rate'], ) elif PARAMS['optimizer'] == 'Nadam': optimizer = tf.keras.optimizers.Nadam( learning_rate=PARAMS['learning_rate'], ) elif PARAMS['optimizer'] == 'SGD': optimizer = tf.keras.optimizers.SGD( learning_rate=PARAMS['learning_rate'], ) model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy']) # log model summary model.summary(print_fn=lambda x: neptune.log_text('model_summary', x)) # train model model.fit(train_images, train_labels, batch_size=PARAMS['batch_size'], epochs=PARAMS['n_epochs'], shuffle=PARAMS['shuffle'], callbacks=[ keras.callbacks.LambdaCallback( on_epoch_end=lambda epoch, logs: log_data(logs)), keras.callbacks.EarlyStopping( patience=PARAMS['early_stopping'], monitor='accuracy', restore_best_weights=True), keras.callbacks.LearningRateScheduler(lr_scheduler)
def do_main():
    """Train an EfficientDet-D5 wheat detector on one fold and log the
    checkpoint name to Neptune."""
    device = torch.device(f'cuda:{gpu_number}') if torch.cuda.is_available(
    ) else torch.device('cpu')
    print(device)
    print(len(train_boxes_df))
    print(len(train_images_df))
    # Leave only > 0
    print('Leave only train images with boxes (all)')
    with_boxes_filter = train_images_df[image_id_column].isin(
        train_boxes_df[image_id_column].unique())
    # Split by fold column: current fold = validation, the rest = train.
    images_val = train_images_df.loc[(train_images_df[fold_column] == fold)
                                     & with_boxes_filter,
                                     image_id_column].values
    images_train = train_images_df.loc[(train_images_df[fold_column] != fold)
                                       & with_boxes_filter,
                                       image_id_column].values
    print(len(images_train), len(images_val))
    # NOTE(review): `[:16]` caps both splits at 16 images — looks like a
    # debug/smoke-test setting; confirm before a real run.
    train_dataset = WheatDataset(images_train[:16], DIR_TRAIN,
                                 train_box_callback,
                                 transforms=get_train_transform(),
                                 is_test=False)
    valid_dataset = WheatDataset(images_val[:16], DIR_TRAIN,
                                 train_box_callback,
                                 transforms=get_valid_transform(),
                                 is_test=True)
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=train_batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   collate_fn=collate_fn)
    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=inf_batch_size,
                                   shuffle=False,
                                   num_workers=num_workers,
                                   collate_fn=collate_fn)
    config = get_efficientdet_config(f'tf_efficientdet_d{model_name[-1]}')
    net = EfficientDet(config, pretrained_backbone=False)
    #load_weights(net, '../timm-efficientdet-pytorch/efficientdet_d4-5b370b7a.pth')
    load_weights(net, '../timm-efficientdet-pytorch/efficientdet_d5-ef44aea8.pth')
    # Re-head for single-class (wheat) detection at our image size.
    config.num_classes = 1
    config.image_size = our_image_size
    net.class_net = HeadNet(config,
                            num_outputs=config.num_classes,
                            norm_kwargs=dict(eps=.001, momentum=.01))
    fold_weights_file = f'{experiment_name}.pth'
    if os.path.exists(fold_weights_file):
        # continue training
        print('Continue training, loading weights: ' + fold_weights_file)
        load_weights(net, fold_weights_file)
    model = DetBenchTrain(net, config)
    manager = ModelManager(model, device)
    weights_file = f'{experiment_name}.pth'
    manager.run_train(train_data_loader, valid_data_loader,
                      n_epoches=n_epochs,
                      weights_file=weights_file, factor=factor,
                      start_lr=start_lr, min_lr=min_lr,
                      lr_patience=lr_patience,
                      overall_patience=overall_patience,
                      loss_delta=loss_delta)
    # add tags
    neptune.log_text('save checkpoints as', weights_file[:-4])
    neptune.stop()
def log_text(self, name, text):
    """Forward a text entry to the Neptune channel `name`."""
    neptune.log_text(name, text)
def log_status(self, text):
    """Append `text` to the Neptune 'status' text channel."""
    neptune.log_text('status', text)
def main() -> None:
    """Train an EfficientDet wheat detector on one fold, resuming from the
    experiment checkpoint when present, otherwise from COCO pretrain."""
    # NOTE(review): the CUDA branch yields a str while the CPU branch yields
    # a torch.device — downstream code apparently accepts both; confirm.
    device = f"cuda:{gpu_number}" if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    train_boxes_df = pd.read_csv(META_TRAIN)
    train_boxes_df = preprocess_boxes(train_boxes_df)
    train_images_df = pd.read_csv('folds/orig_alex_folds.csv')
    print(f'\nTotal images: {len(train_images_df.image_id.unique())}')

    # Leave only images with bboxes
    image_id_column = 'image_id'
    print('Leave only train images with boxes')
    with_boxes_filter = train_images_df[image_id_column].isin(
        train_boxes_df[image_id_column].unique())

    # train/val images: current fold = validation, the rest = train
    images_val = train_images_df.loc[
        (train_images_df['fold'] == fold) & with_boxes_filter, image_id_column].values
    images_train = train_images_df.loc[
        (train_images_df['fold'] != fold) & with_boxes_filter, image_id_column].values
    print(f'\nTrain images:{len(images_train)}, validation images {len(images_val)}')

    # get datasets
    # NOTE(review): `[:16]` caps both splits — looks like a smoke-test cap.
    train_dataset = WheatDataset(
        image_ids=images_train[:16],
        image_dir=TRAIN_DIR,
        labels_df=train_boxes_df,
        transforms=get_train_transforms(image_size),
        is_test=False
    )
    valid_dataset = WheatDataset(
        image_ids=images_val[:16],
        image_dir=TRAIN_DIR,
        labels_df=train_boxes_df,
        transforms=get_valid_transforms(image_size),
        is_test=True
    )

    # get dataloaders
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_fn
    )
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=inf_batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    # efficientdet config, re-headed for single-class detection
    config = get_efficientdet_config(f'tf_efficientdet_d{model_name[-1]}')
    config.num_classes = 1
    config.image_size = image_size
    net = EfficientDet(config, pretrained_backbone=False)
    net.class_net = HeadNet(config, num_outputs=config.num_classes,
                            norm_kwargs=dict(eps=.001, momentum=.01))

    weights_file = f'{experiment_name}.pth'
    # If resume training
    if os.path.exists(weights_file):
        print(f'Continue training, loading weights from: {weights_file}')
        load_weights(net, weights_file)
    else:
        print('Use coco pretrain')
        pretrain = get_effdet_pretrain_names(model_name)
        # BUG FIX: the original string lacked the f-prefix, so the literal
        # path '.../{pretrain}' (never a real file) was passed to load_weights.
        load_weights(net, f'../../timm-efficientdet-pytorch/{pretrain}')

    model = DetBenchTrain(net, config)
    runner = ModelRunner(model, device)
    weights_file = f'{experiment_name}.pth'

    # add tags
    neptune.log_text('save checkpoints as', weights_file[:-4])

    # run training
    runner.run_train(train_data_loader, valid_data_loader, n_epoches=n_epochs,
                     weights_file=weights_file, factor=factor, start_lr=start_lr,
                     min_lr=min_lr, lr_patience=lr_patience,
                     overall_patience=overall_patience, loss_delta=loss_delta)
    neptune.stop()
name='b1_cnv2d_2', kernel_initializer='normal')(b1_cnv2d_2) b1_relu_2 = ReLU(name='b1_relu_2')(b1_cnv2d_2) # b1_out = BatchNormalization(epsilon=1e-3, momentum=0.999, name='b1_out')(b1_relu_2) # size: 64*64 reconstruction = Conv2D(filters=1, kernel_size=(1, 1), activation='sigmoid', padding='same')(b1_relu_2) # </editor-fold> decoder = Model(latent_inputs, reconstruction, name='origin_decoder') decoder.summary() # log model summary encoder.summary(print_fn=lambda x: neptune.log_text('encoder_summary', x)) decoder.summary(print_fn=lambda x: neptune.log_text('decoder_summary', x)) outputs = decoder(encoder(input_img)) vae = Model(input_img, outputs, name='vae') def get_vae_loss(input, x_decoded_mean, encoded_sigma, encoded_mean): xent_loss = tf.reduce_mean(mean_squared_error(input, x_decoded_mean)) kl_loss = 0.5 * tf.reduce_sum( tf.square(encoded_mean) + tf.square(encoded_sigma) - tf.math.log(tf.square(encoded_sigma)) - 1, -1) return xent_loss + kl_loss reconst_loss = mean_squared_error(K.flatten(input_img), K.flatten(outputs)) reconst_loss *= image_size * image_size kl_loss = 0.5 * tf.reduce_sum(
def log_text(self, name: str, value: str):
    """Log `value` to the Neptune channel `name`, unless logging is disabled."""
    if not self.disabled:
        neptune.log_text(name, value)
apprentice_storage=apprentice_storage, expert_storage=expert_storage, apprentice_handler=apprentice_handler, expert_handler=expert_handler, char_dict=char_dict, num_keep=args.num_keep, apprentice_sampling_batch_size=args.apprentice_sampling_batch_size, expert_sampling_batch_size=args.expert_sampling_batch_size, apprentice_training_batch_size=args.apprentice_training_batch_size, num_apprentice_training_steps=args.num_apprentice_training_steps, init_smis=[reference_smi], ) recorder = Recorder(scoring_num_list=scoring_num_list, record_filtered=args.record_filtered) exp_generator = GeneticExpertGuidedLearningGenerator( trainer=trainer, recorder=recorder, num_steps=args.num_steps, device=device, scoring_num_list=scoring_num_list, num_jobs=args.num_jobs, ) result = benchmark.assess_model(exp_generator) neptune.log_metric("id", smi_id) neptune.log_metric("score", result.optimized_molecules[0][1]) neptune.log_text("reference_smi", reference_smi) neptune.log_text("optimized_smi", result.optimized_molecules[0][0])
def train_pnas(PARAMS):
    """Train the PNAS model described by PARAMS and return the Keras history.

    Side effects: creates log/model dirs, mutates PARAMS in place (derived
    sizes, input_shape, base_learning_rate), and logs tags/properties and
    the model summary to Neptune.
    """
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    # Fixed seed for reproducible runs.
    tf.random.set_seed(34)
    train_dataset, validation_dataset, data_files = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        batch_size=PARAMS['BATCH_SIZE'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augment_train=PARAMS['augment_train'],
        aug_prob=PARAMS['aug_prob'])
    PARAMS['num_classes'] = data_files.num_classes
    PARAMS['splits_size'] = {'train': {}, 'validation': {}}
    # Absolute sample counts per split, derived from the split fractions.
    PARAMS['splits_size'][
        'train'] = data_files.num_samples * PARAMS['splits']['train']
    PARAMS['splits_size'][
        'validation'] = data_files.num_samples * PARAMS['splits']['validation']
    steps_per_epoch = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    validation_steps = PARAMS['splits_size']['validation'] // PARAMS[
        'BATCH_SIZE']
    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)
    encoder = base_dataset.LabelEncoder(data_files.classes)
    # train_dataset = train_dataset.map(lambda x,y: apply_preprocess(x,y,PARAMS['num_classes']),num_parallel_calls=-1)
    # validation_dataset = validation_dataset.map(lambda x,y: apply_preprocess(x,y,PARAMS['num_classes']),num_parallel_calls=-1)
    # METRICS = ['accuracy']
    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=25, verbose=1)
    ]
    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'],
                             PARAMS['num_channels'])
    model = build_model(PARAMS)
    # if PARAMS['optimizer']=='Adam':
    #     optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['lr'])
    # base = tf.keras.applications.vgg16.VGG16(weights='imagenet',
    #                                          include_top=False,
    #                                          input_tensor=Input(shape=(*PARAMS['target_size'],3)))
    # model = build_head(base, num_classes=PARAMS['num_classes'])
    # model.compile(optimizer=optimizer,
    #               loss=PARAMS['loss'],
    #               metrics=METRICS)
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)
    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)
    # Persist all (possibly updated) params as experiment properties.
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))
    return history
def train_imagenette(PARAMS):
    """Fine-tune a VGG16 head on the Imagenette dataset.

    NOTE(review): `history` is assigned but the function has no return
    statement — presumably the result is unused by callers; confirm.
    """
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    K.clear_session()
    # Fixed seed for reproducible runs.
    tf.random.set_seed(34)
    target_size = PARAMS['target_size']
    BATCH_SIZE = PARAMS['BATCH_SIZE']
    train_dataset, validation_dataset, info = create_Imagenette_dataset(
        BATCH_SIZE,
        target_size=target_size,
        augment_train=PARAMS['augment_train'])
    num_classes = info.features['label'].num_classes
    encoder = base_dataset.LabelEncoder(info.features['label'].names)
    train_dataset = train_dataset.map(
        lambda x, y: apply_preprocess(x, y, num_classes),
        num_parallel_calls=-1)
    validation_dataset = validation_dataset.map(
        lambda x, y: apply_preprocess(x, y, num_classes),
        num_parallel_calls=-1)
    PARAMS['num_classes'] = num_classes
    steps_per_epoch = info.splits['train'].num_examples // BATCH_SIZE
    validation_steps = info.splits['validation'].num_examples // BATCH_SIZE
    neptune.set_property('num_classes', num_classes)
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)
    optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['learning_rate'])
    loss = 'categorical_crossentropy'
    METRICS = ['accuracy']
    base = tf.keras.applications.vgg16.VGG16(
        weights='imagenet',
        include_top=False,
        input_tensor=Input(shape=(*target_size, 3)))
    # TODO try freezing weights for input_shape != (224,224)
    model = build_head(base, num_classes=num_classes)
    model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)
    callbacks = [
        neptune_logger,
        ImageLoggerCallback(data=train_dataset,
                            freq=10,
                            max_images=-1,
                            name='train',
                            encoder=encoder),
        ImageLoggerCallback(data=validation_dataset,
                            freq=10,
                            max_images=-1,
                            name='val',
                            encoder=encoder),
        EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    ]
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)
    history = model.fit(train_dataset,
                        epochs=10,
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)
def on_epoch_end(self, epoch, optimizer, val_generator, weights_file, factor,
                 min_lr, lr_patience, overall_patience, loss_delta):
    """Validate after an epoch: compute loss and mAP, checkpoint on
    improvement, apply ReduceLROnPlateau-style LR decay, and decide
    whether to continue.

    Returns:
        False to early-stop (no improvement for `overall_patience` epochs),
        True to keep training.
    """
    self.train_model.eval()
    current_loss = 0
    tqdm_generator = tqdm(val_generator, mininterval=15)
    tqdm_generator.set_description('validation loss')
    with torch.no_grad():
        for batch_idx, (batch_imgs, batch_labels,
                        image_id) in enumerate(tqdm_generator):
            batch_imgs = torch.stack(batch_imgs)
            batch_imgs = batch_imgs.to(self.device).float()
            batch_boxes = [
                target['boxes'].to(self.device) for target in batch_labels
            ]
            batch_labels = [
                target['labels'].to(self.device) for target in batch_labels
            ]
            loss, _, _ = self.train_model(batch_imgs, batch_boxes,
                                          batch_labels)
            loss_value = loss.item()
            # just slide average (running mean over batches seen so far)
            current_loss = (current_loss * batch_idx + loss_value) / (
                batch_idx + 1)
    # validate loss
    print('\nValidation loss: ', current_loss)
    neptune.log_metric('Validation loss', current_loss)
    # validate metric
    nms_thr = 0.4
    true_list, pred_boxes, pred_scores = self.predict(val_generator)
    current_metric = competition_metric(true_list, pred_boxes, pred_scores,
                                        nms_thr)
    print('\nValidation mAP', current_metric)
    neptune.log_metric('Validation mAP', current_metric)
    neptune.log_text('nms_threshold', str(nms_thr))
    # Checkpoint when validation loss improves by at least loss_delta.
    if current_loss < self.best_loss - loss_delta:
        print(
            f'\nLoss has been improved from {self.best_loss} to {current_loss}'
        )
        self.best_loss = current_loss
        self.best_epoch = epoch
        torch.save(self.train_model.model.state_dict(), f'{weights_file}')
    else:
        print(f'\nLoss has not been improved from {self.best_loss}')
    # Separate checkpoint whenever mAP improves.
    if current_metric > self.best_metric:
        print(
            f'\nmAP has been improved from {self.best_metric} to {current_metric}'
        )
        self.best_metric = current_metric
        self.best_epoch = epoch
        torch.save(self.train_model.model.state_dict(),
                   f'{weights_file}_best_map')
    if epoch - self.best_epoch > overall_patience:
        print('\nEarly stop: training finished with patience!')
        return False
    print('curr_lr_loss', self.curr_lr_loss)
    if current_loss >= self.curr_lr_loss - loss_delta:
        print('curr_lr_loss not improved')
        old_lr = float(get_lr(optimizer))
        print('old_lr', old_lr)
        if old_lr > min_lr and epoch - self.best_lr_epoch > lr_patience:
            new_lr = old_lr * factor
            new_lr = max(new_lr, min_lr)
            print('new_lr', new_lr)
            set_lr(optimizer, new_lr)
            # Reset the plateau tracker after a decay.
            self.curr_lr_loss = 100
            self.best_lr_epoch = epoch
            print(
                '\nEpoch %05d: ReduceLROnPlateau reducing learning rate to %s.'
                % (epoch, new_lr))
    else:
        print('curr_lr_loss improved')
        self.curr_lr_loss = current_loss
        self.best_lr_epoch = epoch
    return True
def do_main():
    """Create the Neptune experiment, build the EfficientDet-D5 wheat
    detector (train images padded with 100 negative samples), train one
    fold, and log the checkpoint name."""
    neptune.init('ods/wheat')
    # Create experiment with defined parameters
    neptune.create_experiment(name=model_name,
                              params=PARAMS,
                              tags=[experiment_name, experiment_tag],
                              upload_source_files=[os.path.basename(__file__)])
    neptune.append_tags(f'fold_{fold}')
    device = torch.device(f'cuda:{gpu_number}') if torch.cuda.is_available(
    ) else torch.device('cpu')
    print(device)
    print(len(train_boxes_df))
    print(len(train_images_df))
    # Leave only > 0
    print('Leave only train images with boxes (validation)')
    with_boxes_filter = train_images_df[image_id_column].isin(
        train_boxes_df[image_id_column].unique())
    negative_images = enumerate_images(DIR_NEGATIVE)
    negative_images = [(negative_prefix + filename[:-4])
                       for filename in negative_images]
    negative_images.sort()
    # take first 100 now...
    negative_images = negative_images[:100]
    """
    spike_images = enumerate_images(DIR_SPIKE)
    spike_images = [(spike_dataset_prefix + filename[:-4]) for filename in spike_images]
    spike_images.sort()
    assert len(spike_images) > 0
    """
    config = get_efficientdet_config('tf_efficientdet_d5')
    net = EfficientDet(config, pretrained_backbone=False)
    load_weights(net,
                 '../timm-efficientdet-pytorch/efficientdet_d5-ef44aea8.pth')
    # Re-head for single-class (wheat) detection at our image size.
    config.num_classes = 1
    config.image_size = our_image_size
    net.class_net = HeadNet(config,
                            num_outputs=config.num_classes,
                            norm_kwargs=dict(eps=.001, momentum=.01))
    model_train = DetBenchTrain(net, config)
    model_eval = DetBenchEval(net, config)
    manager = ModelManager(model_train, model_eval, device)
    pretrained_weights_file = 'pretrained.pth'
    # Validation keeps only images with boxes; training additionally keeps
    # box-less images plus the negative samples appended below.
    images_val = train_images_df.loc[(train_images_df[fold_column] == fold)
                                     & with_boxes_filter,
                                     image_id_column].values
    images_train = train_images_df.loc[(train_images_df[fold_column] != fold),
                                       image_id_column].values
    #images_train = list(images_train) + list(negative_images) + list(spike_images)
    images_train = list(images_train) + list(negative_images)
    print(len(images_train), len(images_val))
    train_dataset = WheatDataset(images_train, DIR_TRAIN,
                                 train_boxes_df,
                                 transforms=get_train_transform(),
                                 is_test=False)
    valid_dataset = WheatDataset(images_val, DIR_TRAIN,
                                 train_boxes_df,
                                 transforms=get_valid_transform(),
                                 is_test=True)
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=train_batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   collate_fn=collate_fn,
                                   drop_last=True)
    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=inf_batch_size,
                                   shuffle=False,
                                   num_workers=num_workers,
                                   collate_fn=collate_fn)
    weights_file = f'{experiment_name}.pth'
    if os.path.exists(pretrained_weights_file):
        # continue training
        print('Continue training, loading weights: ' + pretrained_weights_file)
        load_weights(net, pretrained_weights_file)
    manager.run_train(train_data_loader, valid_data_loader,
                      n_epoches=n_epochs, weights_file=weights_file,
                      factor=factor, start_lr=start_lr, min_lr=min_lr,
                      lr_patience=lr_patience,
                      overall_patience=overall_patience,
                      loss_delta=loss_delta)
    # add tags
    neptune.log_text('save checkpoints as', weights_file[:-4])
    neptune.stop()
def NeptuneLog():
    """Log the run's hyperparameter globals (batch size, learning rate,
    pretrain flag, model name) to the active Neptune experiment."""
    neptune.log_metric('batch_size', batch_sizes)
    neptune.log_metric('learning_rate', learning_rate)
    neptune.log_text('pre-trained', str(pretrain_check))
    neptune.log_text('model', model_name)