Ejemplo n.º 1
0
def main():
    """Run the learning-curve experiment and store the collected metrics.

    For every training-set size in ``[3000, 5000, 5600]`` the data is
    processed and a model is fitted ten times, each time inside a newly
    created Comet experiment. The metrics of every run (together with its
    iteration index and training-set size) are gathered into one DataFrame,
    written to a timestamped CSV file and uploaded as an asset to the last
    experiment that was created.
    """
    # %S (zero-padded seconds) replaces the original %s, which is not a
    # portable strftime directive: on glibc it expands to the seconds since
    # the epoch and on other platforms it is unsupported.
    start_time = time.strftime('run_%Y_%m_%d_%H_%M_%S')
    csv_name = 'learningurve_' + start_time + '.csv'

    metrics = []
    for ts_size in [3000, 5000, 5600]:
        for iteration in range(10):
            _, _, X_train, X_test, y_train, y_test, _ = process_data(
                size=ts_size)

            experiment = Experiment(api_key=os.environ['COMET_API_KEY'],
                                    project_name='color-ml')
            experiment.log_parameters(PARAMETERS_MEDIAN)

            with experiment.train():
                regressor_median = fit(X_train, y_train)

            metrics_dict = get_metrics_dict(regressor_median, X_test, y_test,
                                            experiment)
            metrics_dict['iteration'] = iteration
            metrics_dict['ts_size'] = ts_size
            metrics.append(metrics_dict)

    # Build the file name once so the written file and the uploaded asset
    # cannot drift apart.
    pd.DataFrame(metrics).to_csv(csv_name)
    # `experiment` is the one created in the final loop iteration.
    experiment.log_asset(csv_name)
Ejemplo n.º 2
0
def main(modelpath, xpath, ypath, outname):
    """Evaluate a stored model on stored arrays and log the metrics table.

    A Comet experiment is opened first; then the model is loaded with joblib
    from ``modelpath`` and the two arrays with NumPy from ``xpath`` and
    ``ypath``. The result of ``get_metrics`` is written as CSV (without the
    index column) to ``outname`` and that file is uploaded as an asset.
    """
    comet_key = os.environ['COMET_API_KEY']
    experiment = Experiment(api_key=comet_key, project_name='color-ml')

    # Inputs are read in the same order as before: model, X, then y.
    model = joblib.load(modelpath)
    X = np.load(xpath)
    y = np.load(ypath)

    metrics_table = pd.DataFrame(get_metrics(model, X, y, experiment))
    metrics_table.to_csv(outname, index=False)

    experiment.log_asset(outname)
Ejemplo n.º 3
0
def setup_comet_ml(args, rank):
    """Create the Comet experiment object used by the run and log the configs.

    A disabled placeholder experiment is always created first so the returned
    object can be used even when no API key is configured. When
    ``args.comet_api_key`` is set, it is replaced either by an
    ``ExistingExperiment`` (if ``args.existing_exp_key`` is set) or by a new
    ``Experiment``; both are disabled when ``args.disable_comet`` is truthy or
    ``rank`` is not 0. Three YAML config files are then logged as assets.

    Returns the experiment object.
    """
    # Placeholder that keeps later calls valid when Comet is not configured.
    experiment = Experiment(api_key='dummy_key', disabled=True)

    if args.comet_api_key:
        resume_previous = bool(args.existing_exp_key)

        # Only the rank-0 process announces which kind of experiment starts.
        if rank == 0:
            if resume_previous:
                print("STARTING FROM AND EXISTING EXPERIMENT")
            else:
                print("STARTING A NEW EXPERIMENT")

        # Options shared by both experiment flavours.
        common_options = dict(
            api_key=args.comet_api_key,
            workspace=args.comet_workspace,
            project_name=args.project_name,
            auto_output_logging="simple",
            auto_metric_logging=False,
            parse_args=False,
            disabled=args.disable_comet or rank != 0,
        )

        if resume_previous:
            experiment = ExistingExperiment(
                previous_experiment=args.existing_exp_key, **common_options)
        else:
            experiment = Experiment(**common_options)

    for config_file in ('config.yaml', 'config_prod.yaml',
                        'config_prod_prime.yaml'):
        experiment.log_asset(config_file)

    return experiment
Ejemplo n.º 4
0
 def log(self, experiment=None):
     ''' Export all logs to the Comet.ml environment.
         See https://www.comet.ml/ for more details

         experiment: optional, already created Comet experiment to log into.
                     When None, a new experiment is created in the
                     'Optimization' or 'Summary' project.

         Logs the name and tags, the dataloader and training parameters, the
         mean/std test accuracy, one confusion matrix and one loss figure per
         pilot, the accuracy barchart, the ES+ loss figures (if enabled) and
         the per-pilot weight files, then ends the experiment.
     '''

     # Initialize Comet.ml experiment (naming, tags) for automatic logging
     project_name = 'Optimization' if self.comet_optimize else 'Summary'
     experiment_name = '{} - {} '.format(self.model_name, str(self.batch_size)) + ('ES+' if self.train_after_es else '')
     experiment_tags = [ self.model_name, self.monitor_val ] + (['ES+'] if self.train_after_es else []) +  (['Pre-train'] if self.pretraining else [])

     # Identity test instead of `== None`: equality may be overridden by the
     # experiment class, identity cannot.
     if experiment is None:
         # NOTE(review): hard-coded API key committed to source - it should be
         # revoked and read from configuration/environment instead.
         experiment = Experiment(api_key='cSZq9kuH2I87ezvm2dEWTx6op', project_name=project_name, log_code=False, auto_param_logging=False, auto_metric_logging=False)
     experiment.set_name(experiment_name)
     experiment.add_tags(experiment_tags)

     # Export hyperparameters
     experiment.log_parameters(self.dataloader_params)
     experiment.log_parameters(self.training_params)

     # Export metrics values
     accuracies = self.test_score['accuracy']
     experiment.log_metrics({'Average accuracy' : np.mean(accuracies), 'Std accuracy' : np.std(accuracies)})

     # Pilots are numbered from 1; histories are stored 0-based.
     pilot_indices = range(1, self.n_pilots + 1)

     # Export metrics graphs for each pilot (confusion matrix, then loss).
     # Plain loops replace list comprehensions that were used only for their
     # side effects and built throw-away lists.
     for pilot_idx in pilot_indices:
         experiment.log_figure(figure_name='Confusion matrix {}'.format(pilot_idx), figure=plot_cm(self.conf_matrices, pilot_idx))
     for pilot_idx in pilot_indices:
         experiment.log_figure(figure_name='Loss pilot {}'.format(pilot_idx), figure=plot_loss(self.histories[pilot_idx-1], pilot_idx))

     fig, ax = plt.subplots(figsize=(10,6))
     plot_full_barchart(self.test_score, n_pilots=self.n_pilots, title=' {} ConvNet model'.format(self.model_name), fig=fig)
     experiment.log_figure(figure_name='Accuracy barchart', figure=fig)

     if self.train_after_es:
         for pilot_idx in pilot_indices:
             experiment.log_figure(figure_name='Loss pilot {} (ES+)'.format(pilot_idx), figure=plot_loss(self.histories_es[pilot_idx-1], pilot_idx))

     # Export model weights for each pilot
     for pilot_idx in pilot_indices:
         experiment.log_asset('{}{}.h5'.format(self.weights_savename_prefix, pilot_idx))
     experiment.end()
Ejemplo n.º 5
0
     print("Train Ensemble")
     model.ensemble(experiment=experiment)
 
 # Final score: evaluate on batches of a single element so that no sample of
 # the validation split is left out.
 single_sample_batches = model.val_split.unbatch().batch(1)
 final_score = model.ensemble_model.evaluate(single_sample_batches)
 experiment.log_metric("Ensemble Accuracy", final_score[1])

 # Save the ensemble model (plotting of the model is currently disabled)
 #tf.keras.utils.plot_model(model.ensemble_model, to_file="{}/Ensemble.png".format(save_dir))
 #experiment.log_figure("{}/Ensemble.png".format(save_dir))
 model.ensemble_model.save("{}/Ensemble.h5".format(save_dir))

 # Write the predictions as a shapefile and upload each of its component files
 predicted_shp = model.predict(model = model.ensemble_model)
 predicted_shp.to_file("{}/prediction.shp".format(save_dir))
 for _shapefile_ext in ("shp", "dbf", "shx", "cpg"):
     experiment.log_asset("{}/prediction.{}".format(save_dir, _shapefile_ext))

 # Per-species accuracy: share of rows whose predicted taxon equals the true one
 predicted_shp["match"] = predicted_shp.apply(lambda row: row.true_taxonID == row.predicted_taxonID, axis=1)
 per_species = predicted_shp.groupby("true_taxonID").apply(lambda group: group["match"].sum()/len(group))
 _per_species_csv = "{}/perspecies.csv".format(save_dir)
 per_species.to_csv(_per_species_csv)
 experiment.log_asset(_per_species_csv)

 # Per-site accuracy, computed the same way but grouped by site
 per_site = predicted_shp.groupby("siteID").apply(lambda group: group["match"].sum()/len(group))
 _per_site_csv = "{}/persite.csv".format(save_dir)
 per_site.to_csv(_per_site_csv)
 experiment.log_asset(_per_site_csv)
 
 #Plots - this function needs to be rewritten because the dataset is now nested: ids, (data, label). probably predict on batch.
Ejemplo n.º 6
0
    # experiment.log_metric("train_loss", np.mean(train_loss), step=epoch)
    experiment.log_metric("train_perplexity", train_ppl, step=epoch)

    # RUN MODEL ON VALIDATION DATA
    val_ppl, val_loss = run_epoch(model, valid_data)
    # experiment.log_metric("val_loss", np.mean(val_loss), step=epoch)
    experiment.log_metric("val_perplexity", val_ppl, step=epoch)

    # SAVE MODEL IF IT'S THE BEST SO FAR
    if val_ppl < best_val_so_far:
        best_val_so_far = val_ppl
        if args.save_best:
            print("Saving model parameters to best_params.pt")
            best_model_path = os.path.join(args.save_dir, 'best_params.pt')
            torch.save(model.state_dict(), best_model_path)
            experiment.log_asset(best_model_path, overwrite=True)
        # NOTE ==============================================
        # You will need to load these parameters into the same model
        # for a couple Problems: so that you can compute the gradient
        # of the loss w.r.t. hidden state as required in Problem 5.2
        # and to sample from the model as required in Problem 5.3
        # We are not asking you to run on the test data, but if you
        # want to look at test performance you would load the saved
        # model and run on the test data with batch_size=1

    # LOG RESULTS
    train_ppls.append(train_ppl)
    val_ppls.append(val_ppl)
    train_losses.extend(train_loss)
    val_losses.extend(val_loss)
    times.append(time.time() - t0)
Ejemplo n.º 7
0
def train(opt):
    """Train a deep Q-network on the Flappy Compass game.

    Builds a ``DeepQNetwork`` from the options, then runs ``opt.num_iters``
    iterations of epsilon-greedy play with experience replay, minimising an
    MSE loss with Adam. The whole model is saved under ``opt.saved_path``
    every ``opt.iters_to_save`` completed iterations and once more after the
    last iteration. When ``opt.log_comet_ml`` is set, hyperparameters,
    per-iteration metrics and (if ``opt.comet_ml_save_model``) the saved
    model files are sent to Comet.ml.

    Args:
        opt: options object; the attributes read are ``random_seed``,
            ``image_size``, ``conv_dim``, ``conv_kernel_sizes``,
            ``conv_strides``, ``fc_dim``, ``batch_size``, ``optimizer``,
            ``lr``, ``gamma``, ``initial_epsilon``, ``final_epsilon``,
            ``num_iters``, ``replay_memory_size``, ``iters_to_save``,
            ``saved_path`` and the ``log_comet_ml`` / ``comet_ml_*`` settings.
    """
    # Seed the CPU generator unconditionally and the CUDA generator when a GPU
    # is present. Previously only the CUDA generator was seeded on GPU
    # machines, leaving CPU-side random draws unseeded.
    torch.manual_seed(opt.random_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(opt.random_seed)
    # NOTE(review): random(), randint() and sample() used below are not
    # seeded here - presumably they come from Python's `random` module;
    # confirm whether they should be seeded for full reproducibility.

    # Instantiate the model, with a custom architecture only when every
    # architecture option was provided
    if opt.conv_dim is not None and \
       opt.conv_kernel_sizes is not None and \
       opt.conv_strides is not None and \
       opt.fc_dim is not None:
        model = DeepQNetwork(opt.image_size,
                             opt.image_size,
                             conv_dim=opt.conv_dim,
                             conv_kernel_sizes=opt.conv_kernel_sizes,
                             conv_strides=opt.conv_strides,
                             fc_dim=opt.fc_dim)
    else:
        model = DeepQNetwork(opt.image_size, opt.image_size)

    def save_model(completed_iters):
        """Save the whole model under a timestamped name; return the path."""
        # Current day and time become part of the saved model's name
        current_datetime = datetime.now().strftime('%d_%m_%Y_%H_%M')
        model_filename = (f'{opt.saved_path}/flappy_compass_'
                          f'{current_datetime}_{completed_iters}.pth')
        torch.save(model, model_filename)
        return model_filename

    if opt.log_comet_ml:
        # Create a Comet.ml experiment
        experiment = Experiment(api_key=opt.comet_ml_api_key,
                                project_name=opt.comet_ml_project_name,
                                workspace=opt.comet_ml_workspace)
        experiment.log_other("iters_to_save", opt.iters_to_save)
        experiment.log_other("completed", False)
        experiment.log_other("random_seed", opt.random_seed)

        # Report hyperparameters to Comet.ml
        hyper_params = {
            "image_size": opt.image_size,
            "batch_size": opt.batch_size,
            "optimizer": opt.optimizer,
            "learning_rate": opt.lr,
            "gamma": opt.gamma,
            "initial_epsilon": opt.initial_epsilon,
            "final_epsilon": opt.final_epsilon,
            "num_iters": opt.num_iters,
            "replay_memory_size": opt.replay_memory_size,
            "random_seed": opt.random_seed,
            "conv_dim": opt.conv_dim,
            "conv_kernel_sizes": opt.conv_kernel_sizes,
            "conv_strides": opt.conv_strides,
            "fc_dim": opt.fc_dim
        }
        experiment.log_parameters(hyper_params)

    # Use the configured learning rate: it is the value reported above as
    # "learning_rate", whereas the optimizer used to be pinned to 1e-6
    # regardless of the option.
    # NOTE(review): opt.optimizer is reported but Adam is always used.
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.MSELoss()  # Loss function
    game_state = FlappyBird()  # Instantiate the Flappy Compass game
    # Get the first image, along with its reward and an indication of
    # whether it is a terminal state
    image, reward, terminal = game_state.next_frame(0)

    # Image preprocessing step, followed by conversion to a PyTorch tensor
    image = pre_processing(
        image[:game_state.screen_width, :int(game_state.base_y)],
        opt.image_size, opt.image_size)
    image = torch.from_numpy(image)

    # Move the model and the current image data to the GPU, if available
    if torch.cuda.is_available():
        model.cuda()
        image = image.cuda()

    # Prepare the state variable, which will host the last 4 frames
    # (initially four copies of the first frame)
    state = torch.cat(tuple(image for _ in range(4)))[None, :, :, :]

    # Replay memory: [state, action, reward, next_state, terminal] entries
    # from which the training batches are sampled
    replay_memory = []

    iteration = 0  # Number of completed training iterations

    # Main training loop performing the number of iterations specified by num_iters
    while iteration < opt.num_iters:
        prediction = model(state)[0]  # Prediction for the current state
        # Linearly decay the probability of random actions from
        # initial_epsilon towards final_epsilon
        epsilon = opt.final_epsilon + (
            (opt.num_iters - iteration) *
            (opt.initial_epsilon - opt.final_epsilon) / opt.num_iters
        )
        u = random()
        random_action = u <= epsilon
        if random_action:
            print("Perform a random action")
            action = randint(0, 1)
        else:
            # Use the model's prediction to decide the next action
            action = torch.argmax(prediction).item()

        # Get a new frame and process it
        next_image, reward, terminal = game_state.next_frame(action)
        next_image = pre_processing(
            next_image[:game_state.screen_width, :int(game_state.base_y)],
            opt.image_size, opt.image_size)
        next_image = torch.from_numpy(next_image)

        # Move the next image data to the GPU, if available
        if torch.cuda.is_available():
            next_image = next_image.cuda()

        # Next state: drop the oldest frame and append the new one
        next_state = torch.cat(
            (state[0, 1:, :, :], next_image))[None, :, :, :]

        # Save the transition in the replay memory, dropping the oldest
        # replay once full capacity has been exceeded
        replay_memory.append([state, action, reward, next_state, terminal])
        if len(replay_memory) > opt.replay_memory_size:
            del replay_memory[0]

        # Retrieve past play sequences from the replay memory
        batch = sample(replay_memory, min(len(replay_memory), opt.batch_size))
        state_batch, action_batch, reward_batch, next_state_batch, terminal_batch = zip(
            *batch)

        state_batch = torch.cat(state_batch)  # States of the current batch
        # Actions taken in the current batch, one-hot encoded
        action_batch = torch.from_numpy(
            np.array([[1, 0] if past_action == 0 else [0, 1]
                      for past_action in action_batch],
                     dtype=np.float32))
        # Rewards in the current batch, as a column
        reward_batch = torch.from_numpy(
            np.array(reward_batch, dtype=np.float32)[:, None])
        # Next states of the current batch
        next_state_batch = torch.cat(next_state_batch)

        # Move batch data to the GPU, if available
        if torch.cuda.is_available():
            state_batch = state_batch.cuda()
            action_batch = action_batch.cuda()
            reward_batch = reward_batch.cuda()
            next_state_batch = next_state_batch.cuda()

        # Predictions of the model for the replays of the current batch and
        # for their successor states
        current_prediction_batch = model(state_batch)
        next_prediction_batch = model(next_state_batch)

        # Targets: the reward alone for terminal transitions, otherwise the
        # reward plus the discounted best predicted value of the next state.
        # NOTE(review): the targets are not detached, so gradients also flow
        # through next_prediction_batch - confirm this is intended.
        y_batch = torch.cat(
            tuple(past_reward if is_terminal else past_reward +
                  opt.gamma * torch.max(next_prediction)
                  for past_reward, is_terminal, next_prediction in zip(
                      reward_batch, terminal_batch, next_prediction_batch)))

        # Predicted Q values of the actions that were actually taken
        q_value = torch.sum(current_prediction_batch * action_batch, dim=1)
        optimizer.zero_grad()  # Reset the gradients before a new step
        loss = criterion(q_value, y_batch)  # Calculate the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Weights optimization step

        state = next_state  # Move to the next frame
        iteration += 1

        # Plain Python numbers for reporting, so neither the console output
        # nor the logger holds on to tensors (and their autograd graph)
        loss_value = loss.item()
        max_q_value = torch.max(prediction).item()

        # `iteration` already counts this step; the former `iter + 1` here
        # (and in the checkpoint logic below) was off by one.
        print(
            "Iteration: {}/{}, Action: {}, Loss: {}, Epsilon {}, Reward: {}, Q-value: {}"
            .format(iteration, opt.num_iters, action, loss_value, epsilon,
                    reward, max_q_value))

        if opt.log_comet_ml:
            # Log metrics to Comet.ml
            experiment.log_metric("train_loss", loss_value, step=iteration)
            experiment.log_metric("train_epsilon", epsilon, step=iteration)
            experiment.log_metric("train_reward", reward, step=iteration)
            experiment.log_metric("train_Q_value",
                                  max_q_value,
                                  step=iteration)

        if iteration % opt.iters_to_save == 0:
            # Save model every iters_to_save completed iterations
            model_filename = save_model(iteration)

            if opt.log_comet_ml and opt.comet_ml_save_model:
                # Upload model to Comet.ml
                experiment.log_asset(file_path=model_filename, overwrite=True)

    # Save the model after reaching the final iteration
    model_filename = save_model(iteration)

    if opt.log_comet_ml:
        # Only report that the experiment completed successfully if it finished the training without errors
        experiment.log_other("completed", True)

        if opt.comet_ml_save_model:
            # Upload model to Comet.ml
            experiment.log_asset(file_path=model_filename, overwrite=True)
            "================================================================="
        )

        if current_epoch % MODEL_SAVE_INTERVAL == 0:
            current_save_model_name = save_checkpoint(
                {
                    'model': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'e': current_epoch,
                    'PACNN_PERSPECTIVE_AWARE_MODEL':
                    PACNN_PERSPECTIVE_AWARE_MODEL
                    # 'amp': amp.state_dict()
                },
                False,
                MODEL_SAVE_NAME + "_" + str(current_epoch) + "_")
            experiment.log_asset(current_save_model_name)
            print("saved ", current_save_model_name)

        # end 1 epoch

        # after epoch evaluate
        mae_calculator_d1 = MAECalculator()
        mae_calculator_d2 = MAECalculator()
        mae_calculator_d3 = MAECalculator()
        mae_calculator_final = MAECalculator()
        with torch.no_grad():
            for val_img, label in val_loader_pacnn:
                net.eval()
                # load data
                d1_label, d2_label, d3_label = label
Ejemplo n.º 9
0
    help="Path to the config file.",
)
# Remaining command-line options
parser.add_argument("--output_path",
                    type=str,
                    default=".",
                    help="outputs path")
parser.add_argument("--resume", action="store_true")
parser.add_argument("--trainer", type=str, default="MUNIT", help="MUNIT|UNIT")
# argparse applies %-formatting to help strings, so a literal percent sign
# must be written as "%%"; the unescaped "%h" made `--help` fail with a
# ValueError instead of printing the usage text.
parser.add_argument("--git_hash",
                    type=str,
                    default="no-git-hash",
                    help="output of git log --pretty=format:'%%h' -n 1")
opts = parser.parse_args()

# Record the config file and the git hash with the Comet experiment, if any
if comet_exp is not None:
    comet_exp.log_asset(file_data=opts.config, file_name="config.yaml")
    comet_exp.log_parameter("git_hash", opts.git_hash)

cudnn.benchmark = True
# Load experiment setting
config = get_config(opts.config)
max_iter = config["max_iter"]
display_size = config["display_size"]
# The output path is also stored in the config under "vgg_model_path"
config["vgg_model_path"] = opts.output_path

# Setup model and data loader
if opts.trainer == "MUNIT":
    trainer = MUNIT_Trainer(config)
elif opts.trainer == "UNIT":
    trainer = UNIT_Trainer(config)
else:
Ejemplo n.º 10
0
def main(cfg: DictConfig):
    """Train and predict with the model selected by the configuration.

    Changes back to the original working directory, seeds the run, opens a
    Comet experiment and logs ``cfg.data`` as parameters. The data frame is
    either unpickled from ``./input/data.pkl`` (``cfg.exp.use_pickle``) or
    loaded, preprocessed, pickled and uploaded. A LightGBM or CatBoost model
    wrapper is then built according to ``cfg.exp.model`` and handed to a
    ``Trainer`` that fits, predicts and reports feature importance.

    Args:
        cfg: configuration with the ``data``, ``exp`` and ``lgb``/``cat``
            sections read below.
    """
    print('Nishika Second-hand Apartment Price Training')
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)
    data_dir = './input'
    pickle_path = './input/data.pkl'

    seed_everything(cfg.data.seed)

    experiment = Experiment(api_key=cfg.exp.api_key,
                            project_name=cfg.exp.project_name,
                            auto_output_logging='simple',
                            auto_metric_logging=False)

    experiment.log_parameters(dict(cfg.data))

    # Config  ####################################################################################
    del_tar_col = ['取引時点']
    id_col = 'ID'
    tar_col = '取引価格(総額)_log'
    g_col = 'year'
    criterion = MAE
    cv = KFold(n_splits=cfg.data.n_splits,
               shuffle=True,
               random_state=cfg.data.seed)
    # cv = GroupKFold(n_splits=5)

    # Load Data  ####################################################################################
    if cfg.exp.use_pickle:
        # Load the already preprocessed data from the pickle file
        df = unpickle(pickle_path)

    else:
        df = load_data(data_dir,
                       sampling=cfg.data.sampling,
                       seed=cfg.data.seed,
                       id_col=id_col,
                       target_col=tar_col)
        # Preprocessing
        print('Preprocessing')
        df = preprocessing(df, cfg)

        # Save the preprocessed data in pickle format
        to_pickle(pickle_path, df)
        # Uploading the pickle stays best-effort, but the failure is now
        # reported, and KeyboardInterrupt/SystemExit are no longer swallowed
        # as they were by the bare `except`.
        try:
            experiment.log_asset(file_data=pickle_path,
                                 file_name='data.pkl')
        except Exception as exc:
            print(f'Could not upload {pickle_path} to Comet: {exc}')

    features = [c for c in df.columns if c not in del_tar_col]

    # Model  ####################################################################################
    # NOTE(review): any other value of cfg.exp.model leaves `model` as None,
    # which is passed to Trainer unchanged - confirm whether that is intended.
    model = None
    if cfg.exp.model == 'lgb':
        model = LGBMModel(dict(cfg.lgb))
    elif cfg.exp.model == 'cat':
        model = CatBoostModel(dict(cfg.cat))

    # Train & Predict  ##############################################################################
    trainer = Trainer(model, id_col, tar_col, g_col, features, cv, criterion,
                      experiment)
    trainer.fit(df)
    trainer.predict(df)
    trainer.get_feature_importance()
def main(cfg: DictConfig):
    """Run one full train / test / out-of-fold cycle and log it to Comet.

    Steps, in order: change back to the original working directory, seed the
    run, open a Comet experiment, read the train/test CSV files and image
    paths, assign GroupKFold folds grouped by ``patient_id``, drop the listed
    images, preprocess the metadata, build the network and the module passed
    to the trainer, fit, log the best AUC and the checkpoint, run the test
    passes and summarize the submission files, then repeat the test passes on
    the training data for out-of-fold predictions.

    Args:
        cfg: configuration; the ``train``, ``data`` and ``exp`` sections are
            read below.
    """
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)

    seed_everything(cfg.train.seed)
    # Comet.ml
    # NOTE(review): API_KEY and PROJECT_NAME are not defined in this function;
    # presumably module-level constants - confirm.
    experiment = Experiment(api_key=API_KEY, project_name=PROJECT_NAME)

    # Load Data  ################################################################
    # Chris Dataset
    chris_image_size = cfg.data.load_size
    data_dir = f'./input/_Chris_Dataset_{chris_image_size}'
    train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    test = pd.read_csv(os.path.join(data_dir, 'test.csv'))

    img_paths = {
        'train': glob.glob(os.path.join(data_dir, 'train', '*.jpg')),
        'test': glob.glob(os.path.join(data_dir, 'test', '*.jpg'))
    }

    # Cross Validation  #########################################################
    # GroupKFold
    cv = GroupKFold(n_splits=5)
    # Every row starts with fold -1 and receives the index of the split in
    # which it is part of the validation indices.
    train['fold'] = -1
    for i, (trn_idx, val_idx) in enumerate(
            cv.split(train,
                     train['target'],
                     groups=train['patient_id'].tolist())):
        train.loc[val_idx, 'fold'] = i

    # Preprocessing  ############################################################
    # Drop Image
    # Rows whose image_name is in this list are removed from the training data.
    drop_image_name = [
        'ISIC_4579531', 'ISIC_7918608', 'ISIC_0948240', 'ISIC_4904364',
        'ISIC_8780369', 'ISIC_8770180', 'ISIC_7148656', 'ISIC_7408392',
        'ISIC_9959813', 'ISIC_1894141', 'ISIC_6633174', 'ISIC_3001941',
        'ISIC_4259290', 'ISIC_6833905', 'ISIC_7452152', 'ISIC_2744859',
        'ISIC_5464206', 'ISIC_6596403', 'ISIC_0711790', 'ISIC_5644568',
        'ISIC_5843094', 'ISIC_8904326', 'ISIC_4963405', 'ISIC_9839042',
        'ISIC_1355907', 'ISIC_0694037', 'ISIC_9513918', 'ISIC_0787851',
        'ISIC_2932886', 'ISIC_2336763', 'ISIC_4064330', 'ISIC_7358293',
        'ISIC_5789052', 'ISIC_7828320', 'ISIC_8277969', 'ISIC_1080647',
        'ISIC_3238159', 'ISIC_8480913', 'ISIC_3790692', 'ISIC_0612624',
        'ISIC_1242543', 'ISIC_4036915', 'ISIC_8174647', 'ISIC_2956783',
        'ISIC_3302289', 'ISIC_6761105', 'ISIC_2152755', 'ISIC_9169000',
        'ISIC_6852275', 'ISIC_4432898', 'ISIC_5459207', 'ISIC_7418664',
        'ISIC_5136612', 'ISIC_9174738', 'ISIC_3160301', 'ISIC_7140636',
        'ISIC_7718384', 'ISIC_9336675', 'ISIC_4282719', 'ISIC_4330005',
        'ISIC_9828463', 'ISIC_6511141', 'ISIC_5335139', 'ISIC_5104921',
        'ISIC_0695575', 'ISIC_0610141', 'ISIC_5946998', 'ISIC_0464315',
        'ISIC_6556513', 'ISIC_3688407', 'ISIC_7730443', 'ISIC_4358550',
        'ISIC_6461484', 'ISIC_9690422', 'ISIC_5374076', 'ISIC_1793200',
        'ISIC_1389620', 'ISIC_8098274', 'ISIC_6425888', 'ISIC_6321076',
        'ISIC_4298309', 'ISIC_2981912', 'ISIC_3650938', 'ISIC_4288522',
        'ISIC_9459785', 'ISIC_1938535', 'ISIC_5576241', 'ISIC_6567889',
        'ISIC_2768800', 'ISIC_6023795', 'ISIC_9281339', 'ISIC_6712494',
        'ISIC_1811256', 'ISIC_5157055', 'ISIC_3943097', 'ISIC_7194471',
        'ISIC_0361529', 'ISIC_9797578', 'ISIC_3575926', 'ISIC_6166824',
        'ISIC_8828670', 'ISIC_6953126', 'ISIC_4430815', 'ISIC_8146054',
        'ISIC_9305209', 'ISIC_4263017', 'ISIC_9314144', 'ISIC_1330763',
        'ISIC_4792936', 'ISIC_1823608', 'ISIC_4910683', 'ISIC_9360142',
        'ISIC_2863809', 'ISIC_4748668', 'ISIC_5681315', 'ISIC_3202829',
        'ISIC_3450978', 'ISIC_9704624', 'ISIC_4350914', 'ISIC_3587744',
        'ISIC_8190321', 'ISIC_1766413', 'ISIC_2872769', 'ISIC_3186625',
        'ISIC_0170059', 'ISIC_4858099', 'ISIC_0314462', 'ISIC_2811886',
        'ISIC_2140099', 'ISIC_9514450', 'ISIC_1195354', 'ISIC_8325872',
        'ISIC_0227038', 'ISIC_6342641', 'ISIC_4162828', 'ISIC_7597293',
        'ISIC_5278307', 'ISIC_3774190', 'ISIC_2957196', 'ISIC_4443545',
        'ISIC_3455136', 'ISIC_0610499', 'ISIC_8483008', 'ISIC_0243683',
        'ISIC_9028131', 'ISIC_8507102', 'ISIC_7128535', 'ISIC_4085552',
        'ISIC_2940763', 'ISIC_1219894', 'ISIC_1043313', 'ISIC_6587979',
        'ISIC_7050773', 'ISIC_3230164', 'ISIC_5159557', 'ISIC_7854457',
        'ISIC_2582493', 'ISIC_5161114', 'ISIC_5238910', 'ISIC_6515221',
        'ISIC_7771339', 'ISIC_9274260', 'ISIC_8054626', 'ISIC_1178847',
        'ISIC_0236778', 'ISIC_6704518', 'ISIC_4214813', 'ISIC_0322818',
        'ISIC_0230209', 'ISIC_7682938', 'ISIC_1852500', 'ISIC_3699454',
        'ISIC_4693693', 'ISIC_9574591', 'ISIC_3465766', 'ISIC_1826803',
        'ISIC_6234881', 'ISIC_2417958', 'ISIC_8142203', 'ISIC_5019268',
        'ISIC_3251719', 'ISIC_4654808', 'ISIC_1027856', 'ISIC_3262153',
        'ISIC_4681838', 'ISIC_6594555', 'ISIC_8623291', 'ISIC_3167092',
        'ISIC_8791163', 'ISIC_1538510', 'ISIC_3962218', 'ISIC_2160145',
        'ISIC_7690654', 'ISIC_9464203', 'ISIC_4673844', 'ISIC_9481260',
        'ISIC_5407240', 'ISIC_5179742', 'ISIC_8851901', 'ISIC_7433711',
        'ISIC_5777548', 'ISIC_2164933', 'ISIC_7194695', 'ISIC_7115605',
        'ISIC_7560157', 'ISIC_1323909', 'ISIC_0307958', 'ISIC_8015259',
        'ISIC_3089729', 'ISIC_3048886', 'ISIC_0861066', 'ISIC_6110309',
        'ISIC_9103289', 'ISIC_2853454', 'ISIC_1436572', 'ISIC_9650546',
        'ISIC_8208962', 'ISIC_5218561', 'ISIC_3285862', 'ISIC_5361506',
        'ISIC_8196660', 'ISIC_0356238', 'ISIC_1156392', 'ISIC_2761440',
        'ISIC_0645462', 'ISIC_4908514', 'ISIC_1374795', 'ISIC_3481768',
        'ISIC_2102371', 'ISIC_4548990', 'ISIC_7200676', 'ISIC_8827725',
        'ISIC_0667149', 'ISIC_7028320', 'ISIC_5485142', 'ISIC_9698871',
        'ISIC_7764481', 'ISIC_8831706', 'ISIC_4478276', 'ISIC_0401250',
        'ISIC_6987824', 'ISIC_7789537', 'ISIC_1114860', 'ISIC_7586566',
        'ISIC_0343061', 'ISIC_1442157', 'ISIC_9161937', 'ISIC_5904214',
        'ISIC_8335489', 'ISIC_9994768', 'ISIC_4384331', 'ISIC_0639415',
        'ISIC_0982984', 'ISIC_2195070', 'ISIC_9022865', 'ISIC_0159060',
        'ISIC_4933735', 'ISIC_3571989', 'ISIC_8593130', 'ISIC_1585919',
        'ISIC_3907656', 'ISIC_9728805', 'ISIC_6029052', 'ISIC_3582787',
        'ISIC_2205007', 'ISIC_1447559'
    ]
    train = train[~train['image_name'].isin(drop_image_name)].reset_index(
        drop=True)

    # Preprocessing metadata
    # OneHotEncoder
    train, test = preprocessing_meta(train, test)
    # Number of metadata feature columns: every column except the identifier,
    # target and fold columns.
    features_num = len([
        f for f in train.columns
        if f not in ['image_name', 'patient_id', 'target', 'fold']
    ])

    # Model  ####################################################################
    net = ENet(model_name=cfg.train.model_name, meta_features_num=features_num)
    transform = ImageTransform(img_size=cfg.data.img_size,
                               input_res=chris_image_size)

    # Lightning Module  #########################################################
    model = MelanomaSystem(net, cfg, img_paths, train, test, transform,
                           experiment)

    # Keep only the single best checkpoint, judged by the lowest 'avg_val_loss'.
    checkpoint_callback = ModelCheckpoint(filepath='./checkpoint',
                                          save_top_k=1,
                                          verbose=True,
                                          monitor='avg_val_loss',
                                          mode='min',
                                          prefix=cfg.exp.exp_name + '_')

    trainer = Trainer(max_epochs=cfg.train.epoch,
                      checkpoint_callback=checkpoint_callback,
                      gpus=[0])

    # Train & Test  ############################################################
    # Train
    trainer.fit(model)
    experiment.log_metric('best_auc', model.best_auc)
    # NOTE(review): indexing [0] raises IndexError when no checkpoint file
    # matches the pattern - confirm a checkpoint is always written.
    checkpoint_path = glob.glob(f'./checkpoint/{cfg.exp.exp_name}_*.ckpt')[0]
    experiment.log_asset(file_data=checkpoint_path)

    # Test
    # NOTE(review): test_num is not defined in this function; presumably a
    # module-level constant - confirm.
    for i in range(test_num):
        trainer.test(model)

    # Submit
    sub_list = glob.glob(f'submission_{cfg.exp.exp_name}*.csv')
    _ = summarize_submit(sub_list,
                         experiment,
                         filename=f'submission_all_{cfg.exp.exp_name}.csv')

    # oof
    # The training data is run through the 'test' phase of the dataset to get
    # predictions for it.
    valid_dataset = MelanomaDataset(train,
                                    img_paths['train'],
                                    transform,
                                    phase='test')
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=cfg.train.batch_size,
                                  pin_memory=False,
                                  shuffle=False,
                                  drop_last=False)
    for i in range(10):
        trainer.test(model, test_dataloaders=valid_dataloader)

    # Submit
    # NOTE(review): this pattern matches every 'submission*.csv' in the working
    # directory, which looks like it would include the files matched by the
    # earlier, narrower pattern - confirm this is intended.
    sub_list = glob.glob('submission*.csv')
    _ = summarize_submit(sub_list,
                         experiment,
                         filename=f'submission_oof_{cfg.exp.exp_name}.csv')

    # Reset
    del model, trainer, net, experiment
Ejemplo n.º 12
0
# Train the model, or load a previously saved one when LOAD_MODEL is set.
# The model file names interpolate the configured names: the braces were
# missing before, so the literal text `hyper_params["..."].pt` was used as the
# file name for both loading and saving.
if hyper_params["LOAD_MODEL"]:
    model = torch.load(f'{hyper_params["LOAD_MODEL_NAME"]}.pt')
else:
    # NOTE(review): the optimizer is passed as `criterion` - confirm against
    # the signature of train_mnist_vae.
    train_mnist_vae(train_loader,
                    model,
                    criterion=optimizer,
                    n_epoch=hyper_params["EPOCH"],
                    experiment=experiment,
                    beta=hyper_params["BETA"],
                    loss_type="mse",
                    flatten=False)

# Persist the model, then store its weights and upload them as 'model.h5'
torch.save(model, f'{hyper_params["MODEL_NAME"]}.pt')
model.save_weights(f'./{hyper_params["MODEL_NAME"]}.h5')
experiment.log_asset(file_data=f'./{hyper_params["MODEL_NAME"]}.h5',
                     file_name='model.h5')

# Empirical p-values of the test data and the ordering that sorts them
pval, _ = compute_empirical_pval(train_data.data, model, test_data.data)
pval_order = numpy.argsort(pval)

# Quantities used for plotting the p-values, all sized by the number of
# test points
_num_test_points = test_data.data.shape[0]
x_line = numpy.arange(0, _num_test_points, step=1)
y_line = numpy.linspace(0, 1, _num_test_points)
y_adj = (numpy.arange(0, _num_test_points, step=1) / _num_test_points
         * hyper_params["ALPHA"])
zoom = int(0.2 * _num_test_points)  # nb of points to zoom
# Boolean mask: True for the first len(id_min_test) entries, False for the
# following len(id_maj_test) entries
index = numpy.concatenate((
    numpy.repeat(True, len(id_min_test)),
    numpy.repeat(False, len(id_maj_test)),
))
Ejemplo n.º 13
0
def train_cifar10(batch_size: int,
                  learning_rate: float,
                  epochs: int,
                  experiment: Experiment,
                  model: Optional[Sequential] = None,
                  initial_epoch: int = 0,
                  training_datagen: Optional[ImageDataGenerator] = None,
                  scheduler: Optional[Callable[[int], float]] = None,
                  early_stopping_th: Optional[int] = 250,
                  data_portion: float = 1.0,
                  find_lr: bool = False) -> None:
    """Train a model on CIFAR-10 and log the run to a Comet experiment.

    Args:
        batch_size: Batch size used by the training generator.
        learning_rate: Learning rate handed to the Adam optimizer.
        epochs: Value forwarded as ``epochs`` to ``fit_generator``.
        experiment: Comet experiment; its key names the output paths and it
            receives images, the model plot, assets and final metrics.
        model: Model to train. When omitted a fresh one is built with
            ``get_model()`` on every call.
        initial_epoch: Forwarded to ``fit_generator``.
        training_datagen: Augmentation generator. When omitted a fresh
            ``ImageDataGenerator()`` is created on every call.
        scheduler: Optional learning-rate schedule; wrapped in a
            ``LearningRateScheduler`` callback. Its ``experiment_log`` method
            is called first, so a plain function is not sufficient.
        early_stopping_th: Patience for early stopping on ``val_acc``;
            ``None`` disables early stopping.
        data_portion: Forwarded to ``get_cifar10_data``.
        find_lr: When true, every other callback is replaced by a
            learning-rate finder and its plot is logged afterwards.
    """
    # Build the defaults per call: default values in the signature would be
    # created once at import time and shared (and mutated) across calls.
    if model is None:
        model = get_model()
    if training_datagen is None:
        training_datagen = ImageDataGenerator()

    preprocessing_fnc = training_datagen.preprocessing_function
    if preprocessing_fnc is None:
        # A generator created without a preprocessing function exposes None
        # here; fall back to the identity so the validation data below can
        # still be prepared instead of failing with "NoneType is not callable".
        def preprocessing_fnc(x):
            return x

    name = experiment.get_key()
    log_path, model_path = get_output_paths(name)
    data = get_cifar10_data(data_portion=data_portion)

    training_datagen.fit(data.x_train)
    log_images(data.x_train, training_datagen, experiment)
    log_input_images(data.x_train, data.y_train, training_datagen, experiment)

    opt = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    log_model_plot(experiment, model)

    csv_cb = CSVLogger(log_path)
    keep_best_cb = KeepBest('val_acc')
    callbacks = [csv_cb, keep_best_cb]
    if early_stopping_th is not None:
        early_stopping_cb = EarlyStopping('val_acc',
                                          patience=early_stopping_th,
                                          restore_best_weights=True,
                                          verbose=2)
        callbacks.append(early_stopping_cb)
    if scheduler is not None:
        scheduler.experiment_log(experiment=experiment,
                                 epochs=list(range(epochs)))
        callbacks.append(LearningRateScheduler(scheduler))
    if find_lr:
        lrf = LearningRateFinder(model=model)
        # Per-batch multiplier that moves the rate from ``learning_rate`` to
        # 10e-1 (== 1.0) over the total number of batches of the run.
        lrf.lrMult = (10e-1 / learning_rate)**(
            1.0 / (epochs * len(data.x_train) / batch_size))
        # The finder run uses only this callback; the list built above is
        # discarded on purpose.
        callbacks = [
            LambdaCallback(
                on_batch_end=lambda batch, logs: lrf.on_batch_end(batch, logs))
        ]

    # NOTE(review): steps_per_epoch is a float when batch_size does not divide
    # the number of training samples -- confirm the Keras version accepts it.
    model.fit_generator(training_datagen.flow(data.x_train,
                                              data.y_train,
                                              batch_size=batch_size),
                        steps_per_epoch=len(data.x_train) / batch_size,
                        epochs=epochs,
                        validation_data=(preprocessing_fnc(data.x_dev),
                                         data.y_dev),
                        shuffle=True,
                        callbacks=callbacks,
                        verbose=2,
                        initial_epoch=initial_epoch)
    model.save(model_path)
    experiment.log_asset(model_path)
    experiment.log_asset(log_path)

    if find_lr:
        experiment.log_figure('lr vs acc', lrf.plot_loss())

    log_final_metrics(experiment, model, data, preprocessing_fnc)
Ejemplo n.º 14
0
class Trainer():
    def __init__(self, log_dir, cfg):
        """Build data loaders, model, optimizer and Comet logging from ``cfg``.

        Args:
            log_dir: Root output directory, stored as ``self.path``. The
                'Model' and 'Log' sub-directories and ``cfg.json`` are
                written below it.
            cfg: Configuration object read through attribute access (for
                example ``cfg.TRAIN.FLAG``) and also serialised with
                ``json.dump``.
        """

        self.path = log_dir
        self.cfg = cfg

        # Training-only outputs. self.model_dir, self.log_dir, self.writer and
        # self.logfile exist ONLY when cfg.TRAIN.FLAG is truthy.
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(self.path, 'Model')
            self.log_dir = os.path.join(self.path, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.log_dir)
            self.writer = SummaryWriter(log_dir=self.log_dir)
            self.logfile = os.path.join(self.path, "logfile.log")
            # Replaces the process-wide stdout with a Logger built on the log
            # file (presumably it also echoes to the console -- confirm).
            sys.stdout = Logger(logfile=self.logfile)

        self.data_dir = cfg.DATASET.DATA_DIR
        self.max_epochs = cfg.TRAIN.MAX_EPOCHS
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

        # cfg.GPU_ID is a comma-separated string of integer device ids.
        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)

        self.batch_size = cfg.TRAIN.BATCH_SIZE
        self.lr = cfg.TRAIN.LEARNING_RATE

        # The first listed GPU becomes the current CUDA device.
        # NOTE(review): this runs regardless of cfg.CUDA -- confirm CPU-only
        # runs are not expected to reach this constructor.
        torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True

        sample = cfg.SAMPLE
        # Empty placeholders; they are only replaced when cfg.TRAIN.FLAG is set.
        self.dataset = []
        self.dataloader = []
        self.use_feats = cfg.model.use_feats
        # Evaluation split falls back to 'val' when cfg.EVAL is falsy.
        eval_split = cfg.EVAL if cfg.EVAL else 'val'
        train_split = cfg.DATASET.train_split
        # NOTE(review): if cfg.DATASET.DATASET is neither 'clevr' nor 'gqa',
        # self.dataset_val / self.dataloader_val are never created although
        # print_info() reads self.dataset_val.
        if cfg.DATASET.DATASET == 'clevr':
            clevr_collate_fn = collate_fn
            cogent = cfg.DATASET.COGENT
            if cogent:
                print(f'Using CoGenT {cogent.upper()}')

            # ``cogent`` is appended to the split name, so it has to be a
            # string even when it is falsy (e.g. '').
            if cfg.TRAIN.FLAG:
                self.dataset = ClevrDataset(data_dir=self.data_dir,
                                            split=train_split + cogent,
                                            sample=sample,
                                            **cfg.DATASET.params)
                self.dataloader = DataLoader(dataset=self.dataset,
                                             batch_size=cfg.TRAIN.BATCH_SIZE,
                                             shuffle=True,
                                             num_workers=cfg.WORKERS,
                                             drop_last=True,
                                             collate_fn=clevr_collate_fn)

            # The validation data is built whether or not training is enabled.
            self.dataset_val = ClevrDataset(data_dir=self.data_dir,
                                            split=eval_split + cogent,
                                            sample=sample,
                                            **cfg.DATASET.params)
            self.dataloader_val = DataLoader(dataset=self.dataset_val,
                                             batch_size=cfg.TEST_BATCH_SIZE,
                                             drop_last=False,
                                             shuffle=False,
                                             num_workers=cfg.WORKERS,
                                             collate_fn=clevr_collate_fn)

        elif cfg.DATASET.DATASET == 'gqa':
            # The collate function depends on the feature type.
            # NOTE(review): for any other use_feats value gqa_collate_fn stays
            # unassigned and the DataLoader construction below fails.
            if self.use_feats == 'spatial':
                gqa_collate_fn = collate_fn_gqa
            elif self.use_feats == 'objects':
                gqa_collate_fn = collate_fn_gqa_objs
            if cfg.TRAIN.FLAG:
                self.dataset = GQADataset(data_dir=self.data_dir,
                                          split=train_split,
                                          sample=sample,
                                          use_feats=self.use_feats,
                                          **cfg.DATASET.params)
                self.dataloader = DataLoader(dataset=self.dataset,
                                             batch_size=cfg.TRAIN.BATCH_SIZE,
                                             shuffle=True,
                                             num_workers=cfg.WORKERS,
                                             drop_last=True,
                                             collate_fn=gqa_collate_fn)

            self.dataset_val = GQADataset(data_dir=self.data_dir,
                                          split=eval_split,
                                          sample=sample,
                                          use_feats=self.use_feats,
                                          **cfg.DATASET.params)
            self.dataloader_val = DataLoader(dataset=self.dataset_val,
                                             batch_size=cfg.TEST_BATCH_SIZE,
                                             shuffle=False,
                                             num_workers=cfg.WORKERS,
                                             drop_last=False,
                                             collate_fn=gqa_collate_fn)

        # load model
        self.vocab = load_vocab(cfg)
        self.model, self.model_ema = mac.load_MAC(cfg, self.vocab)

        # With alpha=0 the EMA parameters are overwritten by the current model
        # parameters (see weight_moving_average).
        self.weight_moving_average(alpha=0)
        if cfg.TRAIN.RADAM:
            self.optimizer = RAdam(self.model.parameters(), lr=self.lr)
        else:
            self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)

        # Optional resume: restore model + optimizer state and continue with
        # the epoch after the stored 'iter'; EMA weights come from a separate
        # checkpoint (cfg.resume_model_ema).
        self.start_epoch = 0
        if cfg.resume_model:
            location = 'cuda' if cfg.CUDA else 'cpu'
            state = torch.load(cfg.resume_model, map_location=location)
            self.model.load_state_dict(state['model'])
            self.optimizer.load_state_dict(state['optim'])
            self.start_epoch = state['iter'] + 1
            state = torch.load(cfg.resume_model_ema, map_location=location)
            self.model_ema.load_state_dict(state['model'])

        # An explicit cfg.start_epoch overrides the value from the checkpoint.
        if cfg.start_epoch is not None:
            self.start_epoch = cfg.start_epoch

        # Best-so-far validation bookkeeping, updated in log_results().
        self.previous_best_acc = 0.0
        self.previous_best_epoch = 0
        self.previous_best_loss = 100
        self.previous_best_loss_epoch = 0

        # Loss of the current / previous epoch, consumed by reduce_lr().
        self.total_epoch_loss = 0
        self.prior_epoch_loss = 10

        self.print_info()
        # NOTE(review): .cuda() is unconditional here, independent of cfg.CUDA.
        self.loss_fn = torch.nn.CrossEntropyLoss().cuda()

        # The experiment is disabled only when cfg.logcomet is exactly False;
        # other falsy values (e.g. None) leave it enabled while the block
        # below is still skipped.
        self.comet_exp = Experiment(
            project_name=cfg.COMET_PROJECT_NAME,
            api_key=os.getenv('COMET_API_KEY'),
            workspace=os.getenv('COMET_WORKSPACE'),
            disabled=cfg.logcomet is False,
        )
        if cfg.logcomet:
            exp_name = cfg_to_exp_name(cfg)
            print(exp_name)
            self.comet_exp.set_name(exp_name)
            self.comet_exp.log_parameters(flatten_json_iterative_solution(cfg))
            # NOTE(review): self.logfile is only defined when cfg.TRAIN.FLAG
            # is set (see top of this method).
            self.comet_exp.log_asset(self.logfile)
            self.comet_exp.log_asset_data(json.dumps(cfg, indent=4),
                                          file_name='cfg.json')
            self.comet_exp.set_model_graph(str(self.model))
            if cfg.cfg_file:
                self.comet_exp.log_asset(cfg.cfg_file)

        # Keep a copy of the effective configuration next to the run outputs.
        with open(os.path.join(self.path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=4)

    def print_info(self):
        print('Using config:')
        pprint.pprint(self.cfg)
        print("\n")

        pprint.pprint("Size of train dataset: {}".format(len(self.dataset)))
        # print("\n")
        pprint.pprint("Size of val dataset: {}".format(len(self.dataset_val)))
        print("\n")

        print("Using MAC-Model:")
        pprint.pprint(self.model)
        print("\n")

    def weight_moving_average(self, alpha=0.999):
        for param1, param2 in zip(self.model_ema.parameters(),
                                  self.model.parameters()):
            param1.data *= alpha
            param1.data += (1.0 - alpha) * param2.data

    def set_mode(self, mode="train"):
        if mode == "train":
            self.model.train()
            self.model_ema.train()
        else:
            self.model.eval()
            self.model_ema.eval()

    def reduce_lr(self):
        epoch_loss = self.total_epoch_loss  # / float(len(self.dataset) // self.batch_size)
        lossDiff = self.prior_epoch_loss - epoch_loss
        if ((lossDiff < 0.015 and self.prior_epoch_loss < 0.5 and self.lr > 0.00002) or \
            (lossDiff < 0.008 and self.prior_epoch_loss < 0.15 and self.lr > 0.00001) or \
            (lossDiff < 0.003 and self.prior_epoch_loss < 0.10 and self.lr > 0.000005)):
            self.lr *= 0.5
            print("Reduced learning rate to {}".format(self.lr))
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.lr
        self.prior_epoch_loss = epoch_loss
        self.total_epoch_loss = 0

    def save_models(self, iteration):
        """Checkpoint the model (with optimizer) and the EMA model (without).

        Both checkpoints go to ``self.model_dir`` under the names "model" and
        "model_ema", tagged with ``iteration``.
        """
        def _checkpoint(network, optimizer, tag):
            save_model(network,
                       optimizer,
                       iteration,
                       self.model_dir,
                       model_name=tag)

        _checkpoint(self.model, self.optimizer, "model")
        # The EMA copy is never optimised directly, so no optimizer is stored.
        _checkpoint(self.model_ema, None, "model_ema")

    def train_epoch(self, epoch):
        """Run one training pass over ``self.dataloader``.

        For every batch the model is updated with one optimizer step
        (optionally with gradient-norm clipping) and the EMA model is
        refreshed. Running averages are shown in the progress bar.

        Args:
            epoch: Zero-based epoch index; only used for the progress label.

        Returns:
            A dict with the sample-weighted mean loss and the accuracy of the
            epoch under the keys "loss"/"avg_loss" and
            "accuracy"/"avg_accuracy". Both are 0.0 if the loader yields no
            batch.
        """
        cfg = self.cfg
        total_loss = 0.
        total_correct = 0
        total_samples = 0
        # Defined before the loop so an empty loader returns zeros instead of
        # raising UnboundLocalError below.
        avg_loss = 0.
        train_accuracy = 0.

        self.labeled_data = iter(self.dataloader)
        self.set_mode("train")

        progress = tqdm(self.labeled_data,
                        total=len(self.dataloader),
                        ncols=20)

        for data in progress:
            ######################################################
            # (1) Prepare training data
            ######################################################
            image, question, question_len, answer = data['image'], data[
                'question'], data['question_length'], data['answer']
            answer = answer.long()
            question = Variable(question)
            answer = Variable(answer)

            if cfg.CUDA:
                if self.use_feats == 'spatial':
                    image = image.cuda()
                elif self.use_feats == 'objects':
                    image = [e.cuda() for e in image]
                question = question.cuda()
                answer = answer.cuda()
            answer = answer.squeeze()
            if answer.dim() == 0:
                # squeeze() collapses a one-sample batch to a 0-d tensor,
                # which has no size(0); restore the batch dimension.
                answer = answer.unsqueeze(0)

            ############################
            # (2) Train Model
            ############################
            self.optimizer.zero_grad()

            scores = self.model(image, question, question_len)
            loss = self.loss_fn(scores, answer)
            loss.backward()

            if self.cfg.TRAIN.CLIP_GRADS:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.cfg.TRAIN.CLIP)

            self.optimizer.step()
            self.weight_moving_average()

            ############################
            # (3) Log Progress
            ############################
            batch_size = answer.size(0)
            correct = scores.detach().argmax(1) == answer
            total_correct += correct.sum().cpu().item()
            # loss is multiplied by the batch size so that the running mean
            # is per sample rather than per batch.
            total_loss += loss.item() * batch_size
            total_samples += batch_size

            avg_loss = total_loss / total_samples
            train_accuracy = total_correct / total_samples

            progress.set_description(
                'Epoch: {}; Avg Loss: {:.5f}; Avg Train Acc: {:.5f}'.format(
                    epoch + 1, avg_loss, train_accuracy))

        # Consumed by reduce_lr() after the epoch.
        self.total_epoch_loss = avg_loss

        metrics = {
            "loss": avg_loss,
            "accuracy": train_accuracy,
            "avg_loss": avg_loss,  # For commet
            "avg_accuracy": train_accuracy,  # For commet
        }
        return metrics

    def train(self):
        cfg = self.cfg
        print("Start Training")
        for epoch in range(self.start_epoch, self.max_epochs):

            with self.comet_exp.train():
                dict = self.train_epoch(epoch)
                self.reduce_lr()
                dict['epoch'] = epoch + 1
                dict['lr'] = self.lr
                self.comet_exp.log_metrics(
                    dict,
                    epoch=epoch + 1,
                )

            with self.comet_exp.validate():
                dict = self.log_results(epoch, dict)
                dict['epoch'] = epoch + 1
                dict['lr'] = self.lr
                self.comet_exp.log_metrics(
                    dict,
                    epoch=epoch + 1,
                )

            if cfg.TRAIN.EALRY_STOPPING:
                if epoch - cfg.TRAIN.PATIENCE == self.previous_best_epoch:
                    # if epoch - cfg.TRAIN.PATIENCE == self.previous_best_loss_epoch:
                    print('Early stop')
                    break

        self.comet_exp.log_asset(self.logfile)
        self.save_models(self.max_epochs)
        self.writer.close()
        print("Finished Training")
        print(
            f"Highest validation accuracy: {self.previous_best_acc} at epoch {self.previous_best_epoch}"
        )

    def log_results(self, epoch, dict, max_eval_samples=None):
        epoch += 1
        self.writer.add_scalar("avg_loss", dict["loss"], epoch)
        self.writer.add_scalar("train_accuracy", dict["accuracy"], epoch)

        metrics = self.calc_accuracy("validation",
                                     max_samples=max_eval_samples)
        self.writer.add_scalar("val_accuracy_ema", metrics['acc_ema'], epoch)
        self.writer.add_scalar("val_accuracy", metrics['acc'], epoch)
        self.writer.add_scalar("val_loss_ema", metrics['loss_ema'], epoch)
        self.writer.add_scalar("val_loss", metrics['loss'], epoch)

        print(
            "Epoch: {epoch}\tVal Acc: {acc},\tVal Acc EMA: {acc_ema},\tAvg Loss: {loss},\tAvg Loss EMA: {loss_ema},\tLR: {lr}"
            .format(epoch=epoch, lr=self.lr, **metrics))

        if metrics['acc'] > self.previous_best_acc:
            self.previous_best_acc = metrics['acc']
            self.previous_best_epoch = epoch
        if metrics['loss'] < self.previous_best_loss:
            self.previous_best_loss = metrics['loss']
            self.previous_best_loss_epoch = epoch

        if epoch % self.snapshot_interval == 0:
            self.save_models(epoch)

        return metrics

    def calc_accuracy(self, mode="train", max_samples=None):
        """Evaluate the model and the EMA model on one of the data loaders.

        Both networks are switched to eval mode first and are left in eval
        mode when the method returns.

        Args:
            mode: "train" evaluates on ``self.dataloader``; any other value
                evaluates on ``self.dataloader_val``. Also used, upper-cased,
                as the progress-bar label.
            max_samples: Accepted for interface compatibility; currently not
                used by this method.

        Returns:
            A dict with the keys ``acc``, ``acc_ema``, ``loss`` and
            ``loss_ema`` holding sample-weighted averages over the loader.
            All four are 0.0 if the loader yields no batch.
        """
        self.set_mode("validation")

        if mode == "train":
            loader = self.dataloader
        else:
            loader = self.dataloader_val

        total_correct = 0
        total_correct_ema = 0
        total_samples = 0
        total_loss = 0.
        total_loss_ema = 0.
        # Defined before the loop so an empty loader returns zeros instead of
        # raising UnboundLocalError at the return statement.
        avg_acc = avg_acc_ema = avg_loss = avg_loss_ema = 0.
        pbar = tqdm(loader, total=len(loader), desc=mode.upper(), ncols=20)
        for data in pbar:

            image, question, question_len, answer = data['image'], data[
                'question'], data['question_length'], data['answer']
            answer = answer.long()
            question = Variable(question)
            answer = Variable(answer)

            if self.cfg.CUDA:
                if self.use_feats == 'spatial':
                    image = image.cuda()
                elif self.use_feats == 'objects':
                    image = [e.cuda() for e in image]
                question = question.cuda()
                answer = answer.cuda()
            # Squeeze on CPU as well, matching train_epoch; previously only
            # the CUDA path removed the extra dimension.
            answer = answer.squeeze()
            if answer.dim() == 0:
                # A one-sample batch (possible with drop_last=False) collapses
                # to a 0-d tensor; restore the batch dimension.
                answer = answer.unsqueeze(0)

            with torch.no_grad():
                scores = self.model(image, question, question_len)
                scores_ema = self.model_ema(image, question, question_len)

                loss = self.loss_fn(scores, answer)
                loss_ema = self.loss_fn(scores_ema, answer)

            correct = scores.detach().argmax(1) == answer
            correct_ema = scores_ema.detach().argmax(1) == answer

            total_correct += correct.sum().cpu().item()
            total_correct_ema += correct_ema.sum().cpu().item()

            # Weight each batch loss by its size to obtain per-sample means.
            batch_size = answer.size(0)
            total_loss += loss.item() * batch_size
            total_loss_ema += loss_ema.item() * batch_size

            total_samples += batch_size

            avg_acc = total_correct / total_samples
            avg_acc_ema = total_correct_ema / total_samples
            avg_loss = total_loss / total_samples
            avg_loss_ema = total_loss_ema / total_samples

            pbar.set_postfix({
                'Acc': f'{avg_acc:.5f}',
                'Acc Ema': f'{avg_acc_ema:.5f}',
                'Loss': f'{avg_loss:.5f}',
                'Loss Ema': f'{avg_loss_ema:.5f}',
            })

        return dict(acc=avg_acc,
                    acc_ema=avg_acc_ema,
                    loss=avg_loss,
                    loss_ema=avg_loss_ema)
Ejemplo n.º 15
0
# Collect the 'xent' / 'acc' values already logged under this experiment name
# so that points evaluated in an earlier run can be skipped below.
allexperiments = api.get('wronnyhuang/landscape2d')
for expt in allexperiments:
    if exptname == api.get_experiment_other(expt, 'Name')[0]:
        raw = api.get_experiment_metrics_raw(expt)
        for r in raw:
            metric_name = r['metricName']
            if metric_name == 'xent':
                xent[r['step']] = r['metricValue']
            elif metric_name == 'acc':
                acc[r['step']] = r['metricValue']

for idx, (c1, c2) in enumerate(cfeed):
    # Each worker only handles the indices that belong to its partition.
    in_partition = np.mod(idx, args.npart) == args.part
    if not in_partition:
        continue

    already_done = idx in xent and idx in acc
    if already_done:
        print(f'skipping idx {idx}')
        continue

    # Move the weights by c1 along the first direction and c2 along the second.
    perturbedWeights = []
    for w, d1, d2 in zip(weights, dw1, dw2):
        perturbedWeights.append(w + c1 * d1 + c2 * d2)
    evaluator.assign_weights(perturbedWeights)

    xent[idx], acc[idx], _ = evaluator.eval()
    for metric_name, store in (('xent', xent), ('acc', acc)):
        experiment.log_metric(metric_name, store[idx], step=idx)
    print('point ', idx + 1, 'of', len(cfeed), '| time:', time())

# save plot data and log the figure
pkl_path = exptname + '.pkl'
with open(pkl_path, 'wb') as f:
    pickle.dump((xent, acc), f)
experiment.log_asset(pkl_path)
# The local copy is removed right after it has been handed to the experiment.
os.remove(pkl_path)
Ejemplo n.º 16
0
    results_dir + 'predictions/predictions_result.txt', 'evaluation loss:' +
    str(evaluation_loss) + ' evaluation accuracy:' + str(evaluation_accuracy) +
    ' evaluation dice coef:' + str(evaluation_dice_coef))
make_file_and_write(results_dir + 'description.txt', description)

# Predict with batch size 1, then convert inputs, ground-truth masks and
# predictions into their sub-folders of '<results_dir>predictions/'.
predicted_masks = model.predict(test_images, 1, verbose=1)
predictions_dir = results_dir + 'predictions/'
converted_test_images = convert_one_class_images_to_pixel_images_and_save(
    predictions_dir + 'images/', test_images, shape=input_shape)
converted_test_masks = convert_multiclass_matirx_masks_to_pixel_masks_and_save(
    predictions_dir + 'masks/', test_masks, mask_pixel_values_aka_classes)
converted_predicted_masks = convert_multiclass_matirx_masks_to_pixel_masks_and_save(
    predictions_dir + 'results/', predicted_masks,
    mask_pixel_values_aka_classes)

# Render the architecture diagram and attach it, together with the saved
# weights file, to the experiment.
architecture_png = results_dir + 'model_architecture.png'
plot_model(model,
           to_file=architecture_png,
           show_shapes=True,
           show_layer_names=True,
           rankdir='TB')
experiment.log_image(architecture_png, name='model_architecture.png')
experiment.log_asset(results_dir + 'unet.hdf5', file_name='unet.hdf5')

# For every test sample log input, expected mask and predicted mask, in that
# order, under '<index>_<kind>'.
for index in range(len(test_images)):
    for converted, suffix in ((converted_test_images, '_test_image'),
                              (converted_test_masks, '_test_mask'),
                              (converted_predicted_masks, '_predicted_mask')):
        experiment.log_image(converted[index], name=str(index) + suffix)
Ejemplo n.º 17
0
label_names = list(labeldf.taxonID.values)

# Callbacks report to the experiment and write their logs into save_dir.
callback_list = callbacks.create(
    experiment=experiment,
    train_data=model.train_split,
    validation_data=model.val_split,
    train_shp=model.train_shp,
    log_dir=save_dir,
    label_names=label_names,
    submodel=False,
)

ensemble_epochs = model.config["train"]["ensemble"]["epochs"]
neighbor.fit(
    model.train_split,
    epochs=ensemble_epochs,
    validation_data=model.val_split,
    callbacks=callback_list,
)

#save
neighbor.save(f"{save_dir}/neighbors.h5")

# Write the predictions as a shapefile and upload the .shp together with its
# .dbf / .shx / .cpg companion files.
predicted_shp = model.predict(model=neighbor)
predicted_shp.to_file(f"{save_dir}/prediction.shp")
for extension in ("shp", "dbf", "shx", "cpg"):
    experiment.log_asset(f"{save_dir}/prediction.{extension}")

# Log the first weight value of the "ensemble_add_bias" layer.
estimate_a = neighbor.get_layer("ensemble_add_bias").get_weights()
experiment.log_metric(name="target_versus_context_weight",
                      value=estimate_a[0][0])

#estimate_lambda = neighbor.get_layer("distance_decay").get_weights()
#experiment.log_metric(name="distance_decay_rate", value=estimate_lambda[0])
Ejemplo n.º 18
0
    opt_file = "prev_experiments/11k_wgan_feature_pixelDA.yml"

    opts = load_opts(path=root / opt_file,
                     default=root / "shared/defaults.yml")

    opts = set_mode("test", opts)
    opts.data.loaders.batch_size = 1
    val_loader = get_loader(opts)
    dataset_size = len(val_loader)

    print("#testing images = %d" % dataset_size)

    comet_exp = Experiment(workspace=opts.comet.workspace,
                           project_name=opts.comet.project_name)
    if comet_exp is not None:
        comet_exp.log_asset(file_data=str(root / opt_file),
                            file_name=root / opt_file)
        comet_exp.log_parameters(opts)

    checkpoint_directory, image_directory = prepare_sub_folder(
        opts.train.output_dir)

    opts.comet.exp = comet_exp

    model = create_model(opts)
    model.setup()

    total_steps = 0

    for i, data in enumerate(val_loader):
        #
        with Timer("Elapsed time in update " + str(i) + ": %f"):
Ejemplo n.º 19
0
if not args.only_training:
    print('Incremental processing evaluation started.')

    # print_metrics() receives None instead of an experiment when Comet
    # tracking is switched off.
    if not args.comet_track:
        experiment = None

    # outputs using partial, incremental inputs
    partial_outputs = Results(test_loader, model, my_device, label_pad_id,
                              corpus, seq2seq, prophecies=None)
    partial_outputs.print_metrics(model_name, experiment)

    # Use a context manager so the file is flushed and closed before it is
    # uploaded as an asset.
    with open('outputs/results_' + model_name, 'wb') as results_file:
        pickle.dump(partial_outputs, results_file)
    if args.comet_track:
        experiment.log_asset('outputs/results_' + model_name,
                             'results_partialInputs_' + model_name)

    # outputs using GPT2 prophecies
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # prophecy files that come from a trusted source.
    prophecies_path = ('prophecies/gpt2Prophecies_' + args.task
                       + '_testset-withOutliers')
    with open(prophecies_path, 'rb') as prophecies_file:
        prophecies = pickle.load(prophecies_file)
    prophecies_outputs = Results(test_loader, model,
                                 my_device, label_pad_id, corpus,
                                 seq2seq, prophecies)
    prophecies_outputs.print_metrics(model_name + '_gpt', experiment)

    with open('outputs/resultsGPT_' + model_name, 'wb') as results_file:
        pickle.dump(prophecies_outputs, results_file)
    if args.comet_track:
        experiment.log_asset('outputs/resultsGPT_' + model_name,
                             "results_prophecies_" + model_name)

print('Finished!')
Ejemplo n.º 20
0
    os.makedirs('pickle', exist_ok=True); pickle.dump(dw1, open(join('pickle', args.ckpt), 'wb'))
  along = 'along_eigvec'
else:
  dw1 = evaluator.get_random_dir()
  along = 'along_random_'+str(args.seed)

# span
# 30 evenly spaced coefficients covering [-span/2, +span/2].
cfeed = args.span/2 * np.linspace(-1, 1, 30)
# shuffle order so we see plot shape sooner on comet
cfeed_enum = list(enumerate(cfeed))
random.shuffle(cfeed_enum)

# loop over all points along surface direction
name = 'span_' + str(args.span) + '/' + basename(args.ckpt) + '/' + along # name of experiment
xent = np.zeros(len(cfeed))
weights = evaluator.get_weights()
for i, (idx, c) in enumerate(cfeed_enum):

    # Move every weight by c along the chosen direction and evaluate there.
    perturbedWeights = [w + c * d1 for w, d1 in zip(weights, dw1)]
    evaluator.assign_weights(perturbedWeights)
    xent[idx], acc, _ = evaluator.eval()
    # idx (the unshuffled position) is passed as the step.
    experiment.log_metric(name, xent[idx], idx)
    print('progress:', i + 1, 'of', len(cfeed_enum), '| time:', time())

# save plot data and log the figure
xent = np.reshape(np.array(xent), cfeed.shape)
plt.plot(cfeed, xent)
experiment.log_figure(name)

unique = utils.timenow()
# Close the file before uploading it; the bare open() call left the handle
# open and relied on garbage collection to flush the data.
with open(unique, 'wb') as pkl_file:
    pickle.dump((cfeed, xent), pkl_file)
# NOTE(review): ``file_path`` is the keyword used here -- confirm it matches
# the installed comet_ml version's log_asset signature.
experiment.log_asset(file_path=unique, file_name=name+'.pkl')
Ejemplo n.º 21
0
def upload_experiment():
    """Create a Comet experiment and upload the asset folders and result file.

    The experiment is built from the ``COMET_ML_KEY`` keyword arguments. The
    './datasets', './models' and './knapsack' folders are uploaded in that
    order, followed by ``RESULT_FILE``.
    """
    experiment = Experiment(**COMET_ML_KEY)
    for folder in ('./datasets', './models', './knapsack'):
        experiment.log_asset_folder(folder)
    experiment.log_asset(RESULT_FILE)
Ejemplo n.º 22
0
              verbose=1)

    y_pred = model.predict(
        oDataSet.attributes[oData.Testing_indexes]).argmax(axis=1)
    y_true = oDataSet.labels[oData.Testing_indexes]

    experiment.log_metric("test_accuracy", accuracy_score(y_true, y_pred))
    experiment.log_metric("beta", best_b)
    experiment.log_metric("neurons", best_p)
    experiment.log_confusion_matrix(matrix=confusion_matrix(y_true,
                                                            y_pred).tolist(),
                                    labels=oDataSet.labelsNames)
    # model.save('model.h5')
    # experiment.log_asset("model.h5")
    model.save_weights('model.weights')
    experiment.log_asset("model.weights")

    print(accuracy_score(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))
    oData.confusion_matrix = confusion_matrix(y_true, y_pred)
    oData.model = model
    oData.params = {
        "k_fold": K_FOLD,
        "GRID_RESULT": grid_result,
        "GRID_VALUES_NEURON": GRID_NEURON,
        "GRID_VALUES_BETA": GRID_B,
        "LEARNING RATE": LEARNING_RATE,
        "EPOCHS": epochs
    }
    experiment.log_other("params", oData.params)
    y_pred = model.predict(
Ejemplo n.º 23
0
def run(args, train, sparse_evidences, claims_dict):
    """Train a CDSSM model on claim/evidence pairs and validate it every epoch.

    The first 80% of ``train`` is used for training and the remaining 20% for
    validation. Metrics are sent to a new Comet experiment and to the local
    ``Logger``; a checkpoint is written through ``putils.save_checkpoint``
    after every epoch.

    The Comet API key is read from the ``COMET_API_KEY`` environment variable
    rather than being embedded in the source.

    Args:
        args: parsed command-line options. The attributes read here are
            ``batch_size``, ``learning_rate``, ``data_sampling``, ``epochs``,
            ``model`` (path of a pickled model to resume from, or a falsy
            value to start from scratch), ``no_randomize``, ``print``,
            ``data`` and ``sparse_evidences``.
        train: sliceable collection of examples handed to ``WikiDataset``.
        sparse_evidences: forwarded unchanged to ``WikiDataset``.
        claims_dict: forwarded unchanged to ``WikiDataset``.

    Returns:
        None.
    """
    import os  # local import: only needed for the Comet API key lookup

    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate
    DATA_SAMPLING = args.data_sampling
    NUM_EPOCHS = args.epochs
    MODEL = args.model
    # NOTE(review): the "randomize" setting is read from ``args.no_randomize``;
    # presumably a store_false style flag -- confirm the polarity at the parser.
    RANDOMIZE = args.no_randomize
    PRINT = args.print

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # A formatted timestamp keeps the log directory name free of the spaces,
    # commas and parentheses found in the repr of ``time.localtime()``.
    logger = Logger('./logs/{}'.format(time.strftime('%Y-%m-%d_%H-%M-%S')))

    if MODEL:
        print("Loading pretrained model...")
        # The file holds a pickled model object; loading it once is enough.
        # Copying its own state dict back onto it would be a no-op.
        model = torch.load(MODEL, map_location=device)
    else:
        model = cdssm.CDSSM()
    # Use the selected device everywhere so CPU-only hosts work as well.
    model = model.to(device)

    if torch.cuda.device_count() > 0:
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
        model = nn.DataParallel(model)

    print("Created model with {:,} parameters.".format(
        putils.count_parameters(model)))

    print("Created dataset...")

    # use an 80/20 train/validate split!
    train_size = int(len(train) * 0.80)
    train_dataset = pytorch_data_loader.WikiDataset(
        train[:train_size],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    val_dataset = pytorch_data_loader.WikiDataset(
        train[train_size:],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=pytorch_data_loader.PadCollate())
    val_dataloader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                num_workers=0,
                                shuffle=True,
                                collate_fn=pytorch_data_loader.PadCollate())

    # Loss and optimizer
    criterion = torch.nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=1e-3)

    # Report roughly every 2% of an epoch, but never more often than every
    # 20 batches.
    OUTPUT_FREQ = max(int((len(train_dataset) / BATCH_SIZE) * 0.02), 20)
    parameters = {
        "batch size": BATCH_SIZE,
        "epochs": NUM_EPOCHS,
        "learning rate": LEARNING_RATE,
        "optimizer": optimizer.__class__.__name__,
        "loss": criterion.__class__.__name__,
        "training size": train_size,
        "data sampling rate": DATA_SAMPLING,
        "data": args.data,
        "sparse_evidences": args.sparse_evidences,
        "randomize": RANDOMIZE,
        "model": MODEL
    }
    # Credentials must not live in source control. When the variable is unset
    # ``None`` is passed and Comet falls back to its own configuration lookup.
    experiment = Experiment(api_key=os.environ.get("COMET_API_KEY"),
                            project_name="clsm",
                            workspace="moinnadeem")
    experiment.add_tag("train")
    experiment.log_asset("cdssm.py")
    experiment.log_dataset_info(name=args.data)
    experiment.log_parameters(parameters)

    # Encode every hyper-parameter except the model path in the checkpoint
    # file name prefix.
    model_checkpoint_dir = "models/saved_model"
    for key, value in parameters.items():
        if isinstance(value, str):
            value = value.replace("/", "-")
        if key != "model":
            model_checkpoint_dir += "_{}-{}".format(key.replace(" ", "_"),
                                                    value)

    print("Training...")
    best_loss = torch.tensor(float("inf"),
                             dtype=torch.float)  # begin loss at infinity

    train_batches_per_epoch = len(train_dataloader)
    val_batches_per_epoch = len(val_dataloader)

    for epoch in range(NUM_EPOCHS):
        beginning_time = time.time()
        train_running_loss = 0.0
        train_running_accuracy = 0.0
        model.train()
        experiment.log_current_epoch(epoch)

        with experiment.train():
            for train_batch_num, inputs in enumerate(train_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = inputs

                claims_tensors = claims_tensors.to(device)
                evidences_tensors = evidences_tensors.to(device)
                labels = labels.to(device)

                y_pred = model(claims_tensors, evidences_tensors)
                y_pred = y_pred.squeeze()

                # NLLLoss expects class indices, so each label row is reduced
                # to the index of its largest entry.
                binary_y = torch.max(labels, 1)[1]
                loss = criterion(y_pred, binary_y)

                y = labels.float()
                binary_pred = torch.max(y_pred, 1)[1]
                accuracy = (binary_y == binary_pred).float().mean()
                train_running_accuracy += accuracy.item()
                train_running_loss += loss.item()

                if PRINT:
                    for idx in range(len(y)):
                        print(
                            "Claim: {}, Evidence: {}, Prediction: {}, Label: {}"
                            .format(claims_text[0], evidences_text[idx],
                                    torch.exp(y_pred[idx]), y[idx]))

                if (train_batch_num %
                        OUTPUT_FREQ) == 0 and train_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    print(
                        "[{}:{}:{:3f}s] training loss: {}, training accuracy: {}, training recall: {}"
                        .format(
                            epoch, train_batch_num /
                            (len(train_dataset) / BATCH_SIZE), elapsed_time,
                            train_running_loss / OUTPUT_FREQ,
                            train_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # A step that grows monotonically across epochs; the
                    # previous ``batch * (epoch + 1)`` formula produced
                    # colliding and interleaved steps.
                    global_step = (epoch * train_batches_per_epoch +
                                   train_batch_num)

                    # 1. Log scalar values (scalar summary)
                    info = {
                        'train_loss': train_running_loss / OUTPUT_FREQ,
                        'train_accuracy': train_running_accuracy / OUTPUT_FREQ
                    }

                    for tag, value in info.items():
                        experiment.log_metric(tag, value, step=global_step)
                        logger.scalar_summary(tag, value, global_step + 1)

                    # 2. Log values and gradients of the parameters
                    # (histogram summary). The gradients seen here are those
                    # of the previous batch, because backward() for the
                    # current batch has not run yet.
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             global_step + 1)
                        if value.grad is not None:
                            logger.histo_summary(
                                tag + '/grad',
                                value.grad.detach().cpu().numpy(),
                                global_step + 1)

                    train_running_loss = 0.0
                    beginning_time = time.time()
                    train_running_accuracy = 0.0
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        print("Running validation...")
        model.eval()
        pred = []
        true = []
        avg_loss = 0.0
        val_running_accuracy = 0.0
        val_running_loss = 0.0
        beginning_time = time.time()
        # No gradients are needed for validation; disabling autograd avoids
        # building a graph for every batch.
        with experiment.validate(), torch.no_grad():
            for val_batch_num, val_inputs in enumerate(val_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = val_inputs

                claims_tensors = claims_tensors.to(device)
                evidences_tensors = evidences_tensors.to(device)
                labels = labels.to(device)

                y_pred = model(claims_tensors, evidences_tensors)
                y_pred = y_pred.squeeze()

                binary_y = torch.max(labels, 1)[1]
                loss = criterion(y_pred, binary_y)

                binary_pred = torch.max(y_pred, 1)[1]
                true.extend(binary_y.tolist())
                pred.extend(binary_pred.tolist())

                accuracy = (binary_y == binary_pred).float().mean()
                val_running_accuracy += accuracy.item()
                val_running_loss += loss.item()
                avg_loss += loss.item()

                if (val_batch_num % OUTPUT_FREQ) == 0 and val_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    print(
                        "[{}:{}:{:3f}s] validation loss: {}, accuracy: {}, recall: {}"
                        .format(
                            epoch,
                            val_batch_num / (len(val_dataset) / BATCH_SIZE),
                            elapsed_time, val_running_loss / OUTPUT_FREQ,
                            val_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    global_step = (epoch * val_batches_per_epoch +
                                   val_batch_num)

                    # 1. Log scalar values (scalar summary)
                    info = {'val_accuracy': val_running_accuracy / OUTPUT_FREQ}

                    for tag, value in info.items():
                        experiment.log_metric(tag, value, step=global_step)
                        logger.scalar_summary(tag, value, global_step + 1)

                    # 2. Log values and gradients of the parameters
                    # (histogram summary). Gradients are whatever the last
                    # training step left behind.
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             global_step + 1)
                        if value.grad is not None:
                            logger.histo_summary(
                                tag + '/grad',
                                value.grad.detach().cpu().numpy(),
                                global_step + 1)

                    val_running_accuracy = 0.0
                    val_running_loss = 0.0
                    beginning_time = time.time()

        accuracy = accuracy_score(true, pred)
        print("[{}] mean accuracy: {}, mean loss: {}".format(
            epoch, accuracy, avg_loss / len(val_dataloader)))

        true = np.array(true).astype("int")
        pred = np.array(pred).astype("int")
        print(classification_report(true, pred))

        # Track the lowest mean validation loss seen so far; the current
        # epoch is "best" when it matches that minimum.
        best_loss = torch.tensor(
            min(avg_loss / len(val_dataloader),
                best_loss.cpu().numpy()))
        is_best = bool((avg_loss / len(val_dataloader)) <= best_loss)

        putils.save_checkpoint(
            {
                "epoch": epoch,
                "model": model,
                "best_loss": best_loss
            },
            is_best,
            filename="{}_loss_{}".format(model_checkpoint_dir,
                                         best_loss.cpu().numpy()))
Ejemplo n.º 24
0
class Logger:
    """
    Logs/plots results to comet.

    Args:
        exp_config (dict): experiment configuration hyperparameters
        model_config (dict): model configuration hyperparameters
        data_config (dict): data configuration hyperparameters
    """
    def __init__(self, exp_config, model_config, data_config):
        self.exp_config = exp_config
        self.experiment = Experiment(**exp_config['comet_config'])
        self.experiment.disable_mp()
        self._log_hyper_params(exp_config, model_config, data_config)
        # Incremented once per call to log(..., 'val').
        self._epoch = 0

    @staticmethod
    def _flatten_arg_dict(arg_dict):
        """
        Flatten nested dictionaries, joining nested keys with '_'.

        Args:
            arg_dict (dict): possibly nested configuration dictionary

        Returns:
            dict: a single-level dictionary, e.g. {'a': {'b': 1}} -> {'a_b': 1}
        """
        flat_dict = {}
        for k, v in arg_dict.items():
            if isinstance(v, dict):
                for kk, vv in Logger._flatten_arg_dict(v).items():
                    flat_dict[k + '_' + kk] = vv
            else:
                flat_dict[k] = v
        return flat_dict

    def _log_hyper_params(self, exp_config, model_config, data_config):
        """
        Log the hyper-parameters for the experiment.

        Args:
            exp_config (dict): experiment configuration hyperparameters
            model_config (dict): model configuration hyperparameters
            data_config (dict): data configuration hyperparameters
        """
        for config in (exp_config, model_config, data_config):
            self.experiment.log_parameters(self._flatten_arg_dict(config))

    def log(self, results, train_val):
        """
        Plot the results in comet.

        The internal epoch counter advances by one after every call made
        with train_val == 'val'.

        Args:
            results (tuple): five dictionaries, in order: objectives, grads,
                             params, images, metrics
            train_val (str): either 'train' or 'val'
        """
        objectives, grads, params, images, metrics = results
        for metric_name, metric in objectives.items():
            self.experiment.log_metric(metric_name + '_' + train_val, metric,
                                       self._epoch)
            print(metric_name, ':', metric.item())
        # Gradient statistics are only logged for training calls.
        if train_val == 'train':
            for grad_metric_name, grad_metric in grads.items():
                self.experiment.log_metric('grads_' + grad_metric_name,
                                           grad_metric, self._epoch)
        for param_name, param in params.items():
            self.experiment.log_metric(param_name + '_' + train_val, param,
                                       self._epoch)
        for image_name, imgs in images.items():
            self.plot_images(imgs, image_name, train_val)
        for metric_name, metric in metrics.items():
            self.experiment.log_metric(metric_name + '_' + train_val, metric,
                                       self._epoch)
        if train_val == 'val':
            self._epoch += 1

    def plot_images(self, images, title, train_val):
        """
        Plot a tensor of images.

        Args:
            images (torch.Tensor): a tensor of shape [steps, b, c, h, w]
                                   (or [steps, b, h, w], in which case a
                                   channel dimension of size 1 is added)
            title (str): title for the images, e.g. reconstructions
            train_val (str): either 'train' or 'val'
        """
        # add a channel dimension if necessary
        if len(images.shape) == 4:
            s, b, h, w = images.shape
            images = images.view(s, b, 1, h, w)
        s, b, c, h, w = images.shape
        # show at most the first 10 batch elements
        if b > 10:
            images = images[:, :10]
        # swap the steps and batch dimensions
        images = images.transpose(0, 1).contiguous()
        images = images.view(-1, c, h, w)
        grid = make_grid(images, nrow=s).numpy()
        if c == 1:
            # single-channel input: display the first grid channel in gray
            grid = grid[0]
            cmap = 'gray'
        else:
            # move channels last for imshow
            grid = np.transpose(grid, (1, 2, 0))
            cmap = None
        plt.imshow(grid, cmap=cmap)
        plt.axis('off')
        self.experiment.log_figure(figure=plt,
                                   figure_name=title + '_' + train_val)
        plt.close()

    def save(self, model):
        """
        Save the model weights in comet.

        Nothing happens unless the current epoch is a multiple of
        exp_config['checkpoint_interval'].

        Args:
            model (nn.Module): the model to be saved
        """
        if self._epoch % self.exp_config['checkpoint_interval'] == 0:
            print('Checkpointing the model...')
            state_dict = model.state_dict()
            cpu_state_dict = {k: v.cpu() for k, v in state_dict.items()}
            # save the state dictionary to a temporary local file, upload it,
            # then remove the local copy
            ckpt_path = './ckpt_epoch_' + str(self._epoch) + '.ckpt'
            torch.save(cpu_state_dict, ckpt_path)
            self.experiment.log_asset(ckpt_path)
            os.remove(ckpt_path)
            print('Done.')

    @staticmethod
    def _latest_checkpoint_asset(asset_list):
        """
        Pick the most recently created checkpoint asset.

        Args:
            asset_list (list): asset dictionaries with at least the keys
                               'fileName' and 'createdAt'

        Returns:
            dict: the asset whose file name contains 'ckpt' and whose
                  'createdAt' value is the largest

        Raises:
            ValueError: if no asset file name contains 'ckpt'
        """
        ckpt_assets = [
            asset for asset in asset_list if 'ckpt' in asset['fileName']
        ]
        if not ckpt_assets:
            raise ValueError('No checkpoint assets found in the experiment.')
        # Select from the filtered list. Indexing the unfiltered list with a
        # position computed on the filtered one returns the wrong asset as
        # soon as non-checkpoint assets are present.
        return max(ckpt_assets, key=lambda asset: asset['createdAt'])

    def load(self, model):
        """
        Load the model weights.

        Downloads the newest checkpoint asset of the experiment identified by
        exp_config['checkpoint_exp_key'] and passes the deserialized state
        dictionary to model.load().

        Args:
            model: object exposing a load(state_dict) method
        """
        assert self.exp_config[
            'checkpoint_exp_key'] is not None, 'Checkpoint experiment key must be set.'
        print('Loading checkpoint from ' +
              self.exp_config['checkpoint_exp_key'] + '...')
        comet_api = comet_ml.papi.API(
            rest_api_key=self.exp_config['rest_api_key'])
        exp = comet_api.get_experiment(
            workspace=self.exp_config['comet_config']['workspace'],
            project_name=self.exp_config['comet_config']['project_name'],
            experiment=self.exp_config['checkpoint_exp_key'])
        # get most recent checkpoint
        asset = self._latest_checkpoint_asset(exp.get_asset_list())
        print('Checkpoint Name:', asset['fileName'])
        ckpt = exp.get_asset(asset['assetId'])
        state_dict = torch.load(io.BytesIO(ckpt))
        # NOTE(review): plain nn.Module has no load(); presumably the project
        # model defines it (otherwise load_state_dict was intended) -- confirm.
        model.load(state_dict)
        print('Done.')
Ejemplo n.º 25
0
                        history['val_accuracy'].append(np.mean(val_accuracy))

                        # Save checkpoint if checkpointer metric improves
                        checkpointer.save_best(float(np.mean(val_loss)),
                                               global_step)

            # Check to stop training early
            if early_stopping and earlystopper.check_early_stop(
                    float(np.mean(val_loss))):
                break

        # Save training history
        history_file = os.path.join(output_dir,
                                    experiment_name + "_history.npz")
        save_history(history_file, history)
        experiment.log_asset(history_file)

        end_time = time.time()
        print("Training took " + str(('%.3f' % (end_time - start_time))) +
              " seconds for " + str(num_epochs) + " epochs")

        print("------------------------------------")
        print("Saving model...")
        checkpointer.save(global_step)
        experiment.log_asset_folder(checkpoint_dir)

    if testing:
        # Test the model
        print("------------------------------------")
        print("Testing model...")
Ejemplo n.º 26
0
                            float(acc_10) / float(total_batches))
                        experiment.log_metric(
                            'Acc@20',
                            float(acc_20) / float(total_batches))
                        experiment.log_metric(
                            'Acc@50',
                            float(acc_50) / float(total_batches))
                        running_loss = 0.0
                        acc_1 = 0.0
                        acc_5 = 0.0
                        acc_10 = 0.0
                        acc_20 = 0.0
                        acc_50 = 0.0
                        total_batches = 0.0

            print("Saving Epoch")

            torch.save({'model_state_dict': model.state_dict()},
                       "./models/" + arch_name.upper() + ".pt")
            experiment.log_asset("./models/" + arch_name.upper() + ".pt")

            if epoch % 5 == 0:

                val_loss_old = val_loss
                val_loss = val_model(model, val_loader)
                if val_loss - val_loss_old < 1e-3:
                    scheduler_step.step()
                    step_count += 1

        print("End here")
Ejemplo n.º 27
0
    "--git_hash",
    type=str,
    default="no-git-hash",
    help="output of git log --pretty=format:'%h' -n 1",
)
opts = parser.parse_args()

cudnn.benchmark = True
# Load experiment setting
config = get_config(opts.config)
max_iter = config["max_iter"]
display_size = config["display_size"]
config["vgg_model_path"] = opts.output_path

if comet_exp is not None:
    comet_exp.log_asset(file_data=opts.config, file_name=Path(opts.config))
    comet_exp.log_parameter("git_hash", opts.git_hash)
    comet_exp.log_parameters(flatten_opts(config))
# Setup model and data loader
if opts.trainer == "MUNIT":
    trainer = MUNIT_Trainer(config)
elif opts.trainer == "UNIT":
    trainer = UNIT_Trainer(config)
else:
    sys.exit("Only support MUNIT|UNIT")
trainer.cuda()

train_loader_a, train_loader_b, test_loader_a, test_loader_b = get_all_data_loaders(
    config)

test_loader_a_w_mask = get_data_loader_mask_and_im(
Ejemplo n.º 28
0
def train_model(
    xpath,
    ypath,
    xvalidpath,
    yvalidpath,
    xtestpath,
    ytestpath,
    modelpath,
    models,
    scaler,
):
    """Calibrate a voting ensemble from saved models and evaluate it.

    All six data files are logged as Comet assets, the pre-trained models
    listed in ``models`` are loaded with joblib, combined through
    ``MLOxidationStates.calibrate_ensemble`` and evaluated on the test,
    train and validation sets with ``model_eval``.

    Args:
        xpath: path of the training feature file.
        ypath: path of the training label file.
        xvalidpath: path of the validation feature file.
        yvalidpath: path of the validation label file.
        xtestpath: path of the test feature file (read with ``np.load``).
        ytestpath: path of the test label file (read with ``np.load``).
        modelpath: output directory; created if it does not exist.
        models: iterable of paths to joblib-serialized models; each file stem
            becomes the estimator name.
        scaler: forwarded to ``MLOxidationStates.from_x_y_paths``.

    Returns:
        None.
    """
    # makedirs with exist_ok avoids the check-then-create race and also
    # creates missing parent directories.
    os.makedirs(os.path.abspath(modelpath), exist_ok=True)

    experiment = Experiment(project_name="mof-oxidation-states")
    experiment.log_asset(xpath)
    experiment.log_asset(ypath)
    experiment.log_asset(xvalidpath)
    experiment.log_asset(yvalidpath)
    experiment.log_asset(xtestpath)
    experiment.log_asset(ytestpath)

    trainlogger.info("Train X: {}".format(xpath))
    trainlogger.info("Train y: {}".format(ypath))

    trainlogger.info("Validation X: {}".format(xvalidpath))
    trainlogger.info("Validation y: {}".format(yvalidpath))

    trainlogger.info("Test X: {}".format(xtestpath))
    trainlogger.info("Test y: {}".format(ytestpath))

    train_stem = Path(xpath).stem
    # NOTE(review): "istonic" looks like a misspelling of "isotonic"; the
    # value is kept unchanged because the accepted options are defined
    # outside this block -- confirm against MLOxidationStates.
    ml_object = MLOxidationStates.from_x_y_paths(
        xpath=os.path.abspath(xpath),
        ypath=os.path.abspath(ypath),
        xvalidpath=os.path.abspath(xvalidpath),
        yvalidpath=os.path.abspath(yvalidpath),
        modelpath=os.path.abspath(modelpath),
        scaler=scaler,
        n=10,
        voting="soft",
        calibrate="istonic",
        experiment=experiment,
    )

    X_test = np.load(xtestpath)
    y_test = np.load(ytestpath)

    # Apply the scaler held by ml_object to the test features.
    X_test = ml_object.scaler.transform(X_test)

    # (name, estimator) pairs, named after each model file's stem.
    models_loaded = [(Path(model).stem, joblib.load(model)) for model in models]

    votingclassifier, _ = ml_object.calibrate_ensemble(
        models_loaded,
        ml_object.x_valid,
        ml_object.y_valid,
        ml_object.experiment,
        ml_object.voting,
        ml_object.calibrate,
    )

    votingclassifier_tuple = [("votingclassifier_" + train_stem, votingclassifier)]

    scores_test = ml_object.model_eval(
        votingclassifier_tuple, X_test, y_test, experiment, "test", modelpath
    )
    scores_train = ml_object.model_eval(
        votingclassifier_tuple, ml_object.x, ml_object.y, experiment, "train", modelpath
    )
    scores_valid = ml_object.model_eval(
        votingclassifier_tuple,
        ml_object.x_valid,
        ml_object.y_valid,
        experiment,
        "valid",
        modelpath,
    )
Ejemplo n.º 29
0
    }


def getBestModelfromTrials(trials):
    """Return the model stored by the successful trial with the lowest loss.

    Only trials whose ``result['status']`` equals ``STATUS_OK`` are
    considered. Among those, the one with the smallest ``result['loss']``
    (converted to ``float``) wins and its ``result['mlp']`` entry is returned.
    """
    successful_results = []
    for trial in trials:
        outcome = trial['result']
        if outcome['status'] == STATUS_OK:
            successful_results.append(outcome)

    loss_values = [float(outcome['loss']) for outcome in successful_results]
    winner = successful_results[np.argmin(loss_values)]
    return winner['mlp']


if __name__ == '__main__':
    # Script entry point: run the hyperopt search inside a Comet training
    # context, then store and upload the best hyper-parameters and the model
    # of the best trial.
    best_params_file = 'best.joblib'
    best_model_file = 'best_model.joblib'

    experiment = Experiment(project_name='color-ml')
    with experiment.train():
        trials = Trials()
        best = fmin(
            keras_fmin_fnct,
            get_space(),
            algo=tpe.suggest,
            max_evals=150,
            trials=trials,
        )
        X_train, Y_train, X_test, Y_test = data()
        print('Evalutation of best performing model:')

    joblib.dump(best, best_params_file)
    model = getBestModelfromTrials(trials)
    joblib.dump(model, best_model_file)
    # Upload both artefacts, hyper-parameters first.
    for artifact in (best_params_file, best_model_file):
        experiment.log_asset(artifact)
Ejemplo n.º 30
0
                        options['clip_gradient_norm'])
                    optimizer.step()

                    N, C = output_fw.shape

                    running_loss += total_loss.item()
                    #torch.cuda.empty_cache()

                div = len(train_dl)

                experiment.log_metric('Epoch', epoch)
                experiment.log_metric('Running_loss',
                                      running_loss / float(div))
                experiment.log_metric('Temporal_loss',
                                      temporal_loss / float(div))
                experiment.log_metric('Caption_loss',
                                      caption_loss / float(div))

                torch.save({
                    'model_state_dict': model.state_dict(),
                }, "full_caption_C3D_16_" + str(epoch) + ".pt")
                experiment.log_asset("full_caption_C3D_16_" + str(epoch) +
                                     ".pt")

            if epoch % 5 == 0:

                val_loss_old = val_loss
                val_loss = val_model(model, val_loader)
                if val_loss - val_loss_old < 1e-4:
                    step_count += 1