Code Example #1
File: _9_multi_dec.py  Project: cdsnlab/AIoTPlaceness
def train_multidec(args):
    print("Training multidec")
    device = torch.device(args.gpu)
    print("Loading dataset...")
    full_dataset = load_multi_csv_data(args, CONFIG)
    print("Loading dataset completed")
    # full_loader = DataLoader(full_dataset, batch_size=args.batch_size, shuffle=False)

    image_encoder = MDEC_encoder(input_dim=args.input_dim, z_dim=args.latent_dim, n_clusters=args.n_clusters,
                                 encodeLayer=[500, 500, 2000], activation="relu", dropout=0)
    image_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "image_sdae_" + str(args.latent_dim)) + ".pt")
    text_encoder = MDEC_encoder(input_dim=args.input_dim, z_dim=args.latent_dim, n_clusters=args.n_clusters,
                                encodeLayer=[500, 500, 2000], activation="relu", dropout=0)
    text_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "text_sdae_" + str(args.latent_dim)) + ".pt")
    mdec = MultiDEC(device=device, image_encoder=image_encoder, text_encoder=text_encoder, n_clusters=args.n_clusters)
    exp = Experiment("MDEC " + str(args.latent_dim) + '_' + str(args.n_clusters), capture_io=True)
    print(mdec)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        mdec.fit(full_dataset, lr=args.lr, batch_size=args.batch_size, num_epochs=args.epochs,
                 save_path=CONFIG.CHECKPOINT_PATH)
        print("Finish!!!")

    finally:
        exp.end()
Code Example #2
def pretrain_ddec(args):
    print("Pretraining...")

    print("Loading dataset...")
    with open(os.path.join(args.text_embedding_dir, 'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    with open(os.path.join(args.text_embedding_dir, 'word_idx.json'), "r", encoding='utf-8') as f:
        word_idx = json.load(f)
    train_dataset, test_dataset = load_pretrain_data(args.image_dir, word_idx[1], args, CONFIG)
    print("Loading dataset completed")

    dualnet = DualNet(pretrained_embedding=embedding_model, text_features=args.text_features, z_dim=args.z_dim, n_classes=args.n_classes)
    if args.resume:
        print("loading model...")
        dualnet.load_model("/4TBSSD/CHECKPOINT/pretrain_" + str(args.z_dim) + "_0.pt")
    exp = Experiment("Dualnet_pretrain_" + str(args.z_dim), capture_io=True)
    print(dualnet)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        dualnet.fit(train_dataset,  test_dataset, args=args,
                 save_path="/4TBSSD/CHECKPOINT/pretrain_" + str(args.z_dim) + "_0.pt")
        print("Finish!!!")

    finally:
        exp.end()
Code Example #3
def train_reconstruction_all(args):
    device = torch.device(args.gpu)

    df_input_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix + "_" + args.target_csv),
                                index_col=0,
                                encoding='utf-8-sig')
    exp = Experiment(args.target_modal + " SDAE " + str(args.latent_dim),
                     capture_io=True)
    try:
        for arg, value in vars(args).items():
            exp.param(arg, value)
        print("Loading dataset...")

        train_dataset, val_dataset = load_autoencoder_data(
            df_input_data, CONFIG)
        print("Loading dataset completed")
        train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle), \
                                   DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

        sdae = StackedDAE(input_dim=args.input_dim,
                          z_dim=args.latent_dim,
                          binary=False,
                          encodeLayer=[500, 500, 2000],
                          decodeLayer=[2000, 500, 500],
                          activation="relu",
                          dropout=args.dropout,
                          device=device)
        if args.resume:
            print("resume from checkpoint")
            sdae.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix + "_" +
                    args.target_modal + "_" + args.target_dataset + "_sdae_" +
                    str(args.latent_dim) + "_all.pt"))
        else:
            sdae.pretrain(train_loader,
                          val_loader,
                          lr=args.lr,
                          batch_size=args.batch_size,
                          num_epochs=args.pretrain_epochs,
                          corrupt=0.2,
                          loss_type="mse")
        sdae.fit(train_loader,
                 val_loader,
                 lr=args.lr,
                 num_epochs=args.epochs,
                 corrupt=0.2,
                 loss_type="mse",
                 save_path=os.path.join(
                     CONFIG.CHECKPOINT_PATH, args.prefix + "_" +
                     args.target_modal + "_" + args.target_dataset + "_sdae_" +
                     str(args.latent_dim) + "_all.pt"))
    finally:
        exp.end()
Code Example #4
def train_bayes(params):
    """
    Wrapper around train function to serve as objective function for Gaussian
    optimization in scikit-optimize routine gp_minimize.

    Arguments:
    ----------
        params: list, shape=[nb_layers + 2,]
            Values sampled from the search space dimensions (Reals or
            Integers defined as (lower_bound, upper_bound) tuples).

    Returns:
    --------
        best_error: the error returned by train(); gp_minimize minimizes it.

    """
    # Create Hyperdash hd_experiment
    hd_exp = Experiment(project_name)

    # Translate params into format understood by train function
    # n_layer = 4
    # layer_sizes = hd_exp.param('layer_sizes', (2**np.array(params[:n_layer])).tolist())
    # learning_rate = hd_exp.param('learning rate', 10**params[n_layer])
    # mini_batch_size = hd_exp.param('mini batch size', int(2**params[n_layer + 1]))
    # pkeep = hd_exp.param('dropout prob', 1)
    # hyper_params = [layer_sizes, learning_rate, mini_batch_size, pkeep]
    # hyper_param_str = make_hyper_param_str(hyper_params)

    layer_sizes = [4096] * 4
    learning_rate = hd_exp.param('learning rate', 10**params[0])
    mini_batch_size = hd_exp.param('mini batch size', int(2**params[1]))
    pkeep = hd_exp.param('dropout prob', 1)
    hyper_params = [layer_sizes, learning_rate, mini_batch_size, pkeep]
    hyper_param_str = make_hyper_param_str(hyper_params)

    # Call train function
    tic = time.time()
    logger.info('Start training for ' + hyper_param_str)
    log_df, best_error = train(train_tuple, validation_tuple, hyper_params,
                               nb_epochs, random_seed, hd_exp, project_dir)
    elapsed_time = time.time() - tic
    logger.info('Finished training in {} s.'.format(elapsed_time))

    # Writing Pandas log file to csv file on disk.
    logger.info('Writing pandas DF log to disk.')
    log_df.to_csv(project_dir + '/' + hyper_param_str + '/data_df.csv')

    # Finish Hyperdash Experiment
    hd_exp.end()

    return best_error
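
The docstring above frames train_bayes as the objective for scikit-optimize's gp_minimize. As a rough sketch only, it could be wired up as below; the two dimensions and their bounds are illustrative assumptions mirroring params[0] (log10 of the learning rate) and params[1] (log2 of the mini-batch size), not values from the original project.

# Hedged sketch: driving train_bayes with skopt's gp_minimize.
# Bounds and n_calls are illustrative assumptions, not from the source.
from skopt import gp_minimize
from skopt.space import Integer, Real

search_space = [
    Real(-5.0, -2.0, name="log10_learning_rate"),  # consumed as 10**params[0]
    Integer(4, 9, name="log2_mini_batch_size"),    # consumed as int(2**params[1])
]

result = gp_minimize(
    func=train_bayes,        # returns best_error, which gp_minimize minimizes
    dimensions=search_space,
    n_calls=20,
    random_state=0,
)
print("best error:", result.fun, "at", result.x)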
Code Example #5
File: optimization.py  Project: taichimaeda/EE
    def objective(self, params):
        """
        objective function to optimize

        :param params: hyperparameters for the optimizer
        :return: negated maximum validation accuracy (for minimization)
        :rtype: float
        """
        # get instances
        dataset = Datasets.get(self.dataset_name)
        model = Models.get(self.model_name, dataset=dataset)
        optimizer = Optimizers.get(self.optimizer_name, params=params)

        # configure hyperdash experiment
        hd_exp = HyperdashExperiment(
            f'{self.dataset_name}',
            api_key_getter=lambda: self.config['hyperdash']['api_key'])
        hd_exp.param('dataset_name', self.dataset_name)
        hd_exp.param('model_name', self.model_name)
        hd_exp.param('optimizer_name', self.optimizer_name)

        for k, v in params.items():
            hd_exp.param(k, v)

        # set callbacks
        callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      hd_exp),
            EarlyStopping('val_accuracy',
                          patience=10,
                          min_delta=0.01,
                          verbose=1),
            TerminateOnNaN()
        ]

        # get data
        (x_train, y_train), *_ = dataset.get_batch()

        # start learning
        model.compile(loss=self.loss,
                      optimizer=optimizer,
                      metrics=['accuracy'])
        history = model.fit(x_train,
                            y_train,
                            batch_size=self.batch_size,
                            epochs=self.epochs,
                            callbacks=callbacks,
                            validation_split=0.2,
                            verbose=2)

        # stop hyperdash experiment
        hd_exp.end()

        # return maximum validation accuracy
        val_accuracy = np.array(history.history['val_accuracy'])
        return max(val_accuracy) * (-1)
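
Because the method returns the negated best validation accuracy, any minimizer can consume it directly. A minimal sketch with hyperopt, assuming an instance opt of the surrounding class and an illustrative two-parameter search space (neither taken from the original project):

# Hedged sketch: minimizing the objective above with hyperopt.
# `opt` is an assumed instance of the surrounding class; the space is illustrative.
from hyperopt import fmin, hp, tpe

space = {
    "learning_rate": hp.loguniform("learning_rate", -10, -2),
    "momentum": hp.uniform("momentum", 0.0, 0.99),
}

best = fmin(fn=opt.objective, space=space, algo=tpe.suggest, max_evals=25)
print(best)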
Code Example #6
def main():
    """Start training."""
    exp = Experiment("diffrend test")

    # Parse args
    opt = Parameters().parse()

    for key, val in opt.__dict__.items():
        exp.param(key, val)

    # Create dataset loader
    dataset_load = Dataset_load(opt)

    # Create GAN
    gan = GAN(opt, dataset_load, exp)

    # Train gan
    gan.train()
Code Example #7
File: cli.py  Project: taketrung1988/hyperdash-sdk-py
def demo(args=None):
    from_file = get_api_key_from_file()
    from_env = get_api_key_from_env()
    api_key = from_env or from_file

    if not api_key:
        print("""
            `hyperdash demo` requires a Hyperdash API key. Try setting your API key in the
            HYPERDASH_API_KEY environment variable, or in a hyperdash.json file in the local
            directory or your user's home directory with the following format:

            {
                "api_key": "<YOUR_API_KEY>"
            }
        """)
        return

    print("""
Running the following program:

    from hyperdash import Experiment
    exp = Experiment("Dogs vs. Cats")

    # Parameters
    estimators = exp.param("Estimators", 500)
    epochs = exp.param("Epochs", 5)
    batch = exp.param("Batch Size", 64)

    for epoch in xrange(1, epochs + 1):
        accuracy = 1. - 1./epoch
        loss = float(epochs - epoch)/epochs
        print("Training model (epoch {})".format(epoch))
        time.sleep(1)

        # Metrics
        exp.metric("Accuracy", accuracy)
        exp.metric("Loss", loss)

    exp.end()
    """)
    from hyperdash import Experiment
    exp = Experiment("Dogs vs. Cats")

    # Parameters
    estimators = exp.param("Estimators", 500)
    epochs = exp.param("Epochs", 5)
    batch = exp.param("Batch Size", 64)

    for epoch in xrange(epochs):
        print("Training model (epoch {})".format(epoch))

        accuracy = 1. - 1. / (epoch + 1)
        loss = float(epochs - epoch) / (epochs + 1)

        # Metrics
        exp.metric("Accuracy", accuracy)
        exp.metric("Loss", loss)

        time.sleep(1)

    exp.end()
Code Example #8
agent = DDPG(nb_states, nb_actions, args)
evaluate = Evaluator(args.validate_episodes,
    args.validate_steps, args.output, max_episode_length=args.max_episode_length)

exp = None

if args.mode == 'train':
    exp = Experiment("sim2real-ddpg-real-cheetah")
    for arg in ["env", "rate", "prate", "hidden1", "hidden2", "warmup", "discount",
                "bsize", "rmsize", "window_length", "tau", "ou_theta", "ou_sigma", "ou_mu",
                "validate_episodes", "max_episode_length", "validate_steps", "init_w",
                "train_iter", "epsilon", "seed", "resume"]:
        arg_val = getattr(args, arg)
        exp.param(arg, arg_val)

    import socket
    exp.param("host", socket.gethostname())

    train(args, args.train_iter, agent, env, evaluate,
        args.validate_steps, args.output, max_episode_length=args.max_episode_length, debug=args.debug, exp=exp)

    # when done
    exp.end()

elif args.mode == 'test':
    test(args.validate_episodes, agent, env, evaluate, args.resume,
        visualize=True, debug=args.debug)

else:
    raise RuntimeError('undefined mode {}'.format(args.mode))
Code Example #9
def train_multidec(args):
    print("Training weight calc")
    device = torch.device(args.gpu)
    df_image_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_pca_normalized_image_encoded_" +
        args.target_dataset + ".csv"),
                                index_col=0,
                                encoding='utf-8-sig')
    df_text_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH,
        args.prefix_csv + "_text_doc2vec_" + args.target_dataset + ".csv"),
                               index_col=0,
                               encoding='utf-8-sig')

    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    label_array = np.array(df_label['category'])
    n_clusters = np.max(label_array) + 1
    #n_clusters = args.n_clusters

    exp = Experiment(args.prefix_csv + "_ODEC", capture_io=True)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        acc_list = []
        nmi_list = []
        f_1_list = []
        for fold_idx in range(args.start_fold, args.fold):
            print("Current fold: ", fold_idx)
            df_train = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH, "train_" + str(fold_idx) + "_" +
                args.target_dataset + "_label.csv"),
                                   index_col=0,
                                   encoding='utf-8-sig')
            if args.sampled_n is not None:
                df_train = df_train.sample(n=args.sampled_n, random_state=42)
            df_test = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH, "test_" + str(fold_idx) + "_" +
                args.target_dataset + "_label.csv"),
                                  index_col=0,
                                  encoding='utf-8-sig')
            print("Loading dataset...")
            full_dataset, train_dataset, val_dataset = load_semi_supervised_csv_data(
                df_image_data, df_text_data, df_train, df_test, CONFIG)
            print("\nLoading dataset completed")

            image_encoder = MDEC_encoder(input_dim=args.input_dim,
                                         z_dim=args.latent_dim,
                                         n_clusters=n_clusters,
                                         encodeLayer=[500, 500, 2000],
                                         activation="relu",
                                         dropout=0)
            image_encoder.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_model + "_image"
                    "_" + args.target_dataset + "_sdae_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
            # image_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_image_sdae_" + str(fold_idx)) + ".pt")
            text_encoder = MDEC_encoder(input_dim=args.input_dim,
                                        z_dim=args.latent_dim,
                                        n_clusters=n_clusters,
                                        encodeLayer=[500, 500, 2000],
                                        activation="relu",
                                        dropout=0)
            text_encoder.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_model + "_text"
                    "_" + args.target_dataset + "_sdae_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
            # text_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_text_sdae_" + str(fold_idx)) + ".pt")
            mdec = MultiDEC(device=device,
                            image_encoder=image_encoder,
                            text_encoder=text_encoder,
                            ours=args.ours,
                            use_prior=args.use_prior,
                            fl=args.fl,
                            n_clusters=n_clusters)

            mdec.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_csv + "_odec_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
            mdec.to(device)
            mdec.eval()
            wcalc = WeightCalc(device=device,
                               ours=args.ours,
                               use_prior=args.use_prior,
                               input_dim=args.input_dim,
                               n_clusters=n_clusters)
            wcalc.fit_predict(
                mdec,
                full_dataset,
                train_dataset,
                val_dataset,
                args,
                CONFIG,
                lr=args.lr,
                batch_size=args.batch_size,
                num_epochs=args.epochs,
                save_path=os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_csv + "_wcalc_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt",
                tol=args.tol,
                kappa=args.kappa)
            acc_list.append(wcalc.acc)
            nmi_list.append(wcalc.nmi)
            f_1_list.append(wcalc.f_1)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (np.mean(acc_list), np.mean(nmi_list), np.mean(f_1_list)))

    finally:
        exp.end()
Code Example #10
def train_reconstruction(args):
    device = torch.device(args.gpu)
    print("Loading dataset...")
    train_dataset, val_dataset = load_imgseq_data(args, CONFIG)
    print("Loading dataset completed")
    train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle),\
             DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    #imgseq_encoder = imgseq_model.RNNEncoder(args.embedding_dim, args.num_layer, args.latent_size, bidirectional=True)
    #imgseq_decoder = imgseq_model.RNNDecoder(CONFIG.MAX_SEQUENCE_LEN, args.embedding_dim, args.num_layer, args.latent_size, bidirectional=True)
    t1 = CONFIG.MAX_SEQUENCE_LEN
    t2 = int(math.floor((t1 - 3) / 1) + 1)  # the divisor is the stride size (1 here)
    t3 = int(math.floor((t2 - 3) / 1) + 1)
    imgseq_encoder = imgseq_model.ConvolutionEncoder(
        embedding_dim=args.embedding_dim,
        t3=t3,
        filter_size=300,
        filter_shape=3,
        latent_size=1000)
    imgseq_decoder = imgseq_model.DeconvolutionDecoder(
        embedding_dim=args.embedding_dim,
        t3=t3,
        filter_size=300,
        filter_shape=3,
        latent_size=1000)
    if args.resume:
        print("Restart from checkpoint")
        checkpoint = torch.load(os.path.join(CONFIG.CHECKPOINT_PATH,
                                             args.resume),
                                map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        imgseq_encoder.load_state_dict(checkpoint['imgseq_encoder'])
        imgseq_decoder.load_state_dict(checkpoint['imgseq_decoder'])
    else:
        print("Start from initial")
        start_epoch = 0

    imgseq_autoencoder = imgseq_model.ImgseqAutoEncoder(
        imgseq_encoder, imgseq_decoder)
    criterion = nn.MSELoss().to(device)
    imgseq_autoencoder.to(device)

    optimizer = AdamW(imgseq_autoencoder.parameters(),
                      lr=1.,
                      weight_decay=args.weight_decay,
                      amsgrad=True)
    step_size = args.half_cycle_interval * len(train_loader)
    clr = cyclical_lr(step_size,
                      min_lr=args.lr,
                      max_lr=args.lr * args.lr_factor)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])

    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])

    exp = Experiment("Image-sequence autoencoder " + str(args.latent_size),
                     capture_io=False)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        imgseq_autoencoder.train()

        for epoch in range(start_epoch, args.epochs):
            print("Epoch: {}".format(epoch))
            for steps, batch in enumerate(train_loader):
                torch.cuda.empty_cache()
                feature = Variable(batch).to(device)
                optimizer.zero_grad()
                feature_hat = imgseq_autoencoder(feature)
                loss = criterion(feature_hat, feature)
                loss.backward()
                optimizer.step()
                scheduler.step()

                if (steps * args.batch_size) % args.log_interval == 0:
                    print("Epoch: {} at {} lr: {}".format(
                        epoch, str(datetime.datetime.now()),
                        str(scheduler.get_lr())))
                    print("Steps: {}".format(steps))
                    print("Loss: {}".format(loss.detach().item()))
                    input_data = feature[0]
                del feature, feature_hat, loss

            exp.log("\nEpoch: {} at {} lr: {}".format(
                epoch, str(datetime.datetime.now()), str(scheduler.get_lr())))
            _avg_loss = eval_reconstruction(imgseq_autoencoder, criterion,
                                            val_loader, device)
            exp.log("\nEvaluation - loss: {}".format(_avg_loss))

            util.save_models(
                {
                    'epoch': epoch + 1,
                    'imgseq_encoder': imgseq_encoder.state_dict(),
                    'imgseq_decoder': imgseq_decoder.state_dict(),
                    'avg_loss': _avg_loss,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                }, CONFIG.CHECKPOINT_PATH,
                "imgseq_autoencoder_" + str(args.latent_size))

        print("Finish!!!")

    finally:
        exp.end()
Code Example #11
import os

import torch
from torch.utils.data import DataLoader
import numpy as np
from hyperdash import Experiment

from inversegraphics_generator.img_dataset import IqImgDataset
from inversegraphics_generator.iqtest_objs import get_data_dir
from inversegraphics_generator.resnet50 import MultiResNet, ContrastiveLoss

EPOCHS = 40
BATCH = 64
LEARNING_RATE = 0.0001
SIZE = 1000
MARGIN = 2

exp = Experiment("[ig] cnn-siamese2")
exp.param("epoch", EPOCHS)
exp.param("size", SIZE)
exp.param("batch", BATCH)
exp.param("learning rate", LEARNING_RATE)

# ds = IqImgDataset("/data/lisa/data/iqtest/iqtest-dataset-ambient.h5", "train/labeled", max_size=SIZE)
ds = IqImgDataset(os.path.join(get_data_dir(), "test.h5"),
                  "train/labeled",
                  max_size=SIZE)
dl = DataLoader(ds, batch_size=BATCH, shuffle=True, num_workers=0)

model = MultiResNet(siamese=True)  #.cuda()
# Loss and optimizer
criterion_contrast = ContrastiveLoss(MARGIN)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
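
The excerpt stops before the training loop. A minimal sketch of one is below; the batch layout (two images plus a same/different label) and the model/loss call signatures are assumptions about MultiResNet and ContrastiveLoss, not taken from inversegraphics_generator.

# Hedged sketch of a contrastive training loop for the setup above; the batch
# layout and call signatures are assumptions, not from the source project.
for epoch in range(EPOCHS):
    running_loss = 0.0
    for img_a, img_b, same_label in dl:        # assumed batch layout
        out_a, out_b = model(img_a, img_b)     # assumed siamese forward
        loss = criterion_contrast(out_a, out_b, same_label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    exp.metric("loss", running_loss / len(dl))
exp.end()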
Code Example #12
#         net.load_state_dict(checkpoint['state_dict'])
#         print("MODEL LOADED, CONTINUING TRAINING")
#         return "TRAINING AVG LOSS: {}\n" \
#                "TRAINING AVG DIFF: {}".format(
#             checkpoint["epoch_avg_loss"], checkpoint["epoch_avg_diff"])
#     else:
#         if optional:
#             pass  # model loading was optional, so nothing to do
#         else:
#             # shit, no model
#             raise Exception("model couldn't be found:", MODEL_PATH_BEST)

loss_function = nn.MSELoss()
if hyperdash_support:
    exp = Experiment("[sim2real] lstm - real v3")
    exp.param("exp", EXPERIMENT)
    exp.param("layers", LSTM_LAYERS)
    exp.param("nodes", HIDDEN_NODES)

# if TRAIN:
optimizer = optim.Adam(net.parameters())
# if CONTINUE:
#     old_model_string = loadModel(optional=True)
#     print(old_model_string)
# else:
#     old_model_string = loadModel(optional=False)

loss_history = [np.inf]  # very high loss because loss can't be empty for min()

for epoch in np.arange(EPOCHS):
Code Example #13
        net.load_state_dict(checkpoint['state_dict'])
        return "TRAINING AVG LOSS: {}\n" \
               "TRAINING AVG DIFF: {}".format(
            checkpoint["epoch_avg_loss"], checkpoint["epoch_avg_diff"])
    else:
        if optional:
            pass  # model loading was optional, so nothing to do
        else:
            #shit, no model
            raise Exception("model couldn't be found:", MODEL_PATH_BEST)


loss_function = nn.MSELoss()
if hyperdash_support:
    exp = Experiment("simple lstm - fl4")
    exp.param("layers", LSTM_LAYERS)
    exp.param("nodes", HIDDEN_NODES)

if TRAIN:
    optimizer = optim.Adam(net.parameters())
    if CONTINUE:
        old_model_string = loadModel(optional=True)
        print(old_model_string)
else:
    old_model_string = loadModel(optional=False)

loss_history = [9999999]  # very high loss because loss can't be empty for min()
# h0 = Variable(torch.randn(, 3, 20))
# c0 = Variable(torch.randn(2, 3, 20))
Code Example #14
    def test_experiment_handles_numpy_numbers(self):
        nums_to_test = [
            ("int_", np.int_()),
            ("intc", np.intc()),
            ("intp", np.intp()),
            ("int8", np.int8()),
            ("int16", np.int16()),
            ("int32", np.int32()),
            ("int64", np.int64()),
            ("uint8", np.uint8()),
            ("uint16", np.uint16()),
            ("uint32", np.uint32()),
            ("uint64", np.uint64()),
            ("float16", np.float16()),
            ("float32", np.float32()),
            ("float64", np.float64()),
        ]
        # Make sure the SDK doesn't choke and JSON serialization works
        exp = Experiment("MNIST")
        for name, num in nums_to_test:
            exp.metric("test_metric_{}".format(name), num)
            exp.param("test_param_{}".format(name), num)
        exp.end()

        # Test params match what is expected
        params_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "params" in payload:
                params_messages.append(payload)

        expected_params = []
        for name, num in nums_to_test:
            obj = {
                "params": {},
                "is_internal": False,
            }
            obj["params"]["test_param_{}".format(name)] = num
            obj["is_internal"] = False
            expected_params.append(obj)

        assert len(expected_params) == len(params_messages)
        for i, message in enumerate(params_messages):
            print(message)
            print(expected_params[i])
            assert message == expected_params[i]

        # Test metrics match what is expected
        metrics_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "name" in payload:
                metrics_messages.append(payload)

        expected_metrics = []
        for name, num in nums_to_test:
            expected_metrics.append({
                "name": "test_metric_{}".format(name),
                "value": num,
                "is_internal": False,
            })

        assert len(expected_metrics) == len(metrics_messages)
        for i, message in enumerate(metrics_messages):
            assert message == expected_metrics[i]
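
The comment in this test points at the underlying issue: Python's json module refuses to serialize most NumPy scalar types, so the SDK has to coerce them. A small standalone illustration (not part of the SDK's test suite):

# Why NumPy scalars need coercion before JSON serialization:
# json.dumps rejects types such as np.float32 and np.int64.
import json
import numpy as np

value = np.float32(0.5)
try:
    json.dumps({"metric": value})
except TypeError as err:
    print("plain json.dumps fails:", err)
print(json.dumps({"metric": float(value)}))  # works after an explicit cast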
Code Example #15
    def gen_estimator(period=None):
        resnet_size = int(flags_obj.resnet_size)
        data_format = flags_obj.data_format
        batch_size = flags_obj.batch_size
        resnet_version = int(flags_obj.resnet_version)
        loss_scale = flags_core.get_loss_scale(flags_obj)
        dtype_tf = flags_core.get_tf_dtype(flags_obj)
        num_epochs_per_decay = flags_obj.num_epochs_per_decay
        learning_rate_decay_factor = flags_obj.learning_rate_decay_factor
        end_learning_rate = flags_obj.end_learning_rate
        learning_rate_decay_type = flags_obj.learning_rate_decay_type
        weight_decay = flags_obj.weight_decay
        zero_gamma = flags_obj.zero_gamma
        lr_warmup_epochs = flags_obj.lr_warmup_epochs
        base_learning_rate = flags_obj.base_learning_rate
        use_resnet_d = flags_obj.use_resnet_d
        use_dropblock = flags_obj.use_dropblock
        dropblock_kp = [float(be) for be in flags_obj.dropblock_kp]
        label_smoothing = flags_obj.label_smoothing
        momentum = flags_obj.momentum
        bn_momentum = flags_obj.bn_momentum
        train_epochs = flags_obj.train_epochs
        piecewise_lr_boundary_epochs = [
            int(be) for be in flags_obj.piecewise_lr_boundary_epochs
        ]
        piecewise_lr_decay_rates = [
            float(dr) for dr in flags_obj.piecewise_lr_decay_rates
        ]
        use_ranking_loss = flags_obj.use_ranking_loss
        use_se_block = flags_obj.use_se_block
        use_sk_block = flags_obj.use_sk_block
        mixup_type = flags_obj.mixup_type
        dataset_name = flags_obj.dataset_name
        kd_temp = flags_obj.kd_temp
        no_downsample = flags_obj.no_downsample
        anti_alias_filter_size = flags_obj.anti_alias_filter_size
        anti_alias_type = flags_obj.anti_alias_type
        cls_loss_type = flags_obj.cls_loss_type
        logit_type = flags_obj.logit_type
        embedding_size = flags_obj.embedding_size
        pool_type = flags_obj.pool_type
        arc_s = flags_obj.arc_s
        arc_m = flags_obj.arc_m
        bl_alpha = flags_obj.bl_alpha
        bl_beta = flags_obj.bl_beta
        exp = None

        if install_hyperdash and flags_obj.use_hyperdash:
            exp = Experiment(flags_obj.model_dir.split("/")[-1])
            resnet_size = exp.param("resnet_size", int(flags_obj.resnet_size))
            batch_size = exp.param("batch_size", flags_obj.batch_size)
            exp.param("dtype", flags_obj.dtype)
            learning_rate_decay_type = exp.param(
                "learning_rate_decay_type", flags_obj.learning_rate_decay_type)
            weight_decay = exp.param("weight_decay", flags_obj.weight_decay)
            zero_gamma = exp.param("zero_gamma", flags_obj.zero_gamma)
            lr_warmup_epochs = exp.param("lr_warmup_epochs",
                                         flags_obj.lr_warmup_epochs)
            base_learning_rate = exp.param("base_learning_rate",
                                           flags_obj.base_learning_rate)
            use_dropblock = exp.param("use_dropblock", flags_obj.use_dropblock)
            dropblock_kp = exp.param(
                "dropblock_kp", [float(be) for be in flags_obj.dropblock_kp])
            piecewise_lr_boundary_epochs = exp.param(
                "piecewise_lr_boundary_epochs",
                [int(be) for be in flags_obj.piecewise_lr_boundary_epochs])
            piecewise_lr_decay_rates = exp.param(
                "piecewise_lr_decay_rates",
                [float(dr) for dr in flags_obj.piecewise_lr_decay_rates])
            mixup_type = exp.param("mixup_type", flags_obj.mixup_type)
            dataset_name = exp.param("dataset_name", flags_obj.dataset_name)
            exp.param("autoaugment_type", flags_obj.autoaugment_type)

        classifier = tf.estimator.Estimator(
            model_fn=model_function,
            model_dir=flags_obj.model_dir,
            config=run_config,
            params={
                'resnet_size': resnet_size,
                'data_format': data_format,
                'batch_size': batch_size,
                'resnet_version': resnet_version,
                'loss_scale': loss_scale,
                'dtype': dtype_tf,
                'num_epochs_per_decay': num_epochs_per_decay,
                'learning_rate_decay_factor': learning_rate_decay_factor,
                'end_learning_rate': end_learning_rate,
                'learning_rate_decay_type': learning_rate_decay_type,
                'weight_decay': weight_decay,
                'zero_gamma': zero_gamma,
                'lr_warmup_epochs': lr_warmup_epochs,
                'base_learning_rate': base_learning_rate,
                'use_resnet_d': use_resnet_d,
                'use_dropblock': use_dropblock,
                'dropblock_kp': dropblock_kp,
                'label_smoothing': label_smoothing,
                'momentum': momentum,
                'bn_momentum': bn_momentum,
                'embedding_size': embedding_size,
                'train_epochs': train_epochs,
                'piecewise_lr_boundary_epochs': piecewise_lr_boundary_epochs,
                'piecewise_lr_decay_rates': piecewise_lr_decay_rates,
                'with_drawing_bbox': flags_obj.with_drawing_bbox,
                'use_ranking_loss': use_ranking_loss,
                'use_se_block': use_se_block,
                'use_sk_block': use_sk_block,
                'mixup_type': mixup_type,
                'kd_temp': kd_temp,
                'no_downsample': no_downsample,
                'dataset_name': dataset_name,
                'anti_alias_filter_size': anti_alias_filter_size,
                'anti_alias_type': anti_alias_type,
                'cls_loss_type': cls_loss_type,
                'logit_type': logit_type,
                'arc_s': arc_s,
                'arc_m': arc_m,
                'pool_type': pool_type,
                'bl_alpha': bl_alpha,
                'bl_beta': bl_beta,
                'train_steps': total_train_steps,
            })
        return classifier, exp
Code Example #16
    sys.path.append("./")
    from models.burgers_train_separate import BurgersSeparate
    parser = argparse.ArgumentParser()
    parser.add_argument("--niter", default=10000, type=int)
    parser.add_argument("--scipyopt", default=False)
    parser.add_argument("--name", default="default")
    parser.add_argument("--traindata", nargs="+")
    parser.add_argument("--testdata",
                        default="../MyData/burgers_polynominal.mat")
    args = parser.parse_args()
    logname = f"log/burgers_{args.name}.log"
    figurename = f"Burgers_{args.name}"
    # filen = "../MyData/burgers_cos.mat"

    exp = Experiment(args.name)
    exp.param("niter", args.niter)
    exp.param("scipyopt", args.scipyopt)
    exp.param("testdata", args.testdata)
    for i, n in enumerate(args.traindata):
        exp.param(f"traindata{i}", n)

    dataloader = DataLoader(args.traindata, args.testdata, 10.0, 8.0)
    sol_data = dataloader.get_solver_data(20000)
    idn_data = dataloader.get_train_batch()

    u_layers = [[2, 50, 50, 50, 50, 1] for _ in range(len(args.traindata))]
    pde_layers = [3, 100, 100, 1]
    layers = [2, 50, 50, 50, 50, 1]

    idn_lbs = idn_data["idn_lbs"]
    idn_ubs = idn_data["idn_ubs"]
Code Example #17
File: trainer.py  Project: mzntaka0/fire
class BaseTrainer(_BaseTrainer):
    """ Base trainer to make pytorch training be easier.

    Args:
        data-augmentation (bool): Crop randomly and add random noise for data augmentation.
        epoch (int): Number of epochs to train.
        opt (str): Optimization method.
        gpu (bool): Use GPU.
        seed (str): Random seed to train.
        train (str): Path to training image-pose list file.
        val (str): Path to validation image-pose list file.
        batchsize (int): Learning minibatch size.
        out (str): Output directory.
        resume (str): Initialize the trainer from given file.
            The file name is 'epoch-{epoch number}.iter'.
        resume_model (str): Load model definition file to use for resuming training
            (it\'s necessary when you resume a training).
            The file name is 'epoch-{epoch number}.model'.
        resume_opt (str): Load optimization states from this file
            (it\'s necessary when you resume a training).
            The file name is 'epoch-{epoch number}.state'.
    """

    def __init__(self, **kwargs):
        self.data_augmentation = kwargs['data_augmentation']
        self.epoch = kwargs['epoch']
        self.gpu = (kwargs['gpu'] >= 0)
        self.opt = kwargs['opt']
        self.seed = kwargs['seed']
        self.train = kwargs['train']
        self.val = kwargs['val']
        self.batchsize = kwargs['batchsize']
        self.out = kwargs['out']
        self.resume = kwargs['resume']
        self.resume_model = kwargs['resume_model']
        self.resume_opt = kwargs['resume_opt']
        self.hyperdash = kwargs['hyperdash']
        if self.hyperdash:
            self.experiment = Experiment(self.hyperdash)
            for key, val in kwargs.items():
                self.experiment.param(key, val)
        # validate arguments.
        self._validate_arguments()
        self.lowest_loss = 0
        self.device = torch.device('cuda' if kwargs['gpu'] >= 0 else 'cpu')
        #self.experiment.log_multiple_params(kwargs)
        self.dataloader = torch.utils.data.DataLoader

    def _validate_arguments(self):
        if self.seed is not None and self.data_augmentation:
            raise NotSupportedError('It is not supported to fix random seed for data augmentation.')
        if self.gpu and not torch.cuda.is_available():
            raise GPUNotFoundError('GPU is not found.')
        #for path in (self.train, self.val):
        #    if not os.path.isfile(path):
        #        raise FileNotFoundError('{0} is not found.'.format(path))
        if self.opt not in ('MomentumSGD', 'Adam'):
            raise UnknownOptimizationMethodError(
                '{0} is unknown optimization method.'.format(self.opt))
        if self.resume is not None:
            for path in (self.resume, self.resume_model, self.resume_opt):
                if not os.path.isfile(path):
                    raise FileNotFoundError('{0} is not found.'.format(path))

    # TODO: make it acceptable multiple optimizer, or define out of this trainer.
    def _get_optimizer(self, model, **kwargs):
        if self.opt == 'MomentumSGD':
            optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        elif self.opt == "Adam":
            optimizer = optim.Adam(model.parameters())
        else:
            try:
                optimizer = getattr(optim, self.opt)(**kwargs)
            except OptimNotSupportedError:
                print("This optim is not available. See https://pytorch.org/docs/stable/optim.html")
        return optimizer

    def forward(self, batch, model, criterion, isTest=False):  # isTest matches the _train/_test call sites
        data, target = map(lambda d: d.to(self.device), batch)
        output = model(data)
        loss = criterion(output, target)
        return loss

    def _train(self, model, optimizer, criterion, train_iter, logger, start_time, log_interval=10):
        model.train()
        loss_sum = 0.0
        for iteration, batch in enumerate(tqdm(train_iter, desc='this epoch'), 1):
            optimizer.zero_grad()
            loss = self.forward(batch, model, criterion, isTest=False)
            loss_sum += loss
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 500)
            optimizer.step()
            if self.hyperdash:
                self.experiment.metric("loss", int(loss.cpu().data.numpy()), log=False)
            if iteration % log_interval == 0:
                log = 'elapsed_time: {0}, loss: {1}'.format(time.time() - start_time, loss.data[0])
                logger.write(log)
        return loss_sum / len(train_iter)

    def _test(self, model, test_iter, criterion, logger, start_time):
        model.eval()
        test_loss = 0
        for batch in test_iter:
            loss = self.forward(batch, model, criterion, isTest=True)
            print('Test loss: {}'.format(loss.data))
            test_loss += loss.item()
        test_loss /= len(test_iter)
        log = 'elapsed_time: {0}, validation/loss: {1}'.format(time.time() - start_time, test_loss)
        if self.hyperdash:
            self.experiment.metric('test_loss', test_loss)  # test_loss is already a Python float here
        logger.write(log)
        return test_loss

    def _checkpoint(self, epoch, model, optimizer, logger):
        filename = os.path.join(self.out, 'epoch-{0}'.format(epoch + 1))
        torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def _best_checkpoint(self, epoch, model, optimizer, logger):
        filename = os.path.join(self.out, 'best_model')
        torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def fit(self, model, train_data, val_data, criterion):
        """ Execute training """
        # set random seed.
        if self.seed is not None:
            random.seed(self.seed)
            torch.manual_seed(self.seed)
            if self.gpu:
                torch.cuda.manual_seed(self.seed)
        # initialize model to train.
        if self.resume_model:
            model.load_state_dict(torch.load(self.resume_model))
        # prepare gpu.
        if self.gpu:
            model.cuda()
        # load the datasets.
        train_iter = self.dataloader(train_data, batch_size=self.batchsize, shuffle=True)
        val_iter = self.dataloader(val_data, batch_size=3, shuffle=False)
        # set up an optimizer.
        optimizer = self._get_optimizer(model)
        if self.resume_opt:
            optimizer.load_state_dict(torch.load(self.resume_opt))
        # set intervals.
        val_interval = 3
        resume_interval = self.epoch / 10
        log_interval = 10
        # set logger and start epoch.
        logger = TrainLogger(self.out)
        start_epoch = 0
        if self.resume:
            resume = torch.load(self.resume)
            start_epoch = resume['epoch']
            logger.load_state_dict(resume['logger'])
        # start training.
        start_time = time.time()
        loss = 0
        for epoch in trange(start_epoch, self.epoch, initial=start_epoch, total=self.epoch, desc='     total'):
            self._train(model, optimizer, criterion, train_iter, logger, start_time, log_interval)
            if (epoch) % val_interval == 0:
                loss = self._test(model, val_iter, criterion, logger, start_time)
                if self.lowest_loss == 0 or self.lowest_loss > loss:
                    logger.write('Best model updated. loss: {} => {}'.format(self.lowest_loss, loss))
                    self._best_checkpoint(epoch, model, optimizer, logger)
                    self.lowest_loss = loss
            if (epoch + 1) % resume_interval == 0:
                self._checkpoint(epoch, model, optimizer, logger)

        if self.hyperdash:
            self.experiment.end()

    @staticmethod
    def get_args():
        # arg definition
        parser = argparse.ArgumentParser(
            description='Training pose net for comparison \
            between chainer and pytorch about implementing DeepPose.')
        parser.add_argument(
            '--data-augmentation', '-a', action='store_true', help='Crop randomly and add random noise for data augmentation.')
        parser.add_argument(
            '--epoch', '-e', type=int, default=100, help='Number of epochs to train.')
        parser.add_argument(
            '--opt', '-o', type=str, default='Adam',
            choices=['MomentumSGD', 'Adam'], help='Optimization method.')
        parser.add_argument(
            '--gpu', '-g', type=int, default=0, help='GPU ID (negative value indicates CPU).')
        parser.add_argument(
            '--seed', '-s', type=int, help='Random seed to train.')
        parser.add_argument(
            '--train', type=str, default='data/train', help='Path to training image-pose list file.')
        parser.add_argument(
            '--val', type=str, default='data/test', help='Path to validation image-pose list file.')
        parser.add_argument(
            '--batchsize', type=int, default=32, help='Learning minibatch size.')
        parser.add_argument(
            '--out', default='result', help='Output directory')
        parser.add_argument(
            '--resume', default=None,
            help='Initialize the trainer from given file. \
            The file name is "epoch-{epoch number}.iter".')
        parser.add_argument(
            '--resume-model', type=str, default=None,
            help='Load model definition file to use for resuming training \
            (it\'s necessary when you resume a training). \
            The file name is "epoch-{epoch number}.mode"')
        parser.add_argument(
            '--resume-opt', type=str, default=None,
            help='Load optimization states from this file \
            (it\'s necessary when you resume a training). \
            The file name is "epoch-{epoch number}.state"')
        parser.add_argument(
            '--hyperdash', type=str, default=None,
            help='If you use hyperdash logging, enter here the name of experiment. Before using, you have to login to hyperdash with "hyperdash login --github". The default is None that means no logging with hyperdash')
        args = parser.parse_args()
        return args
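
As a usage sketch only, the trainer could be driven roughly as follows; MyPoseNet and MyPoseDataset are hypothetical placeholders, since BaseTrainer itself is model- and dataset-agnostic.

# Hedged usage sketch for BaseTrainer; MyPoseNet and MyPoseDataset are
# hypothetical placeholders, not classes from the source project.
if __name__ == '__main__':
    args = BaseTrainer.get_args()
    trainer = BaseTrainer(**vars(args))
    model = MyPoseNet()                          # hypothetical model class
    train_data = MyPoseDataset(args.train)       # hypothetical dataset class
    val_data = MyPoseDataset(args.val)
    criterion = torch.nn.MSELoss()
    trainer.fit(model, train_data, val_data, criterion)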
Code Example #18
File: _9_multi_dec.py  Project: cdsnlab/AIoTPlaceness
def train_multidec(args):
    print("Training multidec")
    device = torch.device(args.gpu)
    df_image_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.image_csv),
                                index_col=0,
                                encoding='utf-8-sig')
    df_text_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.text_csv),
                               index_col=0,
                               encoding='utf-8-sig')

    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    short_code_array = np.array(df_label.index)
    label_array = np.array(df_label['category'])
    n_clusters = np.max(label_array) + 1
    short_code_train, short_code_val, label_train, label_val = train_test_split(
        short_code_array, label_array, test_size=0.2, random_state=42)
    df_train = pd.DataFrame(data=label_train,
                            index=short_code_train,
                            columns=df_label.columns)
    df_val = pd.DataFrame(data=label_val,
                          index=short_code_val,
                          columns=df_label.columns)
    print("Loading dataset...")
    train_dataset, val_dataset = load_multi_csv_data(df_image_data,
                                                     df_text_data, df_train,
                                                     df_val, CONFIG)
    print("Loading dataset completed")

    image_encoder = MDEC_encoder(input_dim=args.input_dim,
                                 z_dim=args.latent_dim,
                                 n_clusters=n_clusters,
                                 encodeLayer=[500, 500, 2000],
                                 activation="relu",
                                 dropout=0)
    image_encoder.load_model(
        os.path.join(CONFIG.CHECKPOINT_PATH, "image_sdae_" +
                     str(args.latent_dim)) + ".pt")
    text_encoder = MDEC_encoder(input_dim=args.input_dim,
                                z_dim=args.latent_dim,
                                n_clusters=n_clusters,
                                encodeLayer=[500, 500, 2000],
                                activation="relu",
                                dropout=0)
    text_encoder.load_model(
        os.path.join(CONFIG.CHECKPOINT_PATH, "text_sdae_" +
                     str(args.latent_dim)) + ".pt")
    mdec = MultiDEC(device=device,
                    image_encoder=image_encoder,
                    text_encoder=text_encoder,
                    n_clusters=n_clusters)
    exp = Experiment("MDEC " + str(args.latent_dim), capture_io=True)
    print(mdec)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        mdec.fit(train_dataset,
                 val_dataset,
                 lr=args.lr,
                 batch_size=args.batch_size,
                 num_epochs=args.epochs,
                 save_path=CONFIG.CHECKPOINT_PATH)
        print("Finish!!!")

    finally:
        exp.end()
Code Example #19
nb_states = env.observation_space.shape[0]
nb_actions = env.action_space.shape[0]

agent = DDPG(nb_states, nb_actions, args)
evaluate = Evaluator(args.validate_episodes,
                     args.validate_steps,
                     args.output,
                     max_episode_length=args.max_episode_length)

exp = None

if args.mode == 'train':
    if hyperdash_support:
        exp = Experiment("sim2real-ddpg-simplus-cheetah")
        exp.param("model", MODEL_PATH)
        for arg in [
                "env", "rate", "prate", "hidden1", "hidden2", "warmup",
                "discount", "bsize", "rmsize", "window_length", "tau",
                "ou_theta", "ou_sigma", "ou_mu", "validate_episodes",
                "max_episode_length", "validate_steps", "init_w", "train_iter",
                "epsilon", "seed", "resume"
        ]:
            arg_val = getattr(args, arg)
            exp.param(arg, arg_val)

        import socket
        exp.param("host", socket.gethostname())

    train(args,
          args.train_iter,
Code Example #20
def train(train_list,
          test_list,
          lr,
          epoch,
          batchsize,
          insize,
          outsize,
          save_interval=10,
          weight_decay=5e-4,
          lr_step=10,
          model_name='resnet34',
          loss_name='focal_loss',
          metric_name='arc_margin',
          optim_name='adam',
          num_workers=4,
          print_freq=1e+6,
          debug=False):

    device = torch.device("cuda")

    train_dataset = Dataset(train_list,
                            mode='train',
                            insize=insize,
                            debug=debug)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=batchsize,
                                              shuffle=True,
                                              num_workers=num_workers)
    test_dataset = Dataset(test_list, mode='test', insize=insize, debug=debug)
    testloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=batchsize,
                                             shuffle=False,
                                             num_workers=num_workers)
    class_num = train_dataset.get_classnum()

    print('{} train iters per epoch:'.format(len(trainloader)))
    print('{} test iters per epoch:'.format(len(testloader)))

    if loss_name == 'focal_loss':
        criterion = FocalLoss(gamma=2)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    if model_name == 'resnet18':
        model = resnet_face18(insize, outsize)
    elif model_name == 'resnet34':
        model = resnet34(insize, outsize)
    elif model_name == 'resnet50':
        model = resnet50(insize, outsize)
    elif model_name == 'resnet101':
        model = resnet101(insize, outsize)
    elif model_name == 'resnet152':
        model = resnet152(insize, outsize)
    elif model_name == 'shuffle':
        model = ShuffleFaceNet(outsize)
    elif model_name == 'simplev1':
        model = CNNv1(insize, outsize, activation='relu', kernel_pattern='v1')
    else:
        raise ValueError('Invalid model name: {}'.format(model_name))

    if metric_name == 'add_margin':
        metric_fc = AddMarginProduct(outsize, class_num, s=30, m=0.35)
    elif metric_name == 'arc_margin':
        metric_fc = ArcMarginProduct(outsize,
                                     class_num,
                                     s=30,
                                     m=0.5,
                                     easy_margin=False)
    elif metric_name == 'sphere':
        metric_fc = SphereProduct(outsize, class_num, m=4)
    else:
        metric_fc = nn.Linear(outsize, class_num)

    # view_model(model, opt.input_shape)
    print(model)
    model.to(device)
    model = DataParallel(model)
    metric_fc.to(device)
    metric_fc = DataParallel(metric_fc)

    assert optim_name in ['sgd', 'adam']
    if optim_name == 'sgd':
        optimizer = torch.optim.SGD([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                    lr=lr,
                                    weight_decay=weight_decay)
    elif optim_name == 'adam':
        optimizer = torch.optim.Adam([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                     lr=lr,
                                     weight_decay=weight_decay)
    scheduler = StepLR(optimizer, step_size=lr_step, gamma=0.1)

    start = time.time()
    training_id = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    hyperdash_exp = Experiment(training_id)
    checkpoints_dir = os.path.join('logs', training_id)
    if not os.path.exists(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    logging_path = os.path.join(checkpoints_dir, 'history.csv')

    config = {}
    config['train_list'] = train_list
    config['test_list'] = test_list
    config['lr'] = lr
    config['epoch'] = epoch
    config['batchsize'] = batchsize
    config['insize'] = insize
    config['outsize'] = outsize
    config['save_interval'] = save_interval
    config['weight_decay'] = weight_decay
    config['lr_step'] = lr_step
    config['model_name'] = model_name
    config['loss_name'] = loss_name
    config['metric_name'] = metric_name
    config['optim_name'] = optim_name
    config['num_workers'] = num_workers
    config['debug'] = debug
    for k, v in config.items():
        hyperdash_exp.param(k, v, log=False)
    with open(os.path.join(checkpoints_dir, 'train_config.json'), 'w') as f:
        json.dump(config, f, indent=4)

    with open(logging_path, 'w') as f:
        f.write('epoch,time_elapsed,train_loss,train_acc,test_loss,test_acc\n')

    prev_time = datetime.datetime.now()
    for i in range(epoch):
        model.train()
        for ii, data in enumerate(tqdm(trainloader, disable=True)):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            output = metric_fc(feature, label)
            loss = criterion(output, label)
            pred_classes = np.argmax(output.data.cpu().numpy(), axis=1)
            acc = np.mean(
                (pred_classes == label.data.cpu().numpy()).astype(int))
            optimizer.zero_grad()
            loss.backward()

            #import pdb; pdb.set_trace()
            optimizer.step()
            #scheduler.step()

            iters = i * len(trainloader) + ii

            if iters % print_freq == 0 or debug:
                speed = print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {} acc {}'.
                      format(time_str, i, ii, speed, loss.item(), acc))

                start = time.time()

        model.eval()
        for ii, data in enumerate(tqdm(testloader, disable=True)):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            output = metric_fc(feature, label)
            test_loss = criterion(output, label)
            output = np.argmax(output.data.cpu().numpy(), axis=1)
            test_acc = np.mean(
                (output == label.data.cpu().numpy()).astype(int))
            #test_acc = np.mean((torch.argmax(output, dim=1) == label).type(torch.int32))

        if i % save_interval == 0 or i == epoch - 1:  # also save after the final epoch
            save_model(model.module, checkpoints_dir, model_name, i)
            save_model(metric_fc.module, checkpoints_dir, metric_name, i)

        new_time = datetime.datetime.now()
        with open(logging_path, 'a') as f:
            f.write('{},{},{},{},{},{}\n'.format(
                i, (new_time - prev_time).total_seconds(), loss.item(), acc,
                test_loss.item(), test_acc))
        prev_time = datetime.datetime.now()

        hyperdash_exp.metric('train_loss', loss.item(), log=False)
        hyperdash_exp.metric('train_acc', acc, log=False)
        hyperdash_exp.metric('test_loss', test_loss.item(), log=False)
        hyperdash_exp.metric('test_acc', test_acc, log=False)

    hyperdash_exp.end()
    print('Finished {}'.format(training_id))
Code Example #21
def run_pusher3dof(args, sim=True, vanilla=False):
    try:
        from hyperdash import Experiment

        hyperdash_support = True
    except ImportError:
        hyperdash_support = False

    env = NormalizedEnv(gym.make(args.env))

    torques = [1.0] * 3  # if real
    colored = False

    if sim:
        torques = [args.t0, args.t1, args.t2]
        colored = True

    if not vanilla:
        env.env._init(
            torques=torques,
            colored=colored
        )

    if args.seed > 0:
        np.random.seed(args.seed)
        env.seed(args.seed)

    nb_states = env.observation_space.shape[0]
    nb_actions = env.action_space.shape[0]

    agent = DDPG(nb_states, nb_actions, args)
    evaluate = Evaluator(
        args.validate_episodes,
        args.validate_steps,
        args.output,
        max_episode_length=args.max_episode_length
    )

    exp = None

    if args.mode == 'train':
        if hyperdash_support:
            prefix = "real"
            if sim: prefix = "sim"

            exp = Experiment("s2r-pusher3dof-ddpg-{}".format(prefix))
            import socket

            exp.param("host", socket.gethostname())
            exp.param("type", prefix)  # sim or real
            exp.param("vanilla", vanilla)  # vanilla or not
            exp.param("torques", torques)
            exp.param("folder", args.output)

            for arg in ["env", "max_episode_length", "train_iter", "seed", "resume"]:
                arg_val = getattr(args, arg)
                exp.param(arg, arg_val)

        train(args, args.train_iter, agent, env, evaluate,
              args.validate_steps, args.output,
              max_episode_length=args.max_episode_length, debug=args.debug, exp=exp)

        # when done (exp is None when hyperdash isn't installed)
        if exp is not None:
            exp.end()

    elif args.mode == 'test':
        test(args.validate_episodes, agent, env, evaluate, args.resume,
             visualize=args.vis, debug=args.debug, load_best=args.best)

    else:
        raise RuntimeError('undefined mode {}'.format(args.mode))
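The try/except import above makes hyperdash optional, but a few later calls (e.g. exp.end()) still assume an experiment object exists. One defensive variation, sketched here as an assumption rather than the project's actual code, is a no-op stand-in with the same interface:

class NullExperiment:
    # No-op stand-in so exp.param / exp.metric / exp.end are always safe to call.
    def __init__(self, *args, **kwargs):
        pass

    def param(self, name, value, log=True):
        return value

    def metric(self, name, value, log=True):
        return value

    def end(self):
        pass


try:
    from hyperdash import Experiment
except ImportError:
    Experiment = NullExperiment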
Code example #22
    def test_experiment(self):
        # Run a test job via the Experiment API
        # Make sure log file is where is supposed to be
        # look at decorator
        # verify run start/stop is sent
        with patch("sys.stdout", new=StringIO()) as faked_out:
            exp = Experiment("MNIST")
            exp.log("test print")
            exp.param("batch size", 32)
            for i in exp.iter(2):
                time.sleep(1)
                exp.metric("accuracy", i * 0.2)
            time.sleep(0.1)
            exp.end()

        # Test params match what is expected
        params_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "params" in payload:
                params_messages.append(payload)

        expect_params = [
            {
                "params": {
                    "batch size": 32,
                },
                "is_internal": False,
            },
            {
                "params": {
                    "hd_iter_0_epochs": 2,
                },
                "is_internal": True,
            },
        ]
        assert len(expect_params) == len(params_messages)
        for i, message in enumerate(params_messages):
            assert message == expect_params[i]

        # Test metrics match what is expected
        metrics_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "name" in payload:
                metrics_messages.append(payload)

        expect_metrics = [
            {
                "is_internal": True,
                "name": "hd_iter_0",
                "value": 0
            },
            {
                "is_internal": False,
                "name": "accuracy",
                "value": 0
            },
            {
                "is_internal": True,
                "name": "hd_iter_0",
                "value": 1
            },
            {
                "is_internal": False,
                "name": "accuracy",
                "value": 0.2
            },
        ]
        assert len(expect_metrics) == len(metrics_messages)
        for i, message in enumerate(metrics_messages):
            assert message == expect_metrics[i]

        captured_out = faked_out.getvalue()
        assert "error" not in captured_out

        # Make sure correct API name / version headers are sent
        assert server_sdk_headers[0][API_KEY_NAME] == API_NAME_EXPERIMENT
        assert server_sdk_headers[0][
            VERSION_KEY_NAME] == get_hyperdash_version()

        # Make sure logs were persisted
        expect_logs = [
            "{ batch size: 32 }",
            "test print",
            "| Iteration 0 of 1 |",
            "| accuracy:   0.000000 |",
        ]

        log_dir = get_hyperdash_logs_home_path_for_job("MNIST")
        latest_log_file = max([
            os.path.join(log_dir, filename) for filename in os.listdir(log_dir)
        ],
                              key=os.path.getmtime)
        with open(latest_log_file, "r") as log_file:
            data = log_file.read()
            for log in expect_logs:
                assert_in(log, data)
        os.remove(latest_log_file)
Code example #23
# digits.py
from sklearn import svm, datasets
from hyperdash import Experiment

# Preprocess data
digits = datasets.load_digits()
test_cases = 50
X_train, y_train = digits.data[:-test_cases], digits.target[:-test_cases]
X_test, y_test = digits.data[-test_cases:], digits.target[-test_cases:]

# Create an experiment with a model name, then autostart
exp = Experiment("Digits Classifier")
# Record the value of hyperparameter gamma for this experiment
gamma = exp.param("gamma", 0.1)
# Param can record any basic type (Number, Boolean, String)

classifier = svm.SVC(gamma=gamma)
classifier.fit(X_train, y_train)

# Record a numerical performance metric
exp.metric("accuracy", classifier.score(X_test, y_test))

# Cleanup and mark that the experiment successfully completed
exp.end()
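A small variation on the same pattern (a sketch, not part of the original digits.py) that combines exp.iter — demonstrated in code example #22 above — with repeated exp.metric calls so the accuracy is charted over several runs; the parameter names are illustrative only:

from hyperdash import Experiment
from sklearn import svm, datasets

digits = datasets.load_digits()
X_train, y_train = digits.data[:-50], digits.target[:-50]
X_test, y_test = digits.data[-50:], digits.target[-50:]

exp = Experiment("Digits Classifier sweep")
for i in exp.iter(3):
    # Try a smaller gamma on each iteration and log the resulting accuracy.
    gamma = exp.param("gamma_{}".format(i), 10.0 ** -(i + 1))
    clf = svm.SVC(gamma=gamma).fit(X_train, y_train)
    exp.metric("accuracy", clf.score(X_test, y_test))
exp.end()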
Code example #24
if args.memdebug:
    import gc
    import objgraph
    import ipdb

run_name = time.strftime("%y%m%d%H%M%S")

assert args.algo in ['a2c', 'ppo', 'acktr']
if args.recurrent_policy:
    assert args.algo in ['a2c', 'ppo'], \
        'Recurrent policy is not implemented for ACKTR'

exp = None
if has_hyperdash:
    exp = Experiment("{} - {}".format(args.env_name, args.algo))
    exp.param("NAME", run_name)
    for param, value in vars(args).items():
        exp.param(param, value)

num_updates = int(args.num_frames) // args.num_steps // args.num_processes

num_breaks = num_updates / 10

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

try:
    os.makedirs(args.log_dir)
except OSError:
    files = glob.glob(os.path.join(args.log_dir, '*.monitor.csv'))
Code example #25
class KerasModel:
    def __init__(self,
                 name='deathbot',
                 load_weights=False,
                 training=False,
                 batch_size=100,
                 lr=1e-3,
                 location=None):
        self.session = tf.Session()
        self.name = name

        if training:
            from hyperdash import Experiment
            self.exp = Experiment(name)

        if name in MODELS.keys():
            self.model = MODELS[name]() if not training else MODELS[name](
                self.exp)
        else:
            raise ValueError("Unknown model name: {}".format(name))
        adam = Adam(lr=lr)
        nadam = Nadam(lr=lr)
        #rms = RMSprop(lr=lr)
        #sgd = SGD(lr=lr)
        self.optimizer = adam if name == "evo" else nadam
        loss = ["binary_crossentropy", "categorical_crossentropy", "poisson"]
        self.model.compile(optimizer=self.optimizer,
                           loss=loss[1],
                           metrics=["acc"])

        self.callbacks = []
        if training:
            self.exp.param("lr", lr)
            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.5,
                                          patience=4,
                                          min_lr=1e-4,
                                          verbose=1)
            tb = TensorBoard('./models/logs/{}'.format(name), write_graph=True)
            cp = ModelCheckpoint(
                filepath='./models/weights-{}.hdf5'.format(name),
                monitor='val_acc',
                verbose=1,
                save_best_only=True)
            hd = Hyperdash(self.exp, self.model)
            es = EarlyStopping('val_acc', patience=5, verbose=1)
            self.callbacks = [cp, tb, hd, reduce_lr, es]

        if load_weights:
            #print(os.listdir(os.getcwd()))
            self.model.load_weights('./deathbot/weights-{}.hdf5'.format(name))
            if training: print('Weights Loaded...')

    def save(self, path):
        self.model.save(path + self.name + ".h5")

    def fit(self, input_data, expected_output_data, batch_size=100, epochs=1):
        input_data = self.normalize_input(input_data)
        return self.model.fit(input_data,
                              expected_output_data,
                              batch_size=batch_size,
                              epochs=epochs,
                              verbose=1,
                              callbacks=self.callbacks,
                              validation_split=0.2,
                              shuffle=False)

    def predict(self, input_data, batch_size=1, p=False):
        return list(
            map(
                self.clean_pred,
                self.model.predict(self.normalize_input(np.array([input_data
                                                                  ])),
                                   batch_size=batch_size)[0]))

    def compute_loss(self, input_data, expected_output_data):
        return self.model.evaluate(self.normalize_input(input_data),
                                   expected_output_data,
                                   batch_size=1,
                                   verbose=1)

    @staticmethod
    def clean_pred(pred):
        return pred if pred > 0.01 else 0.0

    @staticmethod
    def normalize_input(input_data):
        # Assert the shape is what we expect
        assert len(input_data.shape) == 3 and input_data.shape[
            1] == PLANET_MAX_NUM and input_data.shape[2] == PER_PLANET_FEATURES
        m = np.expand_dims(input_data.mean(axis=1), axis=1)
        s = np.expand_dims(input_data.std(axis=1), axis=1)
        return (input_data - m) / (s + 1e-6)
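The Hyperdash(self.exp, self.model) callback used above is defined elsewhere in the project; its signature and behaviour here are assumptions. A minimal sketch of such a Keras callback that forwards epoch metrics to the experiment:

from keras.callbacks import Callback


class Hyperdash(Callback):
    def __init__(self, exp, model=None):
        super(Hyperdash, self).__init__()
        self.exp = exp
        self._tracked_model = model  # kept only to mirror the call site above

    def on_epoch_end(self, epoch, logs=None):
        # Forward every metric Keras reports for this epoch (loss, acc, val_loss, ...).
        for name, value in (logs or {}).items():
            self.exp.metric(name, float(value))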
Code example #26
class NeuralNet(object):
    LAYER1_SIZE = 512
    LAYER2_SIZE = 256
    LAYER3_SIZE = 128
    LAYER4_SIZE = 64
    LAYER5_SIZE = 32
    OUTPUT_SIZE = 1

    def __init__(self,
                 name='nn-model',
                 cached_model=None,
                 seed=None,
                 lr=1e-4,
                 training=False):
        self.graph = tf.Graph()
        self.training = training
        if self.training:
            from hyperdash import Experiment

            self.exp = Experiment(name)

        with self.graph.as_default():
            if seed is not None:
                tf.set_random_seed(seed)
            self.session = tf.Session()
            self.features = tf.placeholder(dtype=tf.float32,
                                           name="input_features",
                                           shape=(None, PLANET_MAX_NUM,
                                                  PER_PLANET_FEATURES))
            # target_distribution describes what the bot did in a real game.
            # For instance, if it sent 20% of the ships to the first planet and 15% of the ships to the second planet,
            # then expected_distribution = [0.2, 0.15 ...]
            self.target_distribution = tf.placeholder(
                dtype=tf.float32,
                name="target_distribution",
                shape=(None, PLANET_MAX_NUM))
            # Combine all the planets from all the frames together, so it's easier to share
            # the weights and biases between them in the network.
            flattened_frames = tf.reshape(self.features,
                                          [-1, PER_PLANET_FEATURES])

            layer1 = fully_connected(flattened_frames, 512)
            layer2 = fully_connected(layer1, 256)
            layer3 = fully_connected(layer2, 128)
            layer4 = fully_connected(layer3, 64)
            layer5 = fully_connected(layer4, 32)
            layer6 = fully_connected(layer5, 1, activation_fn=None)
            # Group the per-planet scores back into frames
            logits = tf.reshape(layer6, [-1, PLANET_MAX_NUM])

            self.prediction_normalized = tf.nn.softmax(logits)
            self.loss_op = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits, labels=self.target_distribution))

            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=lr)  # returns Op

            self.train_op = self.optimizer.minimize(self.loss_op)

            # self.acc_op = tf.reduce_mean(tf.reduce_min(tf.cast(self.prediction_normalized, tf.float32), 1))
            # self.acc, self.update_acc_op = tf.metrics.mean_per_class_accuracy(self.target_distribution, self.prediction_normalized, 28)
            # multilabel_accuracy(self.prediction_normalized, self.target_distribution)
            self.saver = tf.train.Saver()
            if self.training:
                self.exp.param("lr", lr)
            if cached_model is None:
                self.session.run([
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()
                ])
            else:
                self.session.run(tf.local_variables_initializer())
                self.saver.restore(self.session, cached_model)

    def fit(self, input_data, expected_output_data):
        loss, _ = self.session.run(
            [self.loss_op, self.train_op],
            feed_dict={
                self.features: normalize_input(input_data),
                self.target_distribution: expected_output_data
            })

        if self.training:
            self.exp.metric("training_loss", loss)
        return loss

    def predict(self, input_data):
        """
        Given data from 1 frame, predict where the ships should be sent.

        :param input_data: numpy array of shape (PLANET_MAX_NUM, PER_PLANET_FEATURES)
        :return: 1-D numpy array of length (PLANET_MAX_NUM) describing percentage of ships
        that should be sent to each planet
        """
        return self.session.run(
            self.prediction_normalized,
            feed_dict={self.features:
                       normalize_input(np.array([input_data]))})[0]

    def compute_loss(self, input_data, expected_output_data):
        """
        Compute loss on the input data without running any training.

        :param input_data: numpy array of shape (number of frames, PLANET_MAX_NUM, PER_PLANET_FEATURES)
        :param expected_output_data: numpy array of shape (number of frames, PLANET_MAX_NUM)
        :return: training loss on the input data
        """
        loss = self.session.run(self.loss_op,
                                feed_dict={
                                    self.features: normalize_input(input_data),
                                    self.target_distribution:
                                    expected_output_data
                                })
        if self.training:
            self.exp.metric("val_loss", loss)
        return loss

    def save(self, path):
        """
        Serializes this neural net to given path.
        :param path:
        """
        self.saver.save(self.session, path)
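normalize_input is called throughout this class but defined elsewhere; a sketch consistent with the per-frame standardisation shown in code example #25 (an assumption about the project's actual helper):

import numpy as np


def normalize_input(input_data):
    # input_data has shape (num_frames, PLANET_MAX_NUM, PER_PLANET_FEATURES):
    # standardise each frame across its planets, guarding against zero variance.
    m = np.expand_dims(input_data.mean(axis=1), axis=1)
    s = np.expand_dims(input_data.std(axis=1), axis=1)
    return (input_data - m) / (s + 1e-6)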
Code example #27
def train_reconstruction(args):
    device = torch.device(args.gpu)
    print("Loading embedding model...")
    with open(
            os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                         'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    with open(os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                           'word_idx.json'),
              "r",
              encoding='utf-8') as f:
        word_idx = json.load(f)
    print("Loading embedding model completed")
    print("Loading dataset...")
    train_dataset, val_dataset = load_text_data(args,
                                                CONFIG,
                                                word2idx=word_idx[1])
    print("Loading dataset completed")
    train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle),\
             DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    # t1 = max_sentence_len + 2 * (args.filter_shape - 1)
    t1 = CONFIG.MAX_SENTENCE_LEN
    t2 = int(math.floor(
        (t1 - args.filter_shape) / 2) + 1)  # "2" means stride size
    t3 = int(math.floor((t2 - args.filter_shape) / 2) + 1)
    args.t3 = t3
    embedding = nn.Embedding.from_pretrained(
        torch.FloatTensor(embedding_model))
    text_encoder = text_model.ConvolutionEncoder(embedding, t3,
                                                 args.filter_size,
                                                 args.filter_shape,
                                                 args.latent_size)
    text_decoder = text_model.DeconvolutionDecoder(embedding, args.tau, t3,
                                                   args.filter_size,
                                                   args.filter_shape,
                                                   args.latent_size, device)
    if args.resume:
        print("Restart from checkpoint")
        checkpoint = torch.load(os.path.join(CONFIG.CHECKPOINT_PATH,
                                             args.resume),
                                map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        text_encoder.load_state_dict(checkpoint['text_encoder'])
        text_decoder.load_state_dict(checkpoint['text_decoder'])
    else:
        print("Start from initial")
        start_epoch = 0

    text_autoencoder = text_model.TextAutoencoder(text_encoder, text_decoder)
    criterion = nn.NLLLoss().to(device)
    text_autoencoder.to(device)

    # Base lr is fixed at 1.0 so the cyclical LambdaLR multiplier below acts as
    # the effective learning rate (between args.lr and args.lr * args.lr_factor).
    optimizer = AdamW(text_autoencoder.parameters(),
                      lr=1.,
                      weight_decay=args.weight_decay,
                      amsgrad=True)
    step_size = args.half_cycle_interval * len(train_loader)
    clr = cyclical_lr(step_size,
                      min_lr=args.lr,
                      max_lr=args.lr * args.lr_factor)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
    exp = Experiment("Text autoencoder " + str(args.latent_size),
                     capture_io=False)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        text_autoencoder.train()

        for epoch in range(start_epoch, args.epochs):
            print("Epoch: {}".format(epoch))
            for steps, batch in enumerate(train_loader):
                torch.cuda.empty_cache()
                feature = Variable(batch).to(device)
                optimizer.zero_grad()
                prob = text_autoencoder(feature)
                loss = criterion(prob.transpose(1, 2), feature)
                loss.backward()
                optimizer.step()
                scheduler.step()

                if (steps * args.batch_size) % args.log_interval == 0:
                    input_data = feature[0]
                    single_data = prob[0]
                    _, predict_index = torch.max(single_data, 1)
                    input_sentence = util.transform_idx2word(
                        input_data.detach().cpu().numpy(),
                        idx2word=word_idx[0])
                    predict_sentence = util.transform_idx2word(
                        predict_index.detach().cpu().numpy(),
                        idx2word=word_idx[0])
                    print("Epoch: {} at {} lr: {}".format(
                        epoch, str(datetime.datetime.now()),
                        str(scheduler.get_lr())))
                    print("Steps: {}".format(steps))
                    print("Loss: {}".format(loss.detach().item()))
                    print("Input Sentence:")
                    print(input_sentence)
                    print("Output Sentence:")
                    print(predict_sentence)
                    del input_data, single_data, _, predict_index
                del feature, prob, loss

            exp.log("\nEpoch: {} at {} lr: {}".format(
                epoch, str(datetime.datetime.now()), str(scheduler.get_lr())))
            _avg_loss, _rouge_1, _rouge_2 = eval_reconstruction_with_rouge(
                text_autoencoder, word_idx[0], criterion, val_loader, device)
            exp.log("\nEvaluation - loss: {}  Rouge1: {} Rouge2: {}".format(
                _avg_loss, _rouge_1, _rouge_2))

            util.save_models(
                {
                    'epoch': epoch + 1,
                    'text_encoder': text_encoder.state_dict(),
                    'text_decoder': text_decoder.state_dict(),
                    'avg_loss': _avg_loss,
                    'Rouge1:': _rouge_1,
                    'Rouge2': _rouge_2,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                }, CONFIG.CHECKPOINT_PATH,
                "text_autoencoder_" + str(args.latent_size))

        print("Finish!!!")

    finally:
        exp.end()
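cyclical_lr is not defined in this snippet. Because the optimizer is built with lr=1.0, the multiplier returned to LambdaLR becomes the effective learning rate; a triangular schedule that fits that call site might look like the following sketch (an assumption, not the project's exact implementation):

import math


def cyclical_lr(step_size, min_lr=3e-4, max_lr=3e-3):
    # Returns f(step) that oscillates linearly between min_lr and max_lr
    # with a full cycle every 2 * step_size optimizer steps.
    def relative(it):
        cycle = math.floor(1 + it / (2 * step_size))
        x = abs(it / step_size - 2 * cycle + 1)
        return max(0.0, 1.0 - x)

    return lambda it: min_lr + (max_lr - min_lr) * relative(it)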
Code example #28
def main(job_dir, data_path, model_id, weights_path, loss, train_csv, val_csv, batch_size, train_epocs, optimizer, is_tpu, lr, hyperdash_key, **args):
  logging.getLogger().setLevel(logging.INFO)

  if not os.path.exists("output"):
    os.makedirs("output")

  batch_size *= 3
  is_full_data = False
  hyperdash_capture_io = True

  # Setting up Hyperdash
  def get_api_key():
    return hyperdash_key

  if hyperdash_key:
    exp = Experiment(model_id, get_api_key, capture_io=hyperdash_capture_io)
    exp.param("model_name", job_dir.split("/")[-1])
    exp.param("data_path", data_path)
    exp.param("batch_size", batch_size)
    exp.param("train_epocs", train_epocs)
    exp.param("optimizer", optimizer)
    exp.param("lr", lr)
    if weights_path:
      exp.param("weights_path", weights_path)
    exp.param("loss", loss)
    exp.param("train_csv", train_csv)
    exp.param("val_csv", val_csv)

  logging.info("Downloading Training Image from path {}".format(data_path))
  downloads_training_images(data_path, is_cropped=("_cropped" in job_dir))

  logging.info("Building Model: {}".format(model_id))
  if model_id in globals():
    model_getter = globals()[model_id]
    model = model_getter()
  else:
    raise RuntimeError("Failed. Model function {} not found".format(model_id))

  if loss+"_fn" in globals():
    _loss_tensor = globals()[loss+"_fn"](batch_size)
  else:
    raise RuntimeError("Failed. Loss function {} not found".format(loss+"_fn"))

  accuracy = accuracy_fn(batch_size)
  img_width, img_height = [int(v) for v in model.input[0].shape[1:3]]

  trainable_count, non_trainable_count = print_trainable_counts(model)

  if hyperdash_key:
    exp.param("trainable_count", trainable_count)
    exp.param("non_trainable_count", non_trainable_count)

  dg = DataGenerator({
    "rescale": 1. / 255,
    "horizontal_flip": True,
    "vertical_flip": True,
    "zoom_range": 0.2,
    "shear_range": 0.2,
    "rotation_range": 30,
    "fill_mode": 'nearest'
  }, data_path, train_csv, val_csv, target_size=(img_width, img_height))

  train_generator = dg.get_train_generator(batch_size, is_full_data)
  test_generator = dg.get_test_generator(batch_size)

  if weights_path:
    # Copy the weights in binary mode; text mode would corrupt the HDF5 file.
    with file_io.FileIO(weights_path, mode='rb') as input_f:
        with file_io.FileIO("weights.h5", mode='wb+') as output_f:
            output_f.write(input_f.read())
    model.load_weights("weights.h5")

  # model = multi_gpu_model(model, gpus=4)
  if optimizer=="mo":
    model.compile(loss=_loss_tensor, optimizer=tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9, use_nesterov=True), metrics=[accuracy])
  elif optimizer=="rms":
    model.compile(loss=_loss_tensor, optimizer=tf.train.RMSPropOptimizer(lr), metrics=[accuracy])
  else:
    logging.error("Optimizer not supported")
    return

  csv_logger = CSVLogger(job_dir, "output/training.log")
  model_checkpoint_path = "weights-improvement-{epoch:02d}-{val_loss:.2f}.h5"
  model_checkpointer = ModelCheckpoint(job_dir, model_checkpoint_path, save_best_only=True, save_weights_only=True, monitor="val_loss", verbose=1)
  tensorboard = TensorBoard(log_dir=job_dir + '/logs/', histogram_freq=0, write_graph=True, write_images=True)
  # test_accuracy = TestAccuracy(data_path)  # Not using test data as of now

  callbacks = [csv_logger, model_checkpointer, tensorboard]
  if hyperdash_key:
    callbacks.append(HyperdashCallback(exp))

  model_json = model.to_json()
  write_file_and_backup(model_json, job_dir, "output/model.def")

  with open("output/model_code.pkl", 'wb') as f:
    dill.dump(model_getter, f)
  backup_file(job_dir, "output/model_code.pkl")

  model_code = inspect.getsource(model_getter)
  write_file_and_backup(model_code, job_dir, "output/model_code.txt")

  if is_tpu:
    model = tf.contrib.tpu.keras_to_tpu_model(
        model,
        strategy=tf.contrib.tpu.TPUDistributionStrategy(
            tf.contrib.cluster_resolver.TPUClusterResolver(os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'])
        )
    )

  history = model.fit_generator(train_generator,
                                steps_per_epoch=(train_generator.n//(train_generator.batch_size)),
                                validation_data=test_generator,
                                epochs=train_epocs,
                                validation_steps=(test_generator.n//(test_generator.batch_size)),
                                callbacks=callbacks)

  pd.DataFrame(history.history).to_csv("output/history.csv")
  backup_file(job_dir, "output/history.csv")

  model.save_weights('output/model.h5')
  backup_file(job_dir, 'output/model.h5')
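write_file_and_backup and backup_file above are project helpers that are not shown here. A plausible sketch (an assumption) is that they write locally and then copy the artifact next to job_dir, e.g. a GCS bucket, through file_io:

import os

from tensorflow.python.lib.io import file_io


def backup_file(job_dir, path):
    # Copy a local artifact into the job directory (works for gs:// paths too).
    with file_io.FileIO(path, mode='rb') as src:
        with file_io.FileIO(os.path.join(job_dir, path), mode='wb+') as dst:
            dst.write(src.read())


def write_file_and_backup(contents, job_dir, path):
    with open(path, 'w') as f:
        f.write(contents)
    backup_file(job_dir, path)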
Code example #29
File: experiment.py  Project: taichimaeda/EE
class Experiment:
    @logger.read
    def __init__(self, dataset_name, model_name, optimizer_name, trial_num):
        """
        :param dataset_name: name of the dataset
        :type dataset_name: str
        :param model_name: name of the model
        :type model_name: str
        :param optimizer_name: name of the optimizer
        :type optimizer_name: str
        :param trial_num: current number of repeated trials
        :type trial_num: int
        """
        # get optimized hyperparameters
        with open(
                f'../params/{dataset_name}_{model_name}_{optimizer_name}/result.json'
        ) as f:
            params = json.load(f)

        # get instances
        self.dataset = Datasets.get(dataset_name)
        self.model = Models.get(model_name, dataset=self.dataset)
        self.optimizer = Optimizers.get(optimizer_name, params=params)

        # get config
        with open('./config.json') as f:
            config = json.load(f)

        # get constants
        c = config['constants'][dataset_name][model_name]
        self.loss = c['loss']
        self.batch_size = c['batch_size']
        self.epochs = c['epochs']

        # configure and initialize directory
        d = self.main_dir = f'../data/{dataset_name}_{model_name}_{optimizer_name}/trial{trial_num}'
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)

        # configure hyperdash experiment
        self.hd_exp = HyperdashExperiment(
            f'{dataset_name}',
            api_key_getter=lambda: config['hyperdash']['api_key'])
        self.hd_exp.param('dataset_name', dataset_name)
        self.hd_exp.param('model_name', model_name)
        self.hd_exp.param('optimizer_name', optimizer_name)
        self.hd_exp.param('trial_num', trial_num)

        for k, v in params.items():
            self.hd_exp.param(k, v)

        # set callbacks
        self.callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      self.hd_exp),
            TensorBoard(log_dir=f'{self.main_dir}/tensorboard'),
            TimeLogger(filename=f'{self.main_dir}/time.csv'),
            CSVLogger(filename=f'{self.main_dir}/result.csv', append=True)
        ]

    @logger.write
    def begin(self):
        # get data
        (x_train, y_train), (x_test, y_test) = self.dataset.get_batch()

        # start learning
        self.model.compile(loss=self.loss,
                           optimizer=self.optimizer,
                           metrics=['accuracy'])
        self.model.fit(x_train,
                       y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       callbacks=self.callbacks,
                       validation_split=0.2,
                       verbose=2)

        # save final scores
        score = self.model.evaluate(x_test, y_test, verbose=1)
        with open(f'{self.main_dir}/test.json', 'w') as f:
            json.dump({
                'test loss': score[0],
                'test accuracy': score[1]
            },
                      f,
                      indent=4)

        # stop hyperdash experiment
        self.hd_exp.end()
        print("MODEL LOADED, CONTINUING TRAINING")
        return "TRAINING AVG LOSS: {}\n" \
               "TRAINING AVG DIFF: {}".format(
            checkpoint["epoch_avg_loss"], checkpoint["epoch_avg_diff"])
    else:
        if optional:
            pass  # model loading was optional, so nothing to do
        else:
            # no saved model was found and loading was required
            raise Exception("model couldn't be found: {}".format(MODEL_PATH_BEST))


loss_function = nn.MSELoss()
if hyperdash_support:
    exp = Experiment("simple lstm - pusher simple")
    exp.param("layers", LSTM_LAYERS)
    exp.param("nodes", HIDDEN_NODES)
    exp.param("action steps", ACTION_STEPS)

if TRAIN:
    optimizer = optim.Adam(net.parameters())
    if CONTINUE:
        old_model_string = loadModel(optional=True)
        print(old_model_string)
else:
    old_model_string = loadModel(optional=False)

loss_min = [float('inf')]

for epoch in np.arange(EPOCHS):