def train_multidec(args):
    """Train the MultiDEC clustering model on the full multi-modal dataset."""
    print("Training multidec")
    device = torch.device(args.gpu)

    print("Loading dataset...")
    full_dataset = load_multi_csv_data(args, CONFIG)
    print("Loading dataset completed")

    def build_encoder(modal):
        # Both modalities share the same SDAE architecture; only the
        # pretrained checkpoint file differs.
        encoder = MDEC_encoder(input_dim=args.input_dim, z_dim=args.latent_dim,
                               n_clusters=args.n_clusters,
                               encodeLayer=[500, 500, 2000], activation="relu",
                               dropout=0)
        encoder.load_model(
            os.path.join(CONFIG.CHECKPOINT_PATH,
                         modal + "_sdae_" + str(args.latent_dim)) + ".pt")
        return encoder

    mdec = MultiDEC(device=device,
                    image_encoder=build_encoder("image"),
                    text_encoder=build_encoder("text"),
                    n_clusters=args.n_clusters)

    exp = Experiment("MDEC " + str(args.latent_dim) + '_' + str(args.n_clusters),
                     capture_io=True)
    print(mdec)
    for name, value in vars(args).items():
        exp.param(name, value)
    try:
        mdec.fit(full_dataset, lr=args.lr, batch_size=args.batch_size,
                 num_epochs=args.epochs, save_path=CONFIG.CHECKPOINT_PATH)
        print("Finish!!!")
    finally:
        # Always close the Hyperdash experiment, even on failure.
        exp.end()
def pretrain_ddec(args):
    """Pretrain the DualNet model on paired image/text data."""
    print("Pretraining...")
    print("Loading dataset...")
    with open(os.path.join(args.text_embedding_dir, 'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    with open(os.path.join(args.text_embedding_dir, 'word_idx.json'), "r",
              encoding='utf-8') as f:
        word_idx = json.load(f)
    train_dataset, test_dataset = load_pretrain_data(args.image_dir, word_idx[1],
                                                     args, CONFIG)
    print("Loading dataset completed")

    dualnet = DualNet(pretrained_embedding=embedding_model,
                      text_features=args.text_features,
                      z_dim=args.z_dim, n_classes=args.n_classes)
    # Same checkpoint file is used both for resuming and for saving.
    checkpoint = "/4TBSSD/CHECKPOINT/pretrain_" + str(args.z_dim) + "_0.pt"
    if args.resume:
        print("loading model...")
        dualnet.load_model(checkpoint)

    exp = Experiment("Dualnet_pretrain_" + str(args.z_dim), capture_io=True)
    print(dualnet)
    for name, value in vars(args).items():
        exp.param(name, value)
    try:
        dualnet.fit(train_dataset, test_dataset, args=args, save_path=checkpoint)
        print("Finish!!!")
    finally:
        exp.end()
def objective(self, params):
    """Objective for hyperparameter search: negated best validation accuracy.

    :param params: hyperparameters forwarded to the optimizer factory
    :return: -(maximum validation accuracy), suitable for minimization
    :rtype: float
    """
    # Resolve the concrete dataset / model / optimizer instances.
    dataset = Datasets.get(self.dataset_name)
    model = Models.get(self.model_name, dataset=dataset)
    optimizer = Optimizers.get(self.optimizer_name, params=params)

    # Record this trial on Hyperdash.
    hd_exp = HyperdashExperiment(
        f'{self.dataset_name}',
        api_key_getter=lambda: self.config['hyperdash']['api_key'])
    hd_exp.param('dataset_name', self.dataset_name)
    hd_exp.param('model_name', self.model_name)
    hd_exp.param('optimizer_name', self.optimizer_name)
    for key, value in params.items():
        hd_exp.param(key, value)

    # Stream metrics to Hyperdash, stop early on plateau, abort on NaN.
    callbacks = [
        Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'], hd_exp),
        EarlyStopping('val_accuracy', patience=10, min_delta=0.01, verbose=1),
        TerminateOnNaN(),
    ]

    (x_train, y_train), *_ = dataset.get_batch()

    model.compile(loss=self.loss, optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(x_train, y_train,
                        batch_size=self.batch_size,
                        epochs=self.epochs,
                        callbacks=callbacks,
                        validation_split=0.2,
                        verbose=2)

    hd_exp.end()

    # The caller minimizes, so return the negated best accuracy.
    best = np.array(history.history['val_accuracy']).max()
    return -best
def train_reconstruction_all(args):
    """Train (or resume) a stacked denoising autoencoder on one modality.

    Pretrains layer-wise when starting from scratch, then fine-tunes the
    full autoencoder; progress is logged to a Hyperdash experiment.
    """
    device = torch.device(args.gpu)
    df_input_data = pd.read_csv(
        os.path.join(CONFIG.CSV_PATH, args.prefix + "_" + args.target_csv),
        index_col=0, encoding='utf-8-sig')

    exp = Experiment(args.target_modal + " SDAE " + str(args.latent_dim),
                     capture_io=True)
    try:
        for arg, value in vars(args).items():
            exp.param(arg, value)

        print("Loading dataset...")
        train_dataset, val_dataset = load_autoencoder_data(df_input_data, CONFIG)
        print("Loading dataset completed")
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=args.shuffle)
        val_loader = DataLoader(val_dataset, batch_size=args.batch_size,
                                shuffle=False)

        sdae = StackedDAE(input_dim=args.input_dim, z_dim=args.latent_dim,
                          binary=False,
                          encodeLayer=[500, 500, 2000],
                          decodeLayer=[2000, 500, 500],
                          activation="relu", dropout=args.dropout, device=device)

        # Build the checkpoint path once so the resume-load path and the
        # save path can never silently diverge (it was duplicated before).
        checkpoint_path = os.path.join(
            CONFIG.CHECKPOINT_PATH,
            args.prefix + "_" + args.target_modal + "_" + args.target_dataset +
            "_sdae_" + str(args.latent_dim) + "_all.pt")
        if args.resume:
            print("resume from checkpoint")
            sdae.load_model(checkpoint_path)
        else:
            sdae.pretrain(train_loader, val_loader, lr=args.lr,
                          batch_size=args.batch_size,
                          num_epochs=args.pretrain_epochs,
                          corrupt=0.2, loss_type="mse")
        sdae.fit(train_loader, val_loader, lr=args.lr, num_epochs=args.epochs,
                 corrupt=0.2, loss_type="mse", save_path=checkpoint_path)
    finally:
        exp.end()
def train_bayes(params):
    """Objective wrapper around ``train`` for skopt's ``gp_minimize``.

    Relies on module-level globals: ``project_name``, ``train_tuple``,
    ``validation_tuple``, ``nb_epochs``, ``random_seed``, ``project_dir``
    and ``logger``.

    Arguments:
    ----------
    params: list, shape=[2,]
        Search-space sample: ``params[0]`` is log10(learning rate),
        ``params[1]`` is log2(mini-batch size).

    Returns:
    --------
    best_error: float
        Best validation error reached during training — the quantity
        gp_minimize minimizes.
    """
    # Create the Hyperdash experiment for this trial.
    hd_exp = Experiment(project_name)

    # Translate the raw search-space sample into train() hyperparameters,
    # registering each one with Hyperdash as a side effect. (The old
    # commented-out 4-dimensional layer-size search was removed.)
    layer_sizes = [4096] * 4  # architecture is fixed for this search
    learning_rate = hd_exp.param('learning rate', 10**params[0])
    mini_batch_size = hd_exp.param('mini batch size', int(2**params[1]))
    pkeep = hd_exp.param('dropout prob', 1)  # keep-prob 1 => dropout disabled
    hyper_params = [layer_sizes, learning_rate, mini_batch_size, pkeep]
    hyper_param_str = make_hyper_param_str(hyper_params)

    # Run training and time it.
    tic = time.time()
    logger.info('Start training for ' + hyper_param_str)
    log_df, best_error = train(train_tuple, validation_tuple, hyper_params,
                               nb_epochs, random_seed, hd_exp, project_dir)
    elapsed_time = time.time() - tic
    logger.info('Finished training in {} s.'.format(elapsed_time))

    # Persist the per-epoch log before finishing the experiment.
    logger.info('Writing pandas DF log to disk.')
    log_df.to_csv(project_dir + '/' + hyper_param_str + '/data_df.csv')

    # Finish Hyperdash Experiment
    hd_exp.end()
    return best_error
class HyperdashCallback(Callback):
    """Keras callback that mirrors training metrics to a Hyperdash experiment."""

    # Class-level defaults; both are rebound on the instance during training.
    exp = None
    last = 1

    def on_train_begin(self, logs=None):
        # Open a fresh experiment at the start of every fit() call.
        self.exp = Experiment("Deep Weather")

    def on_train_end(self, logs=None):
        self.exp.end()

    def on_epoch_end(self, epoch, logs=None):
        # BUGFIX: Keras may invoke callbacks with logs=None, which previously
        # crashed the membership test below.
        logs = logs or {}
        if 'loss' in logs:
            # "progress" is the (clipped) loss improvement since last epoch.
            self.exp.metric("progress", min(0.1, self.last - logs["loss"]))
            self.last = logs["loss"]
            self.exp.metric("loss", min(0.5, logs["loss"]))
            # BUGFIX: val_loss is absent when fit() has no validation data;
            # guard instead of raising KeyError.
            if 'val_loss' in logs:
                self.exp.metric("val_loss", min(0.5, logs["val_loss"]))
def test_experiment_keras_callback(self):
    """Metrics emitted via the Keras callback reach the SDK server unchanged."""
    with patch("sys.stdout", new=StringIO()) as faked_out:
        exp = Experiment("MNIST")
        keras_cb = exp.callbacks.keras
        keras_cb.on_epoch_end(0, {"val_acc": 1, "val_loss": 2})
        # Sleep 1 second due to client sampling
        time.sleep(1)
        keras_cb.on_epoch_end(1, {"val_acc": 3, "val_loss": 4})
        exp.end()

        # Collect every metric payload the fake server received.
        metrics_messages = [
            msg["payload"] for msg in server_sdk_messages
            if "name" in msg["payload"]
        ]
        expect_metrics = [
            {"is_internal": False, "name": "val_acc", "value": 1},
            {"is_internal": False, "name": "val_loss", "value": 2},
            {"is_internal": False, "name": "val_acc", "value": 3},
            {"is_internal": False, "name": "val_loss", "value": 4},
        ]
        assert len(expect_metrics) == len(metrics_messages)
        for got, want in zip(metrics_messages, expect_metrics):
            assert got["is_internal"] == want["is_internal"]
            assert got["name"] == want["name"]
            assert got["value"] == want["value"]

        captured_out = faked_out.getvalue()
        assert "error" not in captured_out
class BaseTrainer(_BaseTrainer):
    """ Base trainer to make pytorch training be easier.

    Args:
        data-augmentation (bool): Crop randomly and add random noise for data augmentation.
        epoch (int): Number of epochs to train.
        opt (str): Optimization method.
        gpu (bool): Use GPU.
        seed (str): Random seed to train.
        train (str): Path to training image-pose list file.
        val (str): Path to validation image-pose list file.
        batchsize (int): Learning minibatch size.
        out (str): Output directory.
        resume (str): Initialize the trainer from given file.
            The file name is 'epoch-{epoch number}.iter'.
        resume_model (str): Load model definition file to use for resuming training
            (it's necessary when you resume a training).
            The file name is 'epoch-{epoch number}.model'.
        resume_opt (str): Load optimization states from this file
            (it's necessary when you resume a training).
            The file name is 'epoch-{epoch number}.state'.
    """

    def __init__(self, **kwargs):
        self.data_augmentation = kwargs['data_augmentation']
        self.epoch = kwargs['epoch']
        self.gpu = (kwargs['gpu'] >= 0)
        self.opt = kwargs['opt']
        self.seed = kwargs['seed']
        self.train = kwargs['train']
        self.val = kwargs['val']
        self.batchsize = kwargs['batchsize']
        self.out = kwargs['out']
        self.resume = kwargs['resume']
        self.resume_model = kwargs['resume_model']
        self.resume_opt = kwargs['resume_opt']
        self.hyperdash = kwargs['hyperdash']
        if self.hyperdash:
            # self.experiment only exists when hyperdash logging is enabled;
            # every use below is guarded by `if self.hyperdash`.
            self.experiment = Experiment(self.hyperdash)
            for key, val in kwargs.items():
                self.experiment.param(key, val)
        # validate arguments.
        self._validate_arguments()
        self.lowest_loss = 0  # 0 means "no validation loss recorded yet"
        self.device = torch.device('cuda' if kwargs['gpu'] >= 0 else 'cpu')
        self.dataloader = torch.utils.data.DataLoader

    def _validate_arguments(self):
        """Fail fast on option combinations the trainer cannot honor."""
        if self.seed is not None and self.data_augmentation:
            raise NotSupportedError('It is not supported to fix random seed for data augmentation.')
        if self.gpu and not torch.cuda.is_available():
            raise GPUNotFoundError('GPU is not found.')
        if self.opt not in ('MomentumSGD', 'Adam'):
            raise UnknownOptimizationMethodError(
                '{0} is unknown optimization method.'.format(self.opt))
        if self.resume is not None:
            for path in (self.resume, self.resume_model, self.resume_opt):
                if not os.path.isfile(path):
                    raise FileNotFoundError('{0} is not found.'.format(path))

    # TODO: make it acceptable multiple optimizer, or define out of this trainer.
    def _get_optimizer(self, model, **kwargs):
        """Build the optimizer named by self.opt for `model`'s parameters."""
        if self.opt == 'MomentumSGD':
            optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        elif self.opt == "Adam":
            optimizer = optim.Adam(model.parameters())
        else:
            # Unreachable in practice: _validate_arguments restricts self.opt
            # to the two options above. BUGFIX: getattr raises AttributeError
            # (not a project error) for unknown names, the optimizer needs the
            # model parameters, and a swallowed failure previously led to an
            # UnboundLocalError on return.
            try:
                optimizer = getattr(optim, self.opt)(model.parameters(), **kwargs)
            except AttributeError:
                print("This optim is not available. See https://pytorch.org/docs/stable/optim.html")
                raise
        return optimizer

    def forward(self, batch, model, criterion, isTest=False):
        """Run one forward pass and return the criterion loss.

        `isTest` is accepted (and currently unused) because _train/_test
        pass it; previously forward() rejected the keyword with a TypeError.
        """
        data, target = map(lambda d: d.to(self.device), batch)
        output = model(data)
        loss = criterion(output, target)
        return loss

    def _train(self, model, optimizer, criterion, train_iter, logger, start_time, log_interval=10):
        """Train for one epoch; returns the mean loss over the iterator."""
        model.train()
        loss_sum = 0.0
        for iteration, batch in enumerate(tqdm(train_iter, desc='this epoch'), 1):
            optimizer.zero_grad()
            loss = self.forward(batch, model, criterion, isTest=False)
            # BUGFIX: accumulate a Python float — summing tensors kept the
            # whole autograd graph of every batch alive.
            loss_sum += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 500)
            optimizer.step()
            if self.hyperdash:
                self.experiment.metric("loss", int(loss.item()), log=False)
            if iteration % log_interval == 0:
                # BUGFIX: loss.data[0] fails on 0-dim tensors in modern torch.
                log = 'elapsed_time: {0}, loss: {1}'.format(
                    time.time() - start_time, loss.item())
                logger.write(log)
        return loss_sum / len(train_iter)

    def _test(self, model, test_iter, criterion, logger, start_time):
        """Evaluate on `test_iter`; returns the mean validation loss (float)."""
        model.eval()
        test_loss = 0
        for batch in test_iter:
            loss = self.forward(batch, model, criterion, isTest=True)
            print('Test loss: {}'.format(loss.data))
            test_loss += loss.item()
        test_loss /= len(test_iter)
        log = 'elapsed_time: {0}, validation/loss: {1}'.format(
            time.time() - start_time, test_loss)
        if self.hyperdash:
            # BUGFIX: test_loss is already a Python float here; the previous
            # test_loss.cpu() call raised AttributeError.
            self.experiment.metric('test_loss', test_loss)
        logger.write(log)
        return test_loss

    def _checkpoint(self, epoch, model, optimizer, logger):
        """Save a periodic checkpoint (iter/model/state triplet)."""
        filename = os.path.join(self.out, 'epoch-{0}'.format(epoch + 1))
        torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def _best_checkpoint(self, epoch, model, optimizer, logger):
        """Save the best-so-far checkpoint under a fixed 'best_model' name."""
        filename = os.path.join(self.out, 'best_model')
        torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def fit(self, model, train_data, val_data, criterion):
        """ Execute training """
        # set random seed.
        if self.seed is not None:
            random.seed(self.seed)
            torch.manual_seed(self.seed)
            if self.gpu:
                torch.cuda.manual_seed(self.seed)
        # initialize model to train.
        if self.resume_model:
            model.load_state_dict(torch.load(self.resume_model))
        # prepare gpu.
        if self.gpu:
            model.cuda()
        # load the datasets.
        train_iter = self.dataloader(train_data, batch_size=self.batchsize, shuffle=True)
        val_iter = self.dataloader(val_data, batch_size=3, shuffle=False)
        # set up an optimizer.
        optimizer = self._get_optimizer(model)
        if self.resume_opt:
            optimizer.load_state_dict(torch.load(self.resume_opt))
        # set intervals.
        val_interval = 3
        # NOTE(review): self.epoch / 10 is a float; with epochs not divisible
        # by 10 the modulo test below fires at odd times — confirm intent.
        resume_interval = self.epoch / 10
        log_interval = 10
        # set logger and start epoch.
        logger = TrainLogger(self.out)
        start_epoch = 0
        if self.resume:
            resume = torch.load(self.resume)
            start_epoch = resume['epoch']
            logger.load_state_dict(resume['logger'])
        # start training.
        start_time = time.time()
        loss = 0
        for epoch in trange(start_epoch, self.epoch, initial=start_epoch,
                            total=self.epoch, desc=' total'):
            # BUGFIX: arguments were previously passed positionally in the
            # wrong order (log_interval landed in the `logger` parameter).
            self._train(model, optimizer, criterion, train_iter, logger,
                        start_time, log_interval)
            if (epoch) % val_interval == 0:
                loss = self._test(model, val_iter, criterion, logger, start_time)
                if self.lowest_loss == 0 or self.lowest_loss > loss:
                    logger.write('Best model updated. loss: {} => {}'.format(
                        self.lowest_loss, loss))
                    self._best_checkpoint(epoch, model, optimizer, logger)
                    self.lowest_loss = loss
            if (epoch + 1) % resume_interval == 0:
                self._checkpoint(epoch, model, optimizer, logger)
        if self.hyperdash:
            self.experiment.end()

    @staticmethod
    def get_args():
        """Parse and return the command-line arguments for the trainer."""
        # arg definition
        parser = argparse.ArgumentParser(
            description='Training pose net for comparison \
            between chainer and pytorch about implementing DeepPose.')
        parser.add_argument(
            '--data-augmentation', '-a', action='store_true',
            help='Crop randomly and add random noise for data augmentation.')
        parser.add_argument(
            '--epoch', '-e', type=int, default=100,
            help='Number of epochs to train.')
        parser.add_argument(
            '--opt', '-o', type=str, default='Adam',
            choices=['MomentumSGD', 'Adam'], help='Optimization method.')
        parser.add_argument(
            '--gpu', '-g', type=int, default=0,
            help='GPU ID (negative value indicates CPU).')
        parser.add_argument(
            '--seed', '-s', type=int, help='Random seed to train.')
        parser.add_argument(
            '--train', type=str, default='data/train',
            help='Path to training image-pose list file.')
        parser.add_argument(
            '--val', type=str, default='data/test',
            help='Path to validation image-pose list file.')
        parser.add_argument(
            '--batchsize', type=int, default=32,
            help='Learning minibatch size.')
        parser.add_argument(
            '--out', default='result', help='Output directory')
        parser.add_argument(
            '--resume', default=None,
            help='Initialize the trainer from given file. \
            The file name is "epoch-{epoch number}.iter".')
        parser.add_argument(
            '--resume-model', type=str, default=None,
            help='Load model definition file to use for resuming training \
            (it\'s necessary when you resume a training). \
            The file name is "epoch-{epoch number}.mode"')
        parser.add_argument(
            '--resume-opt', type=str, default=None,
            help='Load optimization states from this file \
            (it\'s necessary when you resume a training). \
            The file name is "epoch-{epoch number}.state"')
        parser.add_argument(
            '--hyperdash', type=str, default=None,
            help='If you use hyperdash logging, enter here the name of experiment. Before using, you have to login to hyperdash with "hyperdash login --github". The default is None that means no logging with hyperdash')
        args = parser.parse_args()
        return args
def train_multidec(args):
    """5-fold cross-validated training of the multi-modal classifier.

    Per fold: pretrains an image and a text classifier, then either learns a
    per-sample modality weight (args.fixed_weight is None) or fuses with the
    given fixed weight, and trains the combined MultiClassifier.
    """
    print("Training started")
    device = torch.device(args.gpu)
    df_image_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.image_csv),
                                index_col=0, encoding='utf-8-sig')
    df_text_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.text_csv),
                               index_col=0, encoding='utf-8-sig')
    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0, encoding='utf-8-sig')
    df_weight = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.weight_csv),
                            index_col=0, encoding='utf-8-sig')
    short_code_array = np.array(df_label.index)
    label_array = np.array(df_label['category'])
    n_classes = np.max(label_array) + 1

    # Checkpoint names carry a "_fw_<w>" suffix in fixed-weight mode so the
    # two configurations never overwrite each other's models. This removes
    # the previous near-duplicate if/else training branches.
    suffix = "" if args.fixed_weight is None else "_fw_" + str(args.fixed_weight)

    def checkpoint(name):
        # Path of a named model checkpoint inside the configured directory.
        return os.path.join(CONFIG.CHECKPOINT_PATH, name + suffix) + ".pt"

    exp = Experiment("multi_classifier", capture_io=True)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        # BUGFIX: KFold requires shuffle=True when random_state is given;
        # the previous shuffle-less call raises ValueError on modern sklearn.
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        image_score_list = []
        text_score_list = []
        multi_score_list = []
        for kf_count, (train_index, val_index) in enumerate(kf.split(short_code_array)):
            print("Current fold: ", kf_count)
            df_train = pd.DataFrame(data=label_array[train_index],
                                    index=short_code_array[train_index],
                                    columns=df_label.columns)
            df_val = pd.DataFrame(data=label_array[val_index],
                                  index=short_code_array[val_index],
                                  columns=df_label.columns)
            print("Loading dataset...")
            train_dataset, val_dataset = load_multi_csv_data(
                df_image_data, df_text_data, df_weight, df_train, df_val, CONFIG)
            print("\nLoading dataset completed")

            image_classifier = SingleClassifier(device=device, input_dim=args.input_dim,
                                                filter_num=64, n_classes=n_classes)
            text_classifier = SingleClassifier(device=device, input_dim=args.input_dim,
                                               filter_num=64, n_classes=n_classes)
            print("pretraining image classifier...")
            image_classifier.fit(train_dataset, val_dataset, input_modal=1,
                                 lr=args.lr, num_epochs=args.pretrain_epochs,
                                 save_path=checkpoint("image_classifier"))
            image_classifier.load_model(checkpoint("image_classifier"))
            print("pretraining text classifier...")
            text_classifier.fit(train_dataset, val_dataset, input_modal=2,
                                lr=args.lr, num_epochs=args.pretrain_epochs,
                                save_path=checkpoint("text_classifier"))
            text_classifier.load_model(checkpoint("text_classifier"))

            if args.fixed_weight is None:
                # Learn a per-sample modality weight from both feature vectors.
                print("pretraining weight classifier...")
                weight_calculator = WeightCalculator(device=device,
                                                     input_dim=args.input_dim * 2,
                                                     n_classes=n_classes)
                weight_calculator.fit(train_dataset, val_dataset, lr=args.lr,
                                      num_epochs=args.pretrain_epochs,
                                      save_path=checkpoint("weight_calculator"))
                weight_calculator.load_model(checkpoint("weight_calculator"))
                multi_classifier = MultiClassifier(device=device,
                                                   image_classifier=image_classifier,
                                                   text_classifier=text_classifier,
                                                   weight_calculator=weight_calculator)
                print(multi_classifier)
                print("training multi classifier...")
            else:
                multi_classifier = MultiClassifier(device=device,
                                                   image_classifier=image_classifier,
                                                   text_classifier=text_classifier,
                                                   fixed_weight=args.fixed_weight)
                print(multi_classifier)
                print("training multi classifier with fixed weight...")
            multi_classifier.fit(train_dataset, val_dataset, lr=args.lr,
                                 batch_size=args.batch_size, num_epochs=args.epochs,
                                 save_path=checkpoint("multi_classifier"))
            print("Finish!!!")
            print("#current fold best image score: %.6f, text score: %.6f multi score: %.6f"
                  % (image_classifier.score, text_classifier.score, multi_classifier.score))
            image_score_list.append(image_classifier.score)
            text_score_list.append(text_classifier.score)
            multi_score_list.append(multi_classifier.score)
        print("#average image score: %.6f, text score: %.6f multi score: %.6f"
              % (np.mean(image_score_list), np.mean(text_score_list),
                 np.mean(multi_score_list)))
    finally:
        exp.end()
def train_multidec(args):
    """Cross-fold training and evaluation of an LSTM text classifier."""
    print("Training test lstm")
    device = torch.device(args.gpu)

    dataset_root = os.path.join(CONFIG.DATASET_PATH, args.target_dataset)
    with open(os.path.join(dataset_root, 'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    with open(os.path.join(dataset_root, 'dictionary_list.p'), 'rb') as f:
        dictionary_list = cPickle.load(f)
    with open(os.path.join(dataset_root, 'word_idx.json'), "r", encoding='utf-8') as f:
        word_idx = json.load(f)

    df_text_data = pd.read_csv(os.path.join(dataset_root, 'posts.csv'),
                               index_col=0, header=None, encoding='utf-8-sig')
    print(df_text_data[:5])
    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0, encoding='utf-8-sig')
    # Keep only the posts that actually have a label.
    df_text_data = df_text_data.loc[df_label.index]
    label_array = np.array(df_label['category'])
    n_clusters = np.max(label_array) + 1

    exp = Experiment("Text lstm", capture_io=True)
    for name, value in vars(args).items():
        exp.param(name, value)
    try:
        acc_list, nmi_list, f_1_list = [], [], []
        for kf_count, fold_idx in enumerate(range(args.start_fold, args.fold)):
            print("Current fold: ", kf_count)
            df_train = pd.read_csv(
                os.path.join(CONFIG.CSV_PATH,
                             "train_" + str(fold_idx) + "_" + args.label_csv),
                index_col=0, encoding='utf-8-sig')
            if args.sampled_n is not None:
                # Optionally subsample the training fold for quick runs.
                df_train = df_train.sample(n=args.sampled_n, random_state=42)
            df_test = pd.read_csv(
                os.path.join(CONFIG.CSV_PATH,
                             "test_" + str(fold_idx) + "_" + args.label_csv),
                index_col=0, encoding='utf-8-sig')

            embedding = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_model))
            if args.use_de:
                # Concatenate a dictionary-based class embedding to each token.
                print("make dictionary embedding")
                dictionary_embedding = make_de(df_text_data, df_train,
                                               dictionary_list, n_clusters)
                input_size = int(embedding.embedding_dim + n_clusters)
                print("Loading dataset...")
                train_dataset, test_dataset = load_text_data(
                    df_text_data, df_train, df_test, CONFIG,
                    word2idx=word_idx[1], n_clusters=n_clusters,
                    de=dictionary_embedding)
                print("\nLoading dataset completed")
            else:
                input_size = int(embedding.embedding_dim)
                print("Loading dataset...")
                train_dataset, test_dataset = load_text_data(
                    df_text_data, df_train, df_test, CONFIG,
                    word2idx=word_idx[1], n_clusters=n_clusters)
                print("\nLoading dataset completed")

            text_encoder = LSTMClassifier(device=device, batch_size=args.batch_size,
                                          input_size=input_size,
                                          output_size=n_clusters,
                                          hidden_size=[128, 256, 512],
                                          embedding=embedding,
                                          dropout=args.dropout)
            text_model = TextModel(device=device, text_encoder=text_encoder)
            text_model.fit(train_dataset, lr=args.lr, batch_size=args.batch_size,
                           num_epochs=args.epochs, save_path=None,
                           use_de=args.use_de)
            text_model.predict(test_dataset, batch_size=args.batch_size,
                               use_de=args.use_de)
            acc_list.append(text_model.acc)
            nmi_list.append(text_model.nmi)
            f_1_list.append(text_model.f_1)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f"
              % (np.mean(acc_list), np.mean(nmi_list), np.mean(f_1_list)))
    finally:
        exp.end()
def main():
    """Train and evaluate a CNN on CIFAR10 with TensorBoard/Hyperdash logging."""
    start_time = time()
    args = get_args()
    if args.checkpoint_dir_name:
        dir_name = args.checkpoint_dir_name
    else:
        dir_name = datetime.datetime.now().strftime('%y%m%d%H%M%S')
    path_to_dir = Path(__file__).resolve().parents[1]
    path_to_dir = os.path.join(path_to_dir, *['log', dir_name])
    os.makedirs(path_to_dir, exist_ok=True)
    # tensorboard
    path_to_tensorboard = os.path.join(path_to_dir, 'tensorboard')
    os.makedirs(path_to_tensorboard, exist_ok=True)
    writer = SummaryWriter(path_to_tensorboard)
    # model saving
    os.makedirs(os.path.join(path_to_dir, 'model'), exist_ok=True)
    path_to_model = os.path.join(path_to_dir, *['model', 'model.tar'])
    # csv
    os.makedirs(os.path.join(path_to_dir, 'csv'), exist_ok=True)
    path_to_results_csv = os.path.join(path_to_dir, *['csv', 'results.csv'])
    path_to_args_csv = os.path.join(path_to_dir, *['csv', 'args.csv'])
    if not args.checkpoint_dir_name:
        # Only a fresh run records its arguments; resumed runs keep the old file.
        with open(path_to_args_csv, 'a') as f:
            args_dict = vars(args)
            param_writer = csv.DictWriter(f, list(args_dict.keys()))
            param_writer.writeheader()
            param_writer.writerow(args_dict)

    # logging using hyperdash
    if not args.no_hyperdash:
        from hyperdash import Experiment
        exp = Experiment('Classification task on CIFAR10 dataset with CNN')
        # Register every CLI argument as a Hyperdash param. setattr replaces
        # the previous string-built exec(), which was fragile and unsafe.
        for key, value in vars(args).items():
            setattr(args, key, exp.param(key, value))
    else:
        exp = None

    path_to_dataset = os.path.join(Path(__file__).resolve().parents[2], 'datasets')
    os.makedirs(path_to_dataset, exist_ok=True)
    train_loader, eval_loader, classes = get_loader(
        batch_size=args.batch_size, num_workers=args.num_workers,
        path_to_dataset=path_to_dataset)

    # show some of the training images, for fun.
    dataiter = iter(train_loader)
    # BUGFIX: DataLoader iterators no longer expose a .next() method;
    # the builtin next() works on every Python/torch version.
    images, labels = next(dataiter)
    img_grid = torchvision.utils.make_grid(images)
    matplotlib_imshow(img_grid)
    writer.add_image('four_CIFAR10_images', img_grid)

    # define a network, loss function and optimizer
    model = CNN()
    writer.add_graph(model, images)
    model = torch.nn.DataParallel(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    start_epoch = 0
    # resume training
    if args.checkpoint_dir_name:
        print('\nLoading the model...')
        checkpoint = torch.load(path_to_model)
        # BUGFIX: state_dict() only *returns* weights; load_state_dict()
        # actually restores them into the model.
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    summary(model, input_size=(3, 32, 32))
    model.to(args.device)

    # train the network
    print('\n--------------------')
    print('Start training and evaluating the CNN')
    for epoch in range(start_epoch, args.n_epoch):
        start_time_per_epoch = time()
        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      args.device, writer, epoch, classes)
        eval_loss, eval_acc = eval(eval_loader, model, criterion, args.device)
        elapsed_time_per_epoch = time() - start_time_per_epoch
        result_dict = {'epoch': epoch,
                       'train_loss': train_loss,
                       'eval_loss': eval_loss,
                       'train_acc': train_acc,
                       'eval_acc': eval_acc,
                       'elapsed time': elapsed_time_per_epoch}
        with open(path_to_results_csv, 'a') as f:
            result_writer = csv.DictWriter(f, list(result_dict.keys()))
            if epoch == 0:
                result_writer.writeheader()
            result_writer.writerow(result_dict)
        # checkpoint
        torch.save({'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()},
                   path_to_model)
        if exp:
            exp.metric('train loss', train_loss)
            exp.metric('eval loss', eval_loss)
            exp.metric('train acc', train_acc)
            exp.metric('eval acc', eval_acc)
        else:
            print(result_dict)
        writer.add_scalar('loss/train_loss', train_loss, epoch * len(train_loader))
        writer.add_scalar('loss/eval_loss', eval_loss, epoch * len(eval_loader))
        writer.add_scalar('acc/train_acc', train_acc, epoch * len(train_loader))
        writer.add_scalar('acc/eval_acc', eval_acc, epoch * len(eval_loader))

    elapsed_time = time() - start_time
    print('\nFinished Training, elapsed time ===> %f' % elapsed_time)
    if exp:
        exp.end()
    writer.close()
def main():
    """Training entry point: setup, data, training loop, checkpoint/logging."""
    args = parse_args()

    # Reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Devices.
    num_cuda_devices = utils.set_devices(args.gpus)

    # Dynamically import the model class from the given file.
    ext = os.path.splitext(args.model_file)[1]
    model_path = '.'.join(os.path.split(args.model_file)).replace(ext, '')
    model = import_module(model_path)
    model = getattr(model, args.model_name)(args.output_class)
    if num_cuda_devices > 0:
        model = torch.nn.DataParallel(model)
        model.cuda()

    logger.info('> set optimizer')
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.initial_lr,
                          momentum=args.lr_momentum)

    # Result directory: file logging plus a snapshot of scripts and args.
    result_dir = create_result_dir(args.model_name)
    fh_handler = logging.FileHandler(os.path.join(result_dir, "log"))
    fh_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(fh_handler)
    shutil.copy(args.model_file,
                os.path.join(result_dir, os.path.basename(args.model_file)))
    for file_name in glob.glob('./*.py') + glob.glob('./*.sh'):
        shutil.copy(file_name,
                    os.path.join(result_dir, os.path.basename(file_name)))
    with open(os.path.join(result_dir, 'args'), 'w') as fp:
        fp.write(json.dumps(vars(args)))
    print(json.dumps(vars(args), sort_keys=True, indent=4))

    # Datasets.
    logger.info('> Creating DataSet')
    train_transform = partial(transforms.transform_f,
                              random_angle=args.random_angle,
                              expand_ratio=args.expand_ratio,
                              crop_size=args.crop_size, train=True)
    train = getdataset.getCcoreDataset(args.train_json, train_transform,
                                       args.train_mode)
    # NOTE(review): validation reuses train_json and a train=True transform —
    # looks unintentional; confirm whether a val json / eval transform exists.
    val_transform = partial(transforms.transform_f,
                            random_angle=args.random_angle,
                            expand_ratio=args.expand_ratio,
                            crop_size=args.crop_size, train=True)
    val = getdataset.getCcoreDataset(args.train_json, val_transform,
                                     args.train_mode)

    logger.info('> create dataloader')
    train_loader = torch.utils.data.DataLoader(train, batch_size=args.batchsize,
                                               shuffle=True, num_workers=4)
    val_loader = torch.utils.data.DataLoader(val, batch_size=args.batchsize,
                                             shuffle=False, num_workers=4)

    # Training.
    logger.info('> run training')
    best_prec = 0
    logger.info('> Create Hyperdash Experiment {}'.format(args.experiment_name))
    exp = Experiment(args.experiment_name,
                     api_key_getter=utils.get_api_key_from_env)
    for epoch in tqdm(range(args.training_epoch)):
        training_result = training(train_loader, model, criterion, optimizer)
        val_result = validate(val_loader, model, criterion)
        result_str = ('epoch : {} / {} main/loss : {:.3f} main/acc : {:.3f} '
                      'val/loss : {:.3f} val/acc : {:.3f}').format(
            epoch, args.training_epoch,
            training_result['loss'], training_result['acc'],
            val_result['loss'], val_result['acc'])
        logger.info(result_str)
        exp.log(result_str)
        prec1 = val_result['acc']
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec
        best_prec = max(prec1, best_prec)
        if is_best:
            save_checkpoint(state={'epoch': epoch + 1,
                                   'state_dict': model.state_dict(),
                                   'best_prec': best_prec,
                                   'optimizer': optimizer.state_dict()},
                            is_best=is_best,
                            result_dir=result_dir)
        exp.metric('main/loss', training_result['loss'])
        exp.metric('val/loss', val_result['loss'])
    logger.info('> end training')
    exp.end()
def train_multidec(args): print("Training multidec") device = torch.device(args.gpu) df_image_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.image_csv), index_col=0, encoding='utf-8-sig') df_text_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.text_csv), index_col=0, encoding='utf-8-sig') df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv), index_col=0, encoding='utf-8-sig') short_code_array = np.array(df_label.index) label_array = np.array(df_label['category']) n_clusters = np.max(label_array) + 1 short_code_train, short_code_val, label_train, label_val = train_test_split( short_code_array, label_array, test_size=0.2, random_state=42) df_train = pd.DataFrame(data=label_train, index=short_code_train, columns=df_label.columns) df_val = pd.DataFrame(data=label_val, index=short_code_val, columns=df_label.columns) print("Loading dataset...") train_dataset, val_dataset = load_multi_csv_data(df_image_data, df_text_data, df_train, df_val, CONFIG) print("Loading dataset completed") image_encoder = MDEC_encoder(input_dim=args.input_dim, z_dim=args.latent_dim, n_clusters=n_clusters, encodeLayer=[500, 500, 2000], activation="relu", dropout=0) image_encoder.load_model( os.path.join(CONFIG.CHECKPOINT_PATH, "image_sdae_" + str(args.latent_dim)) + ".pt") text_encoder = MDEC_encoder(input_dim=args.input_dim, z_dim=args.latent_dim, n_clusters=n_clusters, encodeLayer=[500, 500, 2000], activation="relu", dropout=0) text_encoder.load_model( os.path.join(CONFIG.CHECKPOINT_PATH, "text_sdae_" + str(args.latent_dim)) + ".pt") mdec = MultiDEC(device=device, image_encoder=image_encoder, text_encoder=text_encoder, n_clusters=n_clusters) exp = Experiment("MDEC " + str(args.latent_dim), capture_io=True) print(mdec) for arg, value in vars(args).items(): exp.param(arg, value) try: mdec.fit(train_dataset, val_dataset, lr=args.lr, batch_size=args.batch_size, num_epochs=args.epochs, save_path=CONFIG.CHECKPOINT_PATH) print("Finish!!!") finally: exp.end()
validation_tuple = load_labeled_csv(validation_filename, feature_cols, label_cols) # Normalize training and validation data by training statistics train_mean = np.mean(train_tuple.features, axis=0) train_std = np.std(train_tuple.features, axis=0) train_tuple.features -= train_mean train_tuple.features /= train_std validation_tuple.features -= train_mean validation_tuple.features /= train_std logger.info('Finished importing and normalization of input data.') # ------------------------ Training --------------------------------------- # hd_exp = Experiment(hyper_param_str) # Run backpropagation training. df, best_error = train(train_tuple, validation_tuple, hyper_params, nb_epochs, random_seed, hd_exp, deep_cal_dir + '/code/') logger.info('Writing log dataframe to csv on disk.') df.to_csv(hyper_param_str + '/log_file.csv') # Finish Hyperdash experiment. hd_exp.end() logger.info("PROGRAM END.")
def run_pusher3dof(args, sim=True, vanilla=False): try: from hyperdash import Experiment hyperdash_support = True except: hyperdash_support = False env = NormalizedEnv(gym.make(args.env)) torques = [1.0] * 3 # if real colored = False if sim: torques = [args.t0, args.t1, args.t2] colored = True if not vanilla: env.env._init( torques=torques, colored=colored ) if args.seed > 0: np.random.seed(args.seed) env.seed(args.seed) nb_states = env.observation_space.shape[0] nb_actions = env.action_space.shape[0] agent = DDPG(nb_states, nb_actions, args) evaluate = Evaluator( args.validate_episodes, args.validate_steps, args.output, max_episode_length=args.max_episode_length ) exp = None if args.mode == 'train': if hyperdash_support: prefix = "real" if sim: prefix = "sim" exp = Experiment("s2r-pusher3dof-ddpg-{}".format(prefix)) import socket exp.param("host", socket.gethostname()) exp.param("type", prefix) # sim or real exp.param("vanilla", vanilla) # vanilla or not exp.param("torques", torques) exp.param("folder", args.output) for arg in ["env", "max_episode_length", "train_iter", "seed", "resume"]: arg_val = getattr(args, arg) exp.param(arg, arg_val) train(args, args.train_iter, agent, env, evaluate, args.validate_steps, args.output, max_episode_length=args.max_episode_length, debug=args.debug, exp=exp) # when done exp.end() elif args.mode == 'test': test(args.validate_episodes, agent, env, evaluate, args.resume, visualize=args.vis, debug=args.debug, load_best=args.best) else: raise RuntimeError('undefined mode {}'.format(args.mode))
def test_job(): exp = Experiment("Exception experiment") time.sleep(0.1) raise Exception(expected_exception) exp.end()
def train_reconstruction(args):
    """Train the convolutional text autoencoder with a cyclical LR schedule.

    Loads a pretrained word embedding + vocabulary, builds encoder/decoder
    around it, optionally resumes from a checkpoint, and trains with AdamW
    whose LR is driven by a cyclical LambdaLR scheduler. After every epoch
    the model is evaluated with ROUGE on the validation set and checkpointed.
    A Hyperdash experiment records args and per-epoch logs and is always
    closed via try/finally.
    """
    device = torch.device(args.gpu)
    print("Loading embedding model...")
    with open(
            os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                         'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    # word_idx[0] is used below as idx2word and word_idx[1] as word2idx.
    with open(os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                           'word_idx.json'), "r", encoding='utf-8') as f:
        word_idx = json.load(f)
    print("Loading embedding model completed")
    print("Loading dataset...")
    train_dataset, val_dataset = load_text_data(args, CONFIG,
                                                word2idx=word_idx[1])
    print("Loading dataset completed")
    train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle),\
        DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
    # t1 = max_sentence_len + 2 * (args.filter_shape - 1)
    # t1..t3: sequence lengths after each strided conv layer.
    t1 = CONFIG.MAX_SENTENCE_LEN
    t2 = int(math.floor(
        (t1 - args.filter_shape) / 2) + 1)  # "2" means stride size
    t3 = int(math.floor((t2 - args.filter_shape) / 2) + 1)
    args.t3 = t3
    # Encoder and decoder share the frozen pretrained embedding table.
    embedding = nn.Embedding.from_pretrained(
        torch.FloatTensor(embedding_model))
    text_encoder = text_model.ConvolutionEncoder(embedding, t3,
                                                 args.filter_size,
                                                 args.filter_shape,
                                                 args.latent_size)
    text_decoder = text_model.DeconvolutionDecoder(embedding, args.tau, t3,
                                                   args.filter_size,
                                                   args.filter_shape,
                                                   args.latent_size, device)
    if args.resume:
        print("Restart from checkpoint")
        # map_location keeps the load CPU-side regardless of saving device.
        checkpoint = torch.load(os.path.join(CONFIG.CHECKPOINT_PATH,
                                             args.resume),
                                map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        text_encoder.load_state_dict(checkpoint['text_encoder'])
        text_decoder.load_state_dict(checkpoint['text_decoder'])
    else:
        print("Start from initial")
        start_epoch = 0
    text_autoencoder = text_model.TextAutoencoder(text_encoder, text_decoder)
    criterion = nn.NLLLoss().to(device)
    text_autoencoder.to(device)
    # lr=1. because the real learning rate is injected multiplicatively by
    # the cyclical LambdaLR factor below.
    optimizer = AdamW(text_autoencoder.parameters(),
                      lr=1.,
                      weight_decay=args.weight_decay,
                      amsgrad=True)
    step_size = args.half_cycle_interval * len(train_loader)
    clr = cyclical_lr(step_size, min_lr=args.lr,
                      max_lr=args.lr * args.lr_factor)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
    exp = Experiment("Text autoencoder " + str(args.latent_size),
                     capture_io=False)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        text_autoencoder.train()
        for epoch in range(start_epoch, args.epochs):
            print("Epoch: {}".format(epoch))
            for steps, batch in enumerate(train_loader):
                torch.cuda.empty_cache()
                feature = Variable(batch).to(device)
                optimizer.zero_grad()
                prob = text_autoencoder(feature)
                # NLLLoss expects (batch, classes, seq); prob comes out as
                # (batch, seq, classes), hence the transpose.
                loss = criterion(prob.transpose(1, 2), feature)
                loss.backward()
                optimizer.step()
                # Scheduler stepped per batch: cyclical LR.
                scheduler.step()
                if (steps * args.batch_size) % args.log_interval == 0:
                    # Decode one sample to eyeball reconstruction quality.
                    input_data = feature[0]
                    single_data = prob[0]
                    _, predict_index = torch.max(single_data, 1)
                    input_sentence = util.transform_idx2word(
                        input_data.detach().cpu().numpy(),
                        idx2word=word_idx[0])
                    predict_sentence = util.transform_idx2word(
                        predict_index.detach().cpu().numpy(),
                        idx2word=word_idx[0])
                    print("Epoch: {} at {} lr: {}".format(
                        epoch, str(datetime.datetime.now()),
                        str(scheduler.get_lr())))
                    print("Steps: {}".format(steps))
                    print("Loss: {}".format(loss.detach().item()))
                    print("Input Sentence:")
                    print(input_sentence)
                    print("Output Sentence:")
                    print(predict_sentence)
                    del input_data, single_data, _, predict_index
                # Free per-batch tensors eagerly to limit GPU memory growth.
                del feature, prob, loss
            exp.log("\nEpoch: {} at {} lr: {}".format(
                epoch, str(datetime.datetime.now()),
                str(scheduler.get_lr())))
            _avg_loss, _rouge_1, _rouge_2 = eval_reconstruction_with_rouge(
                text_autoencoder, word_idx[0], criterion, val_loader, device)
            exp.log("\nEvaluation - loss: {} Rouge1: {} Rouge2: {}".format(
                _avg_loss, _rouge_1, _rouge_2))
            # Checkpoint after every epoch.
            # NOTE(review): key 'Rouge1:' carries a trailing colon while
            # 'Rouge2' does not -- likely a typo; consumers of this
            # checkpoint should be checked before renaming.
            util.save_models(
                {
                    'epoch': epoch + 1,
                    'text_encoder': text_encoder.state_dict(),
                    'text_decoder': text_decoder.state_dict(),
                    'avg_loss': _avg_loss,
                    'Rouge1:': _rouge_1,
                    'Rouge2': _rouge_2,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                }, CONFIG.CHECKPOINT_PATH,
                "text_autoencoder_" + str(args.latent_size))
        print("Finish!!!")
    finally:
        exp.end()
def test_experiment_handles_numpy_numbers(self): nums_to_test = [ ("int_", np.int_()), ("intc", np.intc()), ("intp", np.intp()), ("int8", np.int8()), ("int16", np.int16()), ("int32", np.int32()), ("int64", np.int64()), ("uint8", np.uint8()), ("uint16", np.uint16()), ("uint32", np.uint32()), ("uint64", np.uint64()), ("float16", np.float16()), ("float32", np.float32()), ("float64", np.float64()), ] # Make sure the SDK doesn't choke and JSON serialization works exp = Experiment("MNIST") for name, num in nums_to_test: exp.metric("test_metric_{}".format(name), num) exp.param("test_param_{}".format(name), num) exp.end() # Test params match what is expected params_messages = [] for msg in server_sdk_messages: payload = msg["payload"] if "params" in payload: params_messages.append(payload) expected_params = [] for name, num in nums_to_test: obj = { "params": {}, "is_internal": False, } obj["params"]["test_param_{}".format(name)] = num obj["is_internal"] = False expected_params.append(obj) assert len(expected_params) == len(params_messages) for i, message in enumerate(params_messages): print(message) print(expected_params[i]) assert message == expected_params[i] # Test metrics match what is expected metrics_messages = [] for msg in server_sdk_messages: payload = msg["payload"] if "name" in payload: metrics_messages.append(payload) expected_metrics = [] for name, num in nums_to_test: expected_metrics.append({ "name": "test_metric_{}".format(name), "value": num, "is_internal": False, }) assert len(expected_metrics) == len(metrics_messages) for i, message in enumerate(metrics_messages): assert message == expected_metrics[i]
def test_experiment(self):
    # Run a test job via the Experiment API
    # Make sure log file is where is supposed to be
    # look at decorator
    # verify run start/stop is sent
    # stdout is patched so the SDK's console output can be inspected for
    # errors at the end of the test.
    with patch("sys.stdout", new=StringIO()) as faked_out:
        exp = Experiment("MNIST")
        exp.log("test print")
        exp.param("batch size", 32)
        for i in exp.iter(2):
            time.sleep(1)
            exp.metric("accuracy", i * 0.2)
        time.sleep(0.1)
        exp.end()
        # Test params match what is expected
        params_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "params" in payload:
                params_messages.append(payload)
        # One user param plus the internal param emitted by exp.iter().
        expect_params = [
            {
                "params": {
                    "batch size": 32,
                },
                "is_internal": False,
            },
            {
                "params": {
                    "hd_iter_0_epochs": 2,
                },
                "is_internal": True,
            },
        ]
        assert len(expect_params) == len(params_messages)
        for i, message in enumerate(params_messages):
            assert message == expect_params[i]
        # Test metrics match what is expected
        metrics_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "name" in payload:
                metrics_messages.append(payload)
        # Internal iteration metrics interleave with the user's "accuracy".
        expect_metrics = [
            {
                "is_internal": True,
                "name": "hd_iter_0",
                "value": 0
            },
            {
                "is_internal": False,
                "name": "accuracy",
                "value": 0
            },
            {
                "is_internal": True,
                "name": "hd_iter_0",
                "value": 1
            },
            {
                "is_internal": False,
                "name": "accuracy",
                "value": 0.2
            },
        ]
        assert len(expect_metrics) == len(metrics_messages)
        for i, message in enumerate(metrics_messages):
            assert message == expect_metrics[i]
        captured_out = faked_out.getvalue()
        assert "error" not in captured_out
        # Make sure correct API name / version headers are sent
        assert server_sdk_headers[0][API_KEY_NAME] == API_NAME_EXPERIMENT
        assert server_sdk_headers[0][
            VERSION_KEY_NAME] == get_hyperdash_version()
        # Make sure logs were persisted
        expect_logs = [
            "{ batch size: 32 }",
            "test print",
            "| Iteration 0 of 1 |",
            "| accuracy: 0.000000 |",
        ]
        log_dir = get_hyperdash_logs_home_path_for_job("MNIST")
        # Pick the most recently written log file for this job.
        latest_log_file = max([
            os.path.join(log_dir, filename)
            for filename in os.listdir(log_dir)
        ], key=os.path.getmtime)
        with open(latest_log_file, "r") as log_file:
            data = log_file.read()
        for log in expect_logs:
            assert_in(log, data)
        # Clean up so reruns see a fresh log directory.
        os.remove(latest_log_file)
def train_multidec(args):
    """K-fold semi-supervised training of UniDEC on a single modality.

    Despite the function name, this trains the uni-modal variant (UDEC):
    for each fold it loads pre-split train/test label CSVs, restores the
    fold's pretrained SDAE encoder, runs UniDEC.fit_predict, and finally
    prints accuracy/NMI/F1 averaged over folds. The Hyperdash experiment is
    always closed via try/finally.
    """
    print("Training unidec")
    device = torch.device(args.gpu)
    df_input_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_" + args.input_csv),
        index_col=0, encoding='utf-8-sig')
    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0, encoding='utf-8-sig')
    label_array = np.array(df_label['category'])
    # Labels assumed 0-based consecutive ints, so max+1 = cluster count.
    n_clusters = np.max(label_array) + 1
    exp = Experiment(args.prefix_csv + "_" + args.target_modal + "_UDEC",
                     capture_io=True)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        acc_list = []
        nmi_list = []
        f_1_list = []
        kf_count = 0
        for fold_idx in range(args.fold):
            print("Current fold: ", kf_count)
            # Fold splits are precomputed on disk as train_/test_ CSVs.
            df_train = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH,
                "train_" + str(fold_idx) + "_category_label.csv"),
                index_col=0, encoding='utf-8-sig')
            df_test = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH,
                "test_" + str(fold_idx) + "_category_label.csv"),
                index_col=0, encoding='utf-8-sig')
            print("Loading dataset...")
            full_dataset, train_dataset, val_dataset = load_semi_supervised_uni_csv_data(
                df_input_data, df_train, df_test, CONFIG)
            print("\nLoading dataset completed")
            encoder = UDEC_encoder(input_dim=args.input_dim,
                                   z_dim=args.latent_dim,
                                   n_clusters=n_clusters,
                                   encodeLayer=[500, 500, 2000],
                                   activation="relu",
                                   dropout=0)
            # Restore the fold-specific pretrained SDAE weights.
            encoder.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_model + "_" + args.target_modal + "_sdae_" +
                    str(fold_idx)) + ".pt")
            # encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_" + args.target_modal + "_sdae_" + str(fold_idx)) + ".pt")
            udec = UniDEC(device=device,
                          encoder=encoder,
                          use_prior=args.use_prior,
                          n_clusters=n_clusters)
            udec.fit_predict(
                full_dataset,
                train_dataset,
                val_dataset,
                lr=args.lr,
                batch_size=args.batch_size,
                num_epochs=args.epochs,
                save_path=os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_csv + "_" + args.target_modal + "_udec_" +
                    str(fold_idx)) + ".pt",
                tol=args.tol,
                kappa=args.kappa)
            # Collect per-fold metrics exposed on the fitted model.
            acc_list.append(udec.acc)
            nmi_list.append(udec.nmi)
            f_1_list.append(udec.f_1)
            kf_count = kf_count + 1
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (np.mean(acc_list), np.mean(nmi_list), np.mean(f_1_list)))
    finally:
        exp.end()
def train_multidec(args):
    """K-fold weight calculation on top of a trained MultiDEC (ODEC) model.

    Despite the function name, this loads a previously trained MultiDEC per
    fold, freezes it (eval mode), and fits a WeightCalc model against it,
    averaging accuracy/NMI/F1 over folds. The Hyperdash experiment is always
    closed via try/finally.
    """
    print("Training weight calc")
    device = torch.device(args.gpu)
    df_image_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_pca_normalized_image_encoded_" +
        args.target_dataset + ".csv"),
        index_col=0, encoding='utf-8-sig')
    df_text_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_text_doc2vec_" +
        args.target_dataset + ".csv"),
        index_col=0, encoding='utf-8-sig')
    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0, encoding='utf-8-sig')
    label_array = np.array(df_label['category'])
    # Labels assumed 0-based consecutive ints, so max+1 = cluster count.
    n_clusters = np.max(label_array) + 1
    #n_clusters = args.n_clusters
    exp = Experiment(args.prefix_csv + "_ODEC", capture_io=True)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        acc_list = []
        nmi_list = []
        f_1_list = []
        # start_fold allows resuming a partially completed k-fold run.
        for fold_idx in range(args.start_fold, args.fold):
            print("Current fold: ", fold_idx)
            df_train = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH, "train_" + str(fold_idx) + "_" +
                args.target_dataset + "_label.csv"),
                index_col=0, encoding='utf-8-sig')
            if args.sampled_n is not None:
                # Optionally subsample the labeled training set
                # (fixed seed keeps folds comparable across runs).
                df_train = df_train.sample(n=args.sampled_n, random_state=42)
            df_test = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH, "test_" + str(fold_idx) + "_" +
                args.target_dataset + "_label.csv"),
                index_col=0, encoding='utf-8-sig')
            print("Loading dataset...")
            full_dataset, train_dataset, val_dataset = load_semi_supervised_csv_data(
                df_image_data, df_text_data, df_train, df_test, CONFIG)
            print("\nLoading dataset completed")
            image_encoder = MDEC_encoder(input_dim=args.input_dim,
                                         z_dim=args.latent_dim,
                                         n_clusters=n_clusters,
                                         encodeLayer=[500, 500, 2000],
                                         activation="relu",
                                         dropout=0)
            # NOTE: "_image" "_" is implicit string concatenation ("_image_").
            image_encoder.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_model + "_image"
                    "_" + args.target_dataset + "_sdae_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
            # image_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_image_sdae_" + str(fold_idx)) + ".pt")
            text_encoder = MDEC_encoder(input_dim=args.input_dim,
                                        z_dim=args.latent_dim,
                                        n_clusters=n_clusters,
                                        encodeLayer=[500, 500, 2000],
                                        activation="relu",
                                        dropout=0)
            text_encoder.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_model + "_text"
                    "_" + args.target_dataset + "_sdae_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
            # text_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_text_sdae_" + str(fold_idx)) + ".pt")
            mdec = MultiDEC(device=device,
                            image_encoder=image_encoder,
                            text_encoder=text_encoder,
                            ours=args.ours,
                            use_prior=args.use_prior,
                            fl=args.fl,
                            n_clusters=n_clusters)
            # Restore the fold's trained ODEC model and freeze it: WeightCalc
            # is fit against the fixed MultiDEC below.
            mdec.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_csv + "_odec_" + str(args.latent_dim) + '_' +
                    str(fold_idx)) + ".pt")
            mdec.to(device)
            mdec.eval()
            wcalc = WeightCalc(device=device,
                               ours=args.ours,
                               use_prior=args.use_prior,
                               input_dim=args.input_dim,
                               n_clusters=n_clusters)
            wcalc.fit_predict(
                mdec,
                full_dataset,
                train_dataset,
                val_dataset,
                args,
                CONFIG,
                lr=args.lr,
                batch_size=args.batch_size,
                num_epochs=args.epochs,
                save_path=os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_csv + "_wcalc_" + str(args.latent_dim) + '_' +
                    str(fold_idx)) + ".pt",
                tol=args.tol,
                kappa=args.kappa)
            # Collect per-fold metrics exposed on the fitted model.
            acc_list.append(wcalc.acc)
            nmi_list.append(wcalc.nmi)
            f_1_list.append(wcalc.f_1)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (np.mean(acc_list), np.mean(nmi_list), np.mean(f_1_list)))
    finally:
        exp.end()
def train_reconstruction(train_loader, test_loader, encoder, decoder, args): exp = Experiment("Reconstruction Training") #vis = Visualizations() vis = visdom.Visdom(port=8098) try: lr = args.lr encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr) decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr) encoder.train() decoder.train() steps = 0 all_losses = [] for epoch in range(1, args.epochs + 1): epoch_losses = [] print("=======Epoch========") print(epoch) for batch in train_loader: feature = batch # Variable if args.use_cuda: encoder.cuda() decoder.cuda() feature = feature.cuda() encoder_opt.zero_grad() decoder_opt.zero_grad() h = encoder(feature) prob = decoder(h) reconstruction_loss = compute_cross_entropy(prob, feature) reconstruction_loss.backward() encoder_opt.step() decoder_opt.step() print("Epoch: {}".format(epoch)) print("Steps: {}".format(steps)) print("Loss: {}".format(reconstruction_loss.item() / args.sentence_len)) exp.metric("Loss", reconstruction_loss.item() / args.sentence_len) epoch_losses.append(reconstruction_loss.item()) # check reconstructed sentence if steps % args.log_interval == 0: print("Test!!") input_data = feature[0] single_data = prob[0] _, predict_index = torch.max(single_data, 1) input_sentence = transform_id2word( input_data.data, train_loader.dataset.index2word, lang="en") predict_sentence = transform_id2word( predict_index.data, train_loader.dataset.index2word, lang="en") print("Input Sentence:") print(input_sentence) print("Output Sentence:") print(predict_sentence) steps += 1 # Visualization data epoch_loss = sum(epoch_losses) / float(len(epoch_losses)) all_losses.append(epoch_loss) if epoch == 1: # vis.plot_loss(np.mean(epoch_losses), steps) win = vis.line(X=np.array((epoch, )), Y=np.array((epoch_loss, )), name="train_loss", opts=dict(xlabel='Epoch', ylabel='Loss', title='Train and Eval Loss')) else: vis.line(X=np.array((epoch, )), Y=np.array((epoch_loss, )), name="train_loss", update="append", win=win) 
#epoch_losses.clear() if epoch % args.test_interval == 0: eval_reconstruction(encoder, decoder, test_loader, args, vis, win, epoch) if epoch % args.lr_decay_interval == 0: # decrease learning rate lr = lr / 1.05 encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr) decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr) encoder.train() decoder.train() if epoch % args.save_interval == 0: save_models(encoder, args.save_dir, "encoder", steps) save_models(decoder, args.save_dir, "decoder", steps) if epoch % 20 == 0: # finalization # save vocabulary #with open("word2index", "wb") as w2i, open("index2word", "wb") as i2w: # pickle.dump(train_loader.dataset.word2index, w2i) # pickle.dump(train_loader.dataset.index2word, i2w) torch.save(train_loader.dataset.index2word, "/home/avshalom/ext/ae_cnn_code/index2word.pt") torch.save(train_loader.dataset.word2index, "/home/avshalom/ext/ae_cnn_code/word2index.pt") # save models #save_models(encoder, args.save_dir, "encoder", "final") #save_models(decoder, args.save_dir, "decoder", "final") torch.save( encoder, "/home/avshalom/ext/ae_cnn_code/encoder_lsize_%s_epoch_%s.pt" % (args.latent_size, epoch)) print("Finish!!!") finally: exp.end()
def demo(args=None): from_file = get_api_key_from_file() from_env = get_api_key_from_env() api_key = from_env or from_file if not api_key: print(""" `hyperdash demo` requires a Hyperdash API key. Try setting your API key in the HYPERDASH_API_KEY environment variable, or in a hyperdash.json file in the local directory or your user's home directory with the following format: { "api_key": "<YOUR_API_KEY>" } """) return print(""" Running the following program: from hyperdash import Experiment exp = Experiment("Dogs vs. Cats") # Parameters estimators = exp.param("Estimators", 500) epochs = exp.param("Epochs", 5) batch = exp.param("Batch Size", 64) for epoch in xrange(1, epochs + 1): accuracy = 1. - 1./epoch loss = float(epochs - epoch)/epochs print("Training model (epoch {})".format(epoch)) time.sleep(1) # Metrics exp.metric("Accuracy", accuracy) exp.metric("Loss", loss) exp.end() """) from hyperdash import Experiment exp = Experiment("Dogs vs. Cats") # Parameters estimators = exp.param("Estimators", 500) epochs = exp.param("Epochs", 5) batch = exp.param("Batch Size", 64) for epoch in xrange(epochs): print("Training model (epoch {})".format(epoch)) accuracy = 1. - 1. / (epoch + 1) loss = float(epochs - epoch) / (epochs + 1) # Metrics exp.metric("Accuracy", accuracy) exp.metric("Loss", loss) time.sleep(1) exp.end()
def train(train_list, test_list, lr, epoch, batchsize, insize, outsize,
          save_interval=10, weight_decay=5e-4, lr_step=10,
          model_name='resnet34', loss_name='focal_loss',
          metric_name='arc_margin', optim_name='adam', num_workers=4,
          print_freq=1e+6, debug=False):
    """Train a metric-learning face model (backbone + margin head).

    Builds train/test loaders from file lists, selects backbone, loss,
    margin product, and optimizer by name, then trains for `epoch` epochs,
    logging per-epoch train/test loss+accuracy to a CSV and to Hyperdash,
    and checkpointing every `save_interval` epochs into logs/<timestamp>/.

    Args:
        train_list / test_list: dataset file lists.
        lr, epoch, batchsize, insize, outsize: core hyperparameters
            (input size, embedding size).
        save_interval: checkpoint every N epochs.
        weight_decay, lr_step: optimizer / StepLR settings.
        model_name / loss_name / metric_name / optim_name: component selectors.
        print_freq: iterations between progress prints.
        debug: smaller datasets + per-iteration prints.
    """
    device = torch.device("cuda")

    train_dataset = Dataset(train_list, mode='train', insize=insize,
                            debug=debug)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=batchsize,
                                              shuffle=True,
                                              num_workers=num_workers)
    test_dataset = Dataset(test_list, mode='test', insize=insize, debug=debug)
    testloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=batchsize,
                                             shuffle=False,
                                             num_workers=num_workers)
    # Number of identity classes drives the margin head's output size.
    class_num = train_dataset.get_classnum()
    print('{} train iters per epoch:'.format(len(trainloader)))
    print('{} test iters per epoch:'.format(len(testloader)))

    if loss_name == 'focal_loss':
        criterion = FocalLoss(gamma=2)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # Backbone selection by name.
    if model_name == 'resnet18':
        model = resnet_face18(insize, outsize)
    elif model_name == 'resnet34':
        model = resnet34(insize, outsize)
    elif model_name == 'resnet50':
        model = resnet50(insize, outsize)
    elif model_name == 'resnet101':
        model = resnet101(insize, outsize)
    elif model_name == 'resnet152':
        model = resnet152(insize, outsize)
    elif model_name == 'shuffle':
        model = ShuffleFaceNet(outsize)
    elif model_name == 'simplev1':
        model = CNNv1(insize, outsize, activation='relu', kernel_pattern='v1')
    else:
        raise ValueError('Invalid model name: {}'.format(model_name))

    # Margin-product head selection; falls back to a plain linear classifier.
    if metric_name == 'add_margin':
        metric_fc = AddMarginProduct(outsize, class_num, s=30, m=0.35)
    elif metric_name == 'arc_margin':
        metric_fc = ArcMarginProduct(outsize, class_num, s=30, m=0.5,
                                     easy_margin=False)
    elif metric_name == 'sphere':
        metric_fc = SphereProduct(outsize, class_num, m=4)
    else:
        metric_fc = nn.Linear(outsize, class_num)

    # view_model(model, opt.input_shape)
    print(model)
    model.to(device)
    model = DataParallel(model)
    metric_fc.to(device)
    metric_fc = DataParallel(metric_fc)

    assert optim_name in ['sgd', 'adam']
    # Both parameter groups (backbone + head) share one optimizer.
    if optim_name == 'sgd':
        optimizer = torch.optim.SGD([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }], lr=lr, weight_decay=weight_decay)
    elif optim_name == 'adam':
        optimizer = torch.optim.Adam([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }], lr=lr, weight_decay=weight_decay)
    # NOTE(review): scheduler is created but scheduler.step() below is
    # commented out, so the LR never actually decays -- confirm intent.
    scheduler = StepLR(optimizer, step_size=lr_step, gamma=0.1)

    start = time.time()
    training_id = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    hyperdash_exp = Experiment(training_id)
    checkpoints_dir = os.path.join('logs', training_id)
    if not os.path.exists(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    logging_path = os.path.join(checkpoints_dir, 'history.csv')

    # Record the full configuration: to Hyperdash params and to a JSON file.
    config = {}
    config['train_list'] = train_list
    config['test_list'] = test_list
    config['lr'] = lr
    config['epoch'] = epoch
    config['batchsize'] = batchsize
    config['insize'] = insize
    config['outsize'] = outsize
    config['save_interval'] = save_interval
    config['weight_decay'] = weight_decay
    config['lr_step'] = lr_step
    config['model_name'] = model_name
    config['loss_name'] = loss_name
    config['metric_name'] = metric_name
    config['optim_name'] = optim_name
    config['num_workers'] = num_workers
    config['debug'] = debug
    for k, v in config.items():
        hyperdash_exp.param(k, v, log=False)
    with open(os.path.join(checkpoints_dir, 'train_config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    with open(logging_path, 'w') as f:
        f.write('epoch,time_elapsed,train_loss,train_acc,test_loss,test_acc\n')

    prev_time = datetime.datetime.now()
    for i in range(epoch):
        model.train()
        for ii, data in enumerate(tqdm(trainloader, disable=True)):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            # Margin heads take the labels to apply the angular margin.
            output = metric_fc(feature, label)
            loss = criterion(output, label)
            pred_classes = np.argmax(output.data.cpu().numpy(), axis=1)
            acc = np.mean(
                (pred_classes == label.data.cpu().numpy()).astype(int))
            optimizer.zero_grad()
            loss.backward()
            #import pdb; pdb.set_trace()
            optimizer.step()
            #scheduler.step()

            iters = i * len(trainloader) + ii
            if iters % print_freq == 0 or debug:
                speed = print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {} acc {}'.
                      format(time_str, i, ii, speed, loss.item(), acc))
                start = time.time()

        model.eval()
        # NOTE(review): test_loss/test_acc are overwritten each iteration,
        # so the values logged below reflect only the LAST test batch, not
        # an average over the test set.
        for ii, data in enumerate(tqdm(testloader, disable=True)):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            output = metric_fc(feature, label)
            test_loss = criterion(output, label)
            output = np.argmax(output.data.cpu().numpy(), axis=1)
            test_acc = np.mean(
                (output == label.data.cpu().numpy()).astype(int))
            #test_acc = np.mean((torch.argmax(output, dim=1) == label).type(torch.int32))

        # NOTE(review): `i == epoch` is never true for i in range(epoch)
        # (max i is epoch-1), so the "final epoch" save condition is dead --
        # likely intended as `i == epoch - 1`.
        if i % save_interval == 0 or i == epoch:
            save_model(model.module, checkpoints_dir, model_name, i)
            save_model(metric_fc.module, checkpoints_dir, metric_name, i)

        new_time = datetime.datetime.now()
        with open(logging_path, 'a') as f:
            f.write('{},{},{},{},{},{}\n'.format(
                i, (new_time - prev_time).total_seconds(), loss.item(), acc,
                test_loss.item(), test_acc))
        prev_time = datetime.datetime.now()

        hyperdash_exp.metric('train_loss', loss.item(), log=False)
        hyperdash_exp.metric('train_acc', acc, log=False)
        hyperdash_exp.metric('test_loss', test_loss.item(), log=False)
        hyperdash_exp.metric('test_acc', test_acc, log=False)

    hyperdash_exp.end()
    print('Finished {}'.format(training_id))
if TRAIN: optimizer.step() loss.detach_() net.hidden[0].detach_() net.hidden[1].detach_() printEpisodeLoss(epoch_idx, episode_idx, loss_episode, diff_episode, len(x)) loss_epoch += loss_epi diff_epoch += diff_episode printEpochLoss(epoch_idx, episode_idx, loss_epoch, diff_epoch) if TRAIN: saveModel(state=net.state_dict(), epoch=epoch_idx, episode_idx=episode_idx, loss_epoch=loss_epoch, diff_epoch=diff_epoch, is_best=(loss_epoch < min(loss_history))) loss_history.append(loss_epoch) else: print(old_model_string) break # Cleanup and mark that the experiment successfully completed if hyperdash_support: exp.end()
def train_multidec_transductive(args):
    """Transductive semi-supervised training of MultiDEC on image+text data.

    Loads image/text features and labels, restores the "all-data" pretrained
    SDAE encoders for both modalities, and runs either the SSL-DEC or the
    plain transductive fit depending on args.ssldec, finally printing
    accuracy/NMI/F1. The Hyperdash experiment is always closed via
    try/finally.
    """
    print("Training multidec")
    device = torch.device(args.gpu)
    df_image_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_" + args.image_csv),
        index_col=0, encoding='utf-8-sig')
    df_text_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_" + args.text_csv),
        index_col=0, encoding='utf-8-sig')
    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0, encoding='utf-8-sig')
    label_array = np.array(df_label['category'])
    # Labels assumed 0-based consecutive ints, so max+1 = cluster count.
    n_clusters = np.max(label_array) + 1
    exp = Experiment(args.prefix_csv + "_MDEC", capture_io=True)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        # trans_csv lists the labeled subset used for the transductive fit.
        df_train = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.trans_csv),
                               index_col=0, encoding='utf-8-sig')
        print("Loading dataset...")
        full_dataset, train_dataset = load_transductive_semi_supervised_csv_data(
            df_image_data, df_text_data, df_label, df_train, CONFIG)
        print("\nLoading dataset completed")
        image_encoder = MDEC_encoder(input_dim=args.input_dim,
                                     z_dim=args.latent_dim,
                                     n_clusters=n_clusters,
                                     encodeLayer=[500, 500, 2000],
                                     activation="relu",
                                     dropout=0)
        # NOTE: "_image" "_" is implicit string concatenation ("_image_").
        image_encoder.load_model(
            os.path.join(
                CONFIG.CHECKPOINT_PATH, args.prefix_model + "_image"
                "_" + args.target_dataset + "_sdae_" + str(args.latent_dim) +
                "_all.pt"))
        # image_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_image_sdae_" + str(fold_idx)) + ".pt")
        text_encoder = MDEC_encoder(input_dim=args.input_dim,
                                    z_dim=args.latent_dim,
                                    n_clusters=n_clusters,
                                    encodeLayer=[500, 500, 2000],
                                    activation="relu",
                                    dropout=0)
        text_encoder.load_model(
            os.path.join(
                CONFIG.CHECKPOINT_PATH, args.prefix_model + "_text"
                "_" + args.target_dataset + "_sdae_" + str(args.latent_dim) +
                "_all.pt"))
        # text_encoder.load_model(os.path.join(CONFIG.CHECKPOINT_PATH, "sampled_plus_labeled_scaled_text_sdae_" + str(fold_idx)) + ".pt")
        mdec = MultiDEC(device=device,
                        image_encoder=image_encoder,
                        text_encoder=text_encoder,
                        ours=args.ours,
                        use_prior=args.use_prior,
                        n_clusters=n_clusters)
        # Two fitting strategies share the same save path and hyperparams.
        if args.ssldec:
            mdec.fit_predict_transductive_ssldec(
                full_dataset,
                train_dataset,
                args,
                CONFIG,
                lr=args.lr,
                batch_size=args.batch_size,
                num_epochs=args.epochs,
                save_path=os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_csv + "_mdec_" + str(args.latent_dim) +
                    "_all.pt"),
                tol=args.tol,
                kappa=args.kappa)
        else:
            mdec.fit_predict_transductive(
                full_dataset,
                train_dataset,
                args,
                CONFIG,
                lr=args.lr,
                batch_size=args.batch_size,
                num_epochs=args.epochs,
                save_path=os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_csv + "_mdec_" + str(args.latent_dim) +
                    "_all.pt"),
                tol=args.tol,
                kappa=args.kappa)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (mdec.acc, mdec.nmi, mdec.f_1))
    finally:
        exp.end()
def train_reconstruction(train_loader, test_loader, encoder, decoder, args):
    """Train an encoder/decoder pair to reconstruct sentences.

    Runs cross-entropy reconstruction training with separate Adam optimizers
    for encoder and decoder, periodically prints a sample reconstruction,
    evaluates on the test loader, decays the learning rate, checkpoints, and
    finally dumps the vocabulary and the final models.

    Fixes vs. original:
    - `reconstruction_loss.data[0]` replaced with `.item()` (the `.data[0]`
      scalar idiom raises on PyTorch >= 0.5; the sibling training code in
      this project already uses `.item()`).
    - `encoder.cuda()` / `decoder.cuda()` hoisted out of the batch loop —
      moving the models to the GPU once is sufficient.

    :param train_loader: DataLoader over training batches of token-id tensors
    :param test_loader: DataLoader used by eval_reconstruction
    :param encoder: model mapping token ids -> latent h
    :param decoder: model mapping latent h -> per-token probabilities
    :param args: namespace with lr, epochs, use_cuda, sentence_len,
        log_interval, test_interval, lr_decay_interval, save_interval, save_dir
    """
    exp = Experiment("Reconstruction Training")
    try:
        lr = args.lr
        encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr)
        decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr)
        encoder.train()
        decoder.train()
        # Move models to GPU once, not per batch.
        if args.use_cuda:
            encoder.cuda()
            decoder.cuda()
        steps = 0
        for epoch in range(1, args.epochs + 1):
            print("=======Epoch========")
            print(epoch)
            for batch in train_loader:
                feature = Variable(batch)
                if args.use_cuda:
                    feature = feature.cuda()
                encoder_opt.zero_grad()
                decoder_opt.zero_grad()
                h = encoder(feature)
                prob = decoder(h)
                reconstruction_loss = compute_cross_entropy(prob, feature)
                reconstruction_loss.backward()
                encoder_opt.step()
                decoder_opt.step()
                steps += 1
                print("Epoch: {}".format(epoch))
                print("Steps: {}".format(steps))
                # Per-token loss: divide the summed loss by the sentence length.
                print("Loss: {}".format(reconstruction_loss.item() / args.sentence_len))
                exp.metric("Loss", reconstruction_loss.item() / args.sentence_len)
                # check reconstructed sentence
                if steps % args.log_interval == 0:
                    print("Test!!")
                    input_data = feature[0]
                    single_data = prob[0]
                    # argmax over the vocabulary dimension -> predicted token ids
                    _, predict_index = torch.max(single_data, 1)
                    input_sentence = util.transform_id2word(input_data.data, train_loader.dataset.index2word, lang="en")
                    predict_sentence = util.transform_id2word(predict_index.data, train_loader.dataset.index2word, lang="en")
                    print("Input Sentence:")
                    print(input_sentence)
                    print("Output Sentence:")
                    print(predict_sentence)
                if steps % args.test_interval == 0:
                    eval_reconstruction(encoder, decoder, test_loader, args)
            if epoch % args.lr_decay_interval == 0:
                # decrease learning rate; optimizers are rebuilt with the new lr
                lr = lr / 5
                encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr)
                decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr)
                encoder.train()
                decoder.train()
            if epoch % args.save_interval == 0:
                util.save_models(encoder, args.save_dir, "encoder", steps)
                util.save_models(decoder, args.save_dir, "decoder", steps)
        # finalization
        # save vocabulary
        with open("word2index", "wb") as w2i, open("index2word", "wb") as i2w:
            pickle.dump(train_loader.dataset.word2index, w2i)
            pickle.dump(train_loader.dataset.index2word, i2w)
        # save models
        util.save_models(encoder, args.save_dir, "encoder", "final")
        util.save_models(decoder, args.save_dir, "decoder", "final")
        print("Finish!!!")
    finally:
        exp.end()
def train_reconstruction(args):
    """Train an image-sequence convolutional autoencoder.

    Builds a Conv/Deconv autoencoder sized from CONFIG.MAX_SEQUENCE_LEN,
    optionally resumes from a checkpoint, trains with MSE loss under a
    cyclical learning-rate schedule, evaluates each epoch, and checkpoints
    models + optimizer + scheduler state. Hyperdash Experiment logs the run.
    """
    device = torch.device(args.gpu)
    print("Loading dataset...")
    train_dataset, val_dataset = load_imgseq_data(args, CONFIG)
    print("Loading dataset completed")
    train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle),\
        DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
    #imgseq_encoder = imgseq_model.RNNEncoder(args.embedding_dim, args.num_layer, args.latent_size, bidirectional=True)
    #imgseq_decoder = imgseq_model.RNNDecoder(CONFIG.MAX_SEQUENCE_LEN, args.embedding_dim, args.num_layer, args.latent_size, bidirectional=True)
    # Conv output lengths: floor((t - kernel) / stride) + 1 with kernel=3, stride=1.
    t1 = CONFIG.MAX_SEQUENCE_LEN
    t2 = int(math.floor((t1 - 3) / 1) + 1)
    t3 = int(math.floor((t2 - 3) / 1) + 1)
    imgseq_encoder = imgseq_model.ConvolutionEncoder(
        embedding_dim=args.embedding_dim, t3=t3, filter_size=300, filter_shape=3, latent_size=1000)
    imgseq_decoder = imgseq_model.DeconvolutionDecoder(
        embedding_dim=args.embedding_dim, t3=t3, filter_size=300, filter_shape=3, latent_size=1000)
    if args.resume:
        print("Restart from checkpoint")
        # map_location keeps checkpoint tensors on CPU regardless of save device.
        checkpoint = torch.load(os.path.join(CONFIG.CHECKPOINT_PATH, args.resume),
                                map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        imgseq_encoder.load_state_dict(checkpoint['imgseq_encoder'])
        imgseq_decoder.load_state_dict(checkpoint['imgseq_decoder'])
    else:
        print("Start from initial")
        start_epoch = 0
    imgseq_autoencoder = imgseq_model.ImgseqAutoEncoder(
        imgseq_encoder, imgseq_decoder)
    criterion = nn.MSELoss().to(device)
    imgseq_autoencoder.to(device)
    # Base lr is 1.0 on purpose: LambdaLR multiplies it by the cyclical factor
    # clr(step), so the effective lr cycles between args.lr and args.lr * args.lr_factor.
    optimizer = AdamW(imgseq_autoencoder.parameters(), lr=1., weight_decay=args.weight_decay, amsgrad=True)
    step_size = args.half_cycle_interval * len(train_loader)
    clr = cyclical_lr(step_size, min_lr=args.lr, max_lr=args.lr * args.lr_factor)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
    exp = Experiment("Image-sequence autoencoder " + str(args.latent_size), capture_io=False)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        imgseq_autoencoder.train()
        for epoch in range(start_epoch, args.epochs):
            print("Epoch: {}".format(epoch))
            for steps, batch in enumerate(train_loader):
                torch.cuda.empty_cache()
                feature = Variable(batch).to(device)
                optimizer.zero_grad()
                feature_hat = imgseq_autoencoder(feature)
                loss = criterion(feature_hat, feature)
                loss.backward()
                optimizer.step()
                scheduler.step()
                if (steps * args.batch_size) % args.log_interval == 0:
                    print("Epoch: {} at {} lr: {}".format(
                        epoch, str(datetime.datetime.now()), str(scheduler.get_lr())))
                    print("Steps: {}".format(steps))
                    print("Loss: {}".format(loss.detach().item()))
                    # NOTE(review): input_data is assigned but never used here —
                    # presumably a leftover from a sample-logging path; confirm.
                    input_data = feature[0]
                del feature, feature_hat, loss
            exp.log("\nEpoch: {} at {} lr: {}".format(
                epoch, str(datetime.datetime.now()), str(scheduler.get_lr())))
            _avg_loss = eval_reconstruction(imgseq_autoencoder, criterion, val_loader, device)
            exp.log("\nEvaluation - loss: {}".format(_avg_loss))
            # Checkpoint model, optimizer, and scheduler state each epoch.
            util.save_models(
                {
                    'epoch': epoch + 1,
                    'imgseq_encoder': imgseq_encoder.state_dict(),
                    'imgseq_decoder': imgseq_decoder.state_dict(),
                    'avg_loss': _avg_loss,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                }, CONFIG.CHECKPOINT_PATH, "imgseq_autoencoder_" + str(args.latent_size))
        print("Finish!!!")
    finally:
        exp.end()
class Experiment:
    """One training trial: dataset + model + optimizer, with logging.

    Loads tuned hyperparameters and constants from JSON config files,
    prepares a fresh per-trial output directory, and wires up Hyperdash,
    TensorBoard, and CSV logging callbacks. Call begin() to run the trial.
    """

    @logger.read
    def __init__(self, dataset_name, model_name, optimizer_name, trial_num):
        """
        :param dataset_name: name of the dataset
        :type dataset_name: str
        :param model_name: name of the model
        :type model_name: str
        :param optimizer_name: name of the optimizer
        :type optimizer_name: str
        :param trial_num: current number of repeated trials
        :type trial_num: int
        """
        # get optimized hyperparameters
        with open(
                f'../params/{dataset_name}_{model_name}_{optimizer_name}/result.json'
        ) as f:
            params = json.load(f)
        # get instances
        self.dataset = Datasets.get(dataset_name)
        self.model = Models.get(model_name, dataset=self.dataset)
        self.optimizer = Optimizers.get(optimizer_name, params=params)
        # get config
        with open('./config.json') as f:
            config = json.load(f)
        # get constants (loss name, batch size, epoch count per dataset/model)
        c = config['constants'][dataset_name][model_name]
        self.loss = c['loss']
        self.batch_size = c['batch_size']
        self.epochs = c['epochs']
        # configure and initialize directory — an existing trial dir is wiped
        d = self.main_dir = f'../data/{dataset_name}_{model_name}_{optimizer_name}/trial{trial_num}'
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)
        # configure hyperdash experiment
        self.hd_exp = HyperdashExperiment(
            f'{dataset_name}',
            api_key_getter=lambda: config['hyperdash']['api_key'])
        self.hd_exp.param('dataset_name', dataset_name)
        self.hd_exp.param('model_name', model_name)
        self.hd_exp.param('optimizer_name', optimizer_name)
        self.hd_exp.param('trial_num', trial_num)
        for k, v in params.items():
            self.hd_exp.param(k, v)
        # set callbacks (Hyperdash metrics, TensorBoard, wall-time log, CSV)
        self.callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      self.hd_exp),
            TensorBoard(log_dir=f'{self.main_dir}/tensorboard'),
            TimeLogger(filename=f'{self.main_dir}/time.csv'),
            CSVLogger(filename=f'{self.main_dir}/result.csv', append=True)
        ]

    @logger.write
    def begin(self):
        """Run the trial: fit with a 20% validation split, evaluate, save scores."""
        # get data
        (x_train, y_train), (x_test, y_test) = self.dataset.get_batch()
        # start learning
        self.model.compile(loss=self.loss,
                           optimizer=self.optimizer,
                           metrics=['accuracy'])
        self.model.fit(x_train, y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       callbacks=self.callbacks,
                       validation_split=0.2,
                       verbose=2)
        # save final scores
        score = self.model.evaluate(x_test, y_test, verbose=1)
        with open(f'{self.main_dir}/test.json', 'w') as f:
            json.dump({
                'test loss': score[0],
                'test accuracy': score[1]
            }, f, indent=4)
        # stop hyperdash experiment
        self.hd_exp.end()
def run(self):
    """Build, train, save, and smoke-test a Siamese FaceID network.

    Constructs a SqueezeNet-style feature extractor over 200x200x4 (RGB-D)
    inputs, wraps it in a two-branch Siamese model trained with a contrastive
    loss via fit_generator, checkpoints/saves the model, and runs a couple of
    evaluation/prediction sanity checks. Hyperdash mirrors the Keras metrics.
    """
    #
    # hyperdash
    #
    exp = Experiment("faceid")
    hd_callback = Hyperdash(exp=exp)
    # print(self.create_couple("RGB-D_Face_database/faceid_train/"))
    # print(self.create_couple_rgbd("RGB-D_Face_database/faceid_val/"))
    self.create_couple_rgbd("RGB-D_Face_database/faceid_val/")
    # print(self.create_wrong("RGB-D_Face_database/faceid_train/"))
    # print(self.create_wrong_rgbd("RGB-D_Face_database/faceid_val/")[0].shape)
    # quit()
    #
    # create network — SqueezeNet-like trunk built from self.fire modules
    #
    img_input = Input(shape=(200, 200, 4))
    x = Convolution2D(64, (5, 5), strides=(2, 2), padding='valid')(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    x = self.fire(x, squeeze=16, expand=16)
    x = self.fire(x, squeeze=16, expand=16)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    x = self.fire(x, squeeze=32, expand=32)
    x = self.fire(x, squeeze=32, expand=32)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    x = self.fire(x, squeeze=48, expand=48)
    x = self.fire(x, squeeze=48, expand=48)
    x = self.fire(x, squeeze=64, expand=64)
    x = self.fire(x, squeeze=64, expand=64)
    x = Dropout(0.2)(x)
    x = Convolution2D(512, (1, 1), padding='same')(x)
    out = Activation('relu')(x)
    modelsqueeze = Model(img_input, out)
    print("\nmodel squeeze summary")
    modelsqueeze.summary()
    plot_model(modelsqueeze, show_shapes=True, to_file='model_squeeze.png')
    # Embedding head: trunk -> 512 relu -> 128 linear, L2-normalized.
    im_in = Input(shape=(200, 200, 4))
    x1 = modelsqueeze(im_in)
    x1 = Flatten()(x1)
    x1 = Dense(512, activation="relu")(x1)
    x1 = Dropout(0.2)(x1)
    feat_x = Dense(128, activation="linear")(x1)
    feat_x = Lambda(lambda x: K.l2_normalize(x, axis=1))(feat_x)
    model_top = Model(inputs=[im_in], outputs=feat_x)
    print("\nmodel top summary")
    model_top.summary()
    plot_model(model_top, show_shapes=True, to_file='model_top.png')
    # Siamese pair: shared model_top on both inputs, merged by euclidean distance.
    im_in1 = Input(shape=(200, 200, 4))
    im_in2 = Input(shape=(200, 200, 4))
    feat_x1 = model_top(im_in1)
    feat_x2 = model_top(im_in2)
    lambda_merge = Lambda(self.euclidean_distance)([feat_x1, feat_x2])
    model_final = Model(inputs=[im_in1, im_in2], outputs=lambda_merge)
    print("\nmodel final summary")
    model_final.summary()
    plot_model(model_final, show_shapes=True, to_file='model_final.png')
    adam = Adam(lr=0.001)
    # NOTE(review): sgd is created but never used — adam is the active optimizer.
    sgd = SGD(lr=0.001, momentum=0.9)
    model_final.compile(optimizer=adam, loss=self.contrastive_loss)
    #
    # plot model
    #
    # print("write model summary png...")
    # plot_model(model_final, show_shapes=True, to_file='model.png')
    # print("write model summary png...done")
    #
    # generator
    #
    gen = self.generator(16)
    val_gen = self.val_generator(4)
    #
    # checkpoint
    # save the model after each epoch
    # file_name = str(datetime.datetime.now()).split(' ')[0] + '_{epoch:02d}.hdf5'
    # filepath = os.path.join(save_dir, file_name)
    #
    """
    keras.callbacks.ModelCheckpoint(
        filepath,
        monitor='val_loss',
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
        period=1)
    """
    drive_dir = 'RGB-D_Face_database/snapshot/'
    base_file_name = 'model'
    checkpointer = keras.callbacks.ModelCheckpoint(
        # filepath=drive_dir+base_file_name+'.{epoch:02d}-loss{loss:.2f}-acc{acc:.2f}-vloss{val_loss:.2f}-vacc{val_acc:.2f}.hdf5',
        filepath=drive_dir + base_file_name + '.epoch{epoch:03d}-loss{loss:.4f}-val_loss{val_loss:.4f}.hdf5',
        # filepath=drive_dir+base_file_name+'.{epoch:02d}-{val_loss:.2f}.hdf5',
        verbose=1,
        save_best_only=True,
        # monitor='val_acc',
        monitor='val_loss',
        mode='auto')
    #
    # ProgressbarLogger
    #
    # NOTE(review): pbarl is built but not passed in the active callbacks list.
    pbarl = keras.callbacks.ProgbarLogger(count_mode='samples')
    #
    # CSV Logger
    # save each epoch's results to a csv file (on Google Drive this is not
    # reflected until training finishes, so save locally)
    #
    """
    keras.callbacks.CSVLogger(
        filename,
        separator=',',
        append=False)
    """
    csv_logger = keras.callbacks.CSVLogger('./xxx.log')
    #
    # reduce LR on plateau
    # lower the learning rate when the monitored metric stops improving
    #
    """
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)
    """
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)
    #
    # early stopping
    #
    """
    keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto')
    """
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', verbose=1, patience=100)
    #
    # tensor board
    #
    """
    keras.callbacks.TensorBoard(
        log_dir='./logs', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
    $tensorboard --logdir=/full_path_to_your_logs
    """
    # tensorboard = keras.callbacks.TensorBoard(log_dir="RGB-D_Face_database/log", histogram_freq=1)
    # old_session = KTF.get_session()
    # new_session = tf.Session('')
    # KTF.set_session(new_session)
    #
    # generator
    #
    """
    outputs = model_final.fit_generator(
        generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0)
    """
    # steps_per_epoch=30,
    # epochs=50,
    # callbacks=[checkpointer, csv_logger, reduce_lr, early_stop, tensorboard, hd_callback],
    # validation_steps=20
    # fit_generator(
    #     self,
    #     generator,
    #     steps_per_epoch=None,
    #     epochs=1,
    #     verbose=1,
    #     callbacks=None,
    #     validation_data=None,
    #     validation_steps=None,
    #     class_weight=None,
    #     max_queue_size=10,
    #     workers=1,
    #     use_multiprocessing=False,
    #     shuffle=True,
    #     initial_epoch=0)
    outputs = model_final.fit_generator(
        gen,
        steps_per_epoch=10,  # 30
        epochs=1,  # 50
        verbose=1,
        # callbacks=[checkpointer],
        # callbacks=[checkpointer, hd_callback],
        callbacks=[
            checkpointer, csv_logger, early_stop, reduce_lr, hd_callback
        ],
        # callbacks=[checkpointer, pbarl, csv_logger, early_stop, reduce_lr, hd_callback],
        # callbacks=[checkpointer, csv_logger, early_stop, reduce_lr, tensorboard, hd_callback],
        # pickle_safe=True,
        validation_data=val_gen,
        validation_steps=20,
        # workers=8,
        use_multiprocessing=True)  # 20
    #
    # model save
    #
    print('saving model_final...')
    model_final.save("RGB-D_Face_database/snapshot/model_final.h5")
    print('saving model_final...done')
    #
    # model test
    #
    """
    """
    cop = self.create_couple("RGB-D_Face_database/faceid_val/")
    # NOTE(review): model was compiled with a loss but no metrics, so evaluate()
    # may return a scalar; score[1] below would then fail — confirm at runtime.
    score = model_final.evaluate([
        cop[0].reshape((1, 200, 200, 4)),
        cop[1].reshape((1, 200, 200, 4))
    ], np.array([0.]))
    print('Test score(couple):', score[0])
    print('Test accuracy(couple):', score[1])
    cop = self.create_wrong_rgbd("RGB-D_Face_database/faceid_val/")
    # NOTE(review): predict() returns distances, not (loss, accuracy); the
    # labels on these prints look copy-pasted — verify score[1] is valid here.
    score = model_final.predict([
        cop[0].reshape((1, 200, 200, 4)),
        cop[1].reshape((1, 200, 200, 4))
    ])
    print('Test score(wrong_rgbd):', score[0])
    print('Test accuracy(wrong_rgbd):', score[1])
    #
    # save model (architecture,json)
    #
    print('save the architecture of a model...')
    json_string = model_final.to_json()
    open(drive_dir + base_file_name + 'model.json', 'w').write(json_string)
    # open(os.path.join(drive_dir+base_file_name,'model.json'), 'w').write(json_string)
    print('save the architecture of a model...done')
    print('save weights...')
    yaml_string = model_final.to_yaml()
    open(drive_dir + base_file_name + 'model.yaml', 'w').write(yaml_string)
    # open(os.path.join(drive_dir+base_file_name,'model.yaml'), 'w').write(yaml_string)
    model_final.save_weights(drive_dir + base_file_name + 'model_weights.hdf5')
    # model_final.save_weights(os.path.join(drive_dir+base_file_name,'model_weights.hdf5'))
    print('save weights...done')
    # debug: verify the saved model round-trips through load_model
    print('debug: load_model...')
    del model_final
    model_final = keras.models.load_model(
        "RGB-D_Face_database/snapshot/model_final.h5",
        # custom_objects={
        #     'euclidean_distance': euclidean_distance,
        #     'contrastive_loss': contrastive_loss,
        #     'l2_normalize': K.l2_normalize
        # },
        compile=False)
    print('debug: load_model...done')
    #
    # tensorboard
    #
    # KTF.set_session(old_session)
    # print('tensorboard done')
    #
    # hyperdash
    #
    print('hyperdash done')
    exp.end()