def from_arguments(cls, c_args, val_each_epoch=True): """Constructor method to build BowVAETrainer from command-line arguments directly. Parameters: c_args (`argparse.Namespace`): Command-line arguments. val_each_epoch (bool): Flag for performing validation each epoch. optional (default = True) """ i_dt = datetime.datetime.now() log_out_dir = \ os.path.join(c_args.save_dir, "train_{}_{}_{}_{}_{}_{}_{}" .format(i_dt.year,i_dt.month,i_dt.day,i_dt.hour,i_dt.minute,i_dt.second,i_dt.microsecond)) if not os.path.exists(log_out_dir): lpath = Path(log_out_dir) lpath.mkdir(parents=True, exist_ok=True) if not log_utils.CONFIGURED: logging_config(folder=log_out_dir, name='tmnt', level=c_args.log_level, console_level=c_args.log_level) logging.info(c_args) seed_rng(c_args.seed) if c_args.vocab_file and c_args.tr_vec_file: vpath = Path(c_args.vocab_file) tpath = Path(c_args.tr_vec_file) if not (vpath.is_file() and tpath.is_file()): raise Exception( "Vocab file {} and/or training vector file {} do not exist" .format(c_args.vocab_file, c_args.tr_vec_file)) logging.info( "Loading data via pre-computed vocabulary and sparse vector format document representation" ) vocab = load_vocab(c_args.vocab_file, encoding=c_args.str_encoding) voc_size = len(vocab) X, y, wd_freqs, _ = file_to_data(c_args.tr_vec_file, voc_size) model_out_dir = c_args.model_dir if c_args.model_dir else os.path.join( log_out_dir, 'MODEL') n_covars = int(float(np.max(y)) + 1) if not os.path.exists(model_out_dir): os.mkdir(model_out_dir) return cls(log_out_dir, model_out_dir, vocab, wd_freqs, c_args.tr_vec_file, c_args.val_vec_file, coherence_via_encoder=c_args.encoder_coherence, pretrained_param_file=c_args.pretrained_param_file, topic_seed_file=c_args.topic_seed_file, use_labels_as_covars=c_args.use_labels_as_covars, use_gpu=c_args.use_gpu, n_covars=n_covars, val_each_epoch=val_each_epoch)
def from_arguments(cls, args, config): i_dt = datetime.datetime.now() train_out_dir = '{}/train_{}_{}_{}_{}_{}_{}'.format( args.save_dir, i_dt.year, i_dt.month, i_dt.day, i_dt.hour, i_dt.minute, i_dt.second) print("Set logging config to {}".format(train_out_dir)) logging_config(folder=train_out_dir, name='train_trans_vae', level=args.log_level, console_level=args.log_level, no_console=False) logging.info(args) bow_vocab = load_vocab(args.bow_vocab_file) trainer = cls(train_out_dir, bow_vocab, args.tr_file, args.val_file, use_gpu=args.use_gpu, log_interval=args.log_interval) return trainer
def from_arguments(cls, args, config): i_dt = datetime.datetime.now() train_out_dir = '{}/train_{}_{}_{}_{}_{}_{}'.format( args.save_dir, i_dt.year, i_dt.month, i_dt.day, i_dt.hour, i_dt.minute, i_dt.second) print("Set logging config to {}".format(train_out_dir)) logging_config(folder=train_out_dir, name='train_trans_vae', level=logging.INFO, no_console=False) logging.info(args) bow_vocab = load_vocab(args.bow_vocab_file) data_train, bert_base, vocab, data_csr = load_dataset_bert( args.tr_file, len(bow_vocab), max_len=config.sent_size, ctx=mx.cpu()) if args.val_file: data_val, _, _, val_csr = load_dataset_bert( args.val_file, len(bow_vocab), max_len=config.sent_size, ctx=mx.cpu()) val_wds = val_csr.sum().asscalar() else: data_val, val_csr, val_wds = None, None, None sample_size = min(50000, data_csr.shape[0]) data = data_csr[:sample_size] wd_freqs = mx.nd.sum(data, axis=0) trainer = cls(train_out_dir, bow_vocab, wd_freqs, val_wds, (data_train, data_csr), (data_val, val_csr), use_gpu=args.use_gpu, log_interval=args.log_interval) return trainer
    # NOTE(review): this chunk begins mid-function — the enclosing `def`
    # (presumably get_top_k_terms_from_file(in_file)) is outside this view.
    top_k_terms = []
    # Each line of the file is a comma-separated list of terms for one topic.
    with io.open(in_file, 'r') as fp:
        for l in fp:
            ts = [t.strip() for t in l.split(',')]
            top_k_terms.append(ts)
    return top_k_terms


# Silence MXNet's sparse-storage fallback warnings for the script below.
os.environ["MXNET_STORAGE_FALLBACK_LOG_VERBOSE"] = "0"

if __name__ == "__main__":
    parser = setup_parser()
    args = parser.parse_args()
    verbose = False  ### XXX - add as argument
    vocab = load_vocab(args.vocab_file)
    if args.override_top_k_terms:
        # Evaluation-only path: score externally supplied per-topic term
        # lists with NPMI against the test set and exit without loading
        # any model.
        top_k_words_per_topic = get_top_k_terms_from_file(
            args.override_top_k_terms)
        tst_csr, _, _, _ = file_to_data(args.test_file, len(vocab))
        # Map each term to its vocabulary id before scoring.
        top_k_words_per_topic_ids = [[vocab[t] for t in t_set]
                                     for t_set in top_k_words_per_topic]
        npmi_eval = EvaluateNPMI(top_k_words_per_topic_ids)
        test_npmi = npmi_eval.evaluate_csr_mat(tst_csr)
        print("**** Test NPMI = {} *******".format(test_npmi))
        exit(0)
    inference_model = BowVAEInferencer.from_saved(
        model_dir=args.model_dir,
        ctx=mx.cpu() if args.gpu < 0 else mx.gpu(args.gpu))
    # NOTE(review): the script appears to continue past this view.
def from_arguments(cls, c_args, val_each_epoch=True): """Constructor method to build BowVAETrainer from command-line arguments directly. Parameters: c_args (`argparse.Namespace`): Command-line arguments. val_each_epoch (bool): Flag for performing validation each epoch. optional (default = True) """ i_dt = datetime.datetime.now() log_out_dir = \ os.path.join(c_args.save_dir, "train_{}_{}_{}_{}_{}_{}_{}" .format(i_dt.year,i_dt.month,i_dt.day,i_dt.hour,i_dt.minute,i_dt.second,i_dt.microsecond)) ll = c_args.log_level log_level = logging.INFO if ll.lower() == 'info': log_level = logging.INFO elif ll.lower() == 'debug': log_level = logging.DEBUG elif ll.lower() == 'error': log_level = logging.ERROR elif ll.lower() == 'warning': log_level = logging.WARNING else: log_level = logging.INFO logging_config(folder=log_out_dir, name='tmnt', level=log_level, console_level=log_level) logging.info(c_args) seed_rng(c_args.seed) if c_args.vocab_file and c_args.tr_vec_file: vpath = Path(c_args.vocab_file) tpath = Path(c_args.tr_vec_file) if not (vpath.is_file() and tpath.is_file()): raise Exception( "Vocab file {} and/or training vector file {} do not exist" .format(c_args.vocab_file, c_args.tr_vec_file)) logging.info( "Loading data via pre-computed vocabulary and sparse vector format document representation" ) vocab = load_vocab(c_args.vocab_file, encoding=c_args.str_encoding) voc_size = len(vocab) X, y, wd_freqs, _ = file_to_data(c_args.tr_vec_file, voc_size) total_test_wds = 0 if c_args.val_vec_file: val_X, val_y, _, total_test_wds = file_to_data( c_args.val_vec_file, voc_size) else: val_X, val_y, total_test_wds = None, None, 0 ctx = mx.cpu() if not c_args.use_gpu else mx.gpu(0) model_out_dir = c_args.model_dir if c_args.model_dir else os.path.join( log_out_dir, 'MODEL') if not os.path.exists(model_out_dir): os.mkdir(model_out_dir) return cls(log_out_dir, model_out_dir, c_args, vocab, wd_freqs, X, val_X, total_test_wds, train_labels=y, test_labels=val_y, label_map=None, 
use_gpu=c_args.use_gpu, val_each_epoch=val_each_epoch)