def create_vocabulary(vocab_fp, data_path, sep='\t'):
    """Build a word vocabulary from the review text field and save it.

    Args:
        vocab_fp: destination path handed to ``Vocabulary.write``.
        data_path: location of the input data; wrapped as
            ``{'data_path': data_path}`` and used as the data source.
        sep: separator forwarded to ``assemble_vocab_pipeline``
            (presumably the column delimiter of the input files; confirm
            against the pipeline implementation).

    The vocabulary file itself is always written with a single space as
    the separator, independently of ``sep``.
    """
    pipeline = assemble_vocab_pipeline(text_fname=InpDataF.REV_TEXT, sep=sep)
    words_vocab = Vocabulary(pipeline, name_prefix="words")

    # Register the default special symbols ahead of vocabulary creation so
    # that they end up at the top of the vocabulary.
    for symbol in VOCAB_DEFAULT_SYMBOLS:
        if symbol in words_vocab:
            continue
        words_vocab.add_special_symbol(symbol)

    source = {'data_path': data_path}
    words_vocab.create(data_source=source, data_fnames=InpDataF.REV_TEXT)

    # Persist with a space separator regardless of the input separator.
    words_vocab.write(vocab_fp, sep=' ')
r=run_hp.c_r, max_val=run_hp.c_kl_ann_max_val) z_kl_ann = KlCycAnnealing(t=run_hp.z_kl_ann_batches, m=run_hp.z_m, r=run_hp.c_r, max_val=run_hp.z_kl_ann_max_val) # PIPELINES AND VOCAB # vocab_pipeline = assemble_vocab_pipeline(text_fname=InpDataF.REV_TEXT) word_vocab = Vocabulary(vocab_pipeline, name_prefix="word") # adding special symbols before creating vocab, so they would appear on top for st in VOCAB_DEFAULT_SYMBOLS: if st not in word_vocab: word_vocab.add_special_symbol(st) word_vocab.load_or_create(run_hp.words_vocab_fp, data_source=vocab_data_source, max_size=model_hp.ext_vocab_size, sep=' ', data_fnames=InpDataF.REV_TEXT) word_vocab.write(comb_paths(run_hp.output_path, "word_vocab.txt"), sep=' ') train_pipeline = assemble_train_pipeline( word_vocab, max_groups_per_batch=run_hp.train_max_groups_per_batch, min_revs_per_group=run_hp.max_rev_per_group, max_revs_per_group=run_hp.max_rev_per_group, seed=None,