def main():
  config = Config()
  args = add_arguments(config)
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

  # sample the dataset
  config.parse_arg(args)
  dset = Dataset(config)
  print('------------------------------------------------------------')
  print('Pivot word discovery:')
  dset.build()
  config.vocab_size = len(dset.word2id)
  print('------------------------------------------------------------')
  print('Pivot classifier:')
  dset.classify()
  print('------------------------------------------------------------')
  print('Precision-recall histogram:')
  dset.get_prec_recl()
  print('------------------------------------------------------------')
  print('Storing the pivot outputs')
  dset.store_pivots()

  # the logistic classifier
  if (args.classifier == 'ff'):
    classifier = FFClassifier(config)
    x_train, y_train = dset.to_bow_numpy('train')
    classifier.train(x_train, y_train)
    x_dev, y_dev = dset.to_bow_numpy('dev')
    classifier.test(x_dev, y_dev)
    x_test, y_test = dset.to_bow_numpy('test')
    classifier.test(x_test, y_test)
  elif (args.classifier == 'cnn'):
    cnn = CNNClassifier(config)
    x_train, y_train = dset.to_sent_numpy('train')
    cnn.train(x_train, y_train)
    x_dev, y_dev = dset.to_sent_numpy('dev')
    cnn.test(x_dev, y_dev)
    x_test, y_test = dset.to_sent_numpy('test')
    cnn.test(x_test, y_test)
  else:
    pass

  # correlation between the pivot words and logistic classifier words
  return
def main():
  # configuration
  config = Config()
  config.parse_arg(FLAGS)
  config.setup_path()
  config.print_arg()

  # dataset
  if (config.dataset == 'wikibio'):
    dset = DatasetTable2text(config)
    dset.load()
    config.key_size = len(dset.key2id)
  else:
    dset = Dataset(config)
    dset.build()
  config.vocab_size = len(dset.word2id)
  config.dec_start_id = dset.word2id["_GOO"]
  config.dec_end_id = dset.word2id["_EOS"]
  config.pad_id = dset.pad_id
  config.stop_words = dset.stop_words
  config.id2wordemb = dset.id2wordemb

  # model
  if (config.model_name == "transformer_bow"):
    Model = TransformerBow
  elif (config.model_name == "seq2seq"):
    if (config.dataset == 'wikibio'):
      Model = Seq2seqData2text
    else:
      Model = Seq2seq
  elif (config.model_name == "bow_seq2seq"):
    Model = BowSeq2seq
  elif (config.model_name == "vae"):
    Model = Vae
  elif (config.model_name == "hierarchical_vae"):
    Model = Hierarchical_Vae
  elif (config.model_name == "latent_bow"):
    if (config.dataset == 'wikibio'):
      Model = LatentBowData2text
    else:
      Model = LatentBow
  elif (config.model_name == "lm"):
    Model = LM
  else:
    msg = ("the model name should be in ['transformer_bow', 'seq2seq', "
           "'bow_seq2seq', 'vae', 'hierarchical_vae', 'latent_bow', 'lm'], ")
    msg += "current name: %s" % config.model_name
    raise Exception(msg)

  model = Model(config)
  with tf.variable_scope(config.model_name):
    model.build()

  # controller
  controller = Controller(config)
  if (config.model_name != "lm"):
    if ("lm" in controller.eval_metrics_list):
      controller.build_lm(LM, config)
  controller.train(model, dset)
  return
def main():
  config = Config()
  args = add_arguments(config)
  config.parse_arg(args)

  dset = Dataset(config)
  dset.build()
  # print('debug:')
  # print(dset.id2word[1])
  config.vocab_size = len(dset.word2id)

  # read the transferred sentences
  transfer_analysis = PivotTransferAnalysis(config)
  if (config.model == 'cmu'):
    transfer_analysis.pipeline_w_cmu(dset)
  else:
    transfer_analysis.pipeline(dset)
  return
def main():
  # load the configuration from a JSON file, then build the dataset
  config = json.load(open("config.json"))
  dset = Dataset(config)
  dset.build()
  return
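
# The four main() functions above are assumed to live in separate entry-point
# scripts. A minimal invocation sketch for any one of them (the __name__ guard
# below is an assumption, not part of the original listing):
if __name__ == "__main__":
  main()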