Example 1
def main():
    config = Config()
    args = add_arguments(config)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # parse the configuration and build the dataset
    config.parse_arg(args)
    dset = Dataset(config)
    print('------------------------------------------------------------')
    print('Pivot word discovery:')
    dset.build()
    config.vocab_size = len(dset.word2id)

    print('------------------------------------------------------------')
    print('Pivot classifier:')
    dset.classify()

    print('------------------------------------------------------------')
    print('Precision-recall histogram:')
    dset.get_prec_recl()

    print('------------------------------------------------------------')
    print('Storing the pivot outputs')
    dset.store_pivots()

    # train and evaluate the chosen classifier (feed-forward or CNN)
    if (args.classifier == 'ff'):
        classifier = FFClassifier(config)
        x_train, y_train = dset.to_bow_numpy('train')
        classifier.train(x_train, y_train)

        x_dev, y_dev = dset.to_bow_numpy('dev')
        classifier.test(x_dev, y_dev)

        x_test, y_test = dset.to_bow_numpy('test')
        classifier.test(x_test, y_test)
    elif (args.classifier == 'cnn'):
        cnn = CNNClassifier(config)
        x_train, y_train = dset.to_sent_numpy('train')
        cnn.train(x_train, y_train)

        x_dev, y_dev = dset.to_sent_numpy('dev')
        cnn.test(x_dev, y_dev)

        x_test, y_test = dset.to_sent_numpy('test')
        cnn.test(x_test, y_test)
    else:
        pass

    # correlation between the pivot words and the classifier words (not computed in this example)
    return
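
Example 1 assumes an add_arguments(config) helper that returns parsed command-line arguments with at least gpu_id and classifier fields. The repository's actual implementation is not shown on this page; the sketch below is a minimal hypothetical version based only on the attributes the example reads (the real helper presumably also pulls defaults from config, which is ignored here).

import argparse

def add_arguments(config):
    """Hypothetical sketch of the argument parser assumed by Example 1."""
    parser = argparse.ArgumentParser()
    # The value is written to CUDA_VISIBLE_DEVICES, so it stays a string
    parser.add_argument("--gpu_id", type=str, default="0")
    # Example 1 branches on 'ff' (bag-of-words feed-forward) vs 'cnn'
    parser.add_argument("--classifier", type=str, default="ff",
                        choices=["ff", "cnn"])
    return parser.parse_args()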
Example 2
def main():
    # configuration
    config = Config()
    config.parse_arg(FLAGS)
    config.setup_path()
    config.print_arg()

    # dataset
    if (config.dataset == 'wikibio'):
        dset = DatasetTable2text(config)
        dset.load()
        config.key_size = len(dset.key2id)
    else:
        dset = Dataset(config)
        dset.build()
    config.vocab_size = len(dset.word2id)
    config.dec_start_id = dset.word2id["_GOO"]
    config.dec_end_id = dset.word2id["_EOS"]
    config.pad_id = dset.pad_id
    config.stop_words = dset.stop_words
    config.id2wordemb = dset.id2wordemb

    # model
    if (config.model_name == "transformer_bow"):
        Model = TransformerBow
    elif (config.model_name == "seq2seq"):
        if (config.dataset == 'wikibio'): Model = Seq2seqData2text
        else: Model = Seq2seq
    elif (config.model_name == "bow_seq2seq"): Model = BowSeq2seq
    elif (config.model_name == "vae"): Model = Vae
    elif (config.model_name == "hierarchical_vae"): Model = Hierarchical_Vae
    elif (config.model_name == "latent_bow"):
        if (config.dataset == 'wikibio'): Model = LatentBowData2text
        else: Model = LatentBow
    elif (config.model_name == "lm"): Model = LM
    else:
        msg = "the model name shoule be in ['transformer_bow', 'seq2seq', 'vae', 'hierarchical_vae', 'latent_low', 'lm'], "
        msg += "current name: %s" % config.model_name
        raise Exception(msg)

    model = Model(config)
    with tf.variable_scope(config.model_name):
        model.build()

    # controller
    controller = Controller(config)
    if (config.model_name != "lm"):
        if ("lm" in controller.eval_metrics_list):
            controller.build_lm(LM, config)
    controller.train(model, dset)
    return
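
The if/elif chain in Example 2 maps a model name to a model class. The same dispatch can be written as a dictionary lookup, which keeps the list of valid names and the error message in one place. This is only a sketch, not the repository's code; it assumes the model classes named in Example 2 are importable from the project.

MODEL_REGISTRY = {
    "transformer_bow": TransformerBow,
    "seq2seq": Seq2seq,
    "bow_seq2seq": BowSeq2seq,
    "vae": Vae,
    "hierarchical_vae": Hierarchical_Vae,
    "latent_bow": LatentBow,
    "lm": LM,
}

def pick_model(config):
    # The wikibio dataset swaps in the data-to-text variants, as in Example 2
    if config.dataset == 'wikibio':
        if config.model_name == 'seq2seq': return Seq2seqData2text
        if config.model_name == 'latent_bow': return LatentBowData2text
    try:
        return MODEL_REGISTRY[config.model_name]
    except KeyError:
        raise Exception("the model name should be in %s, current name: %s"
                        % (list(MODEL_REGISTRY), config.model_name))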
Example 3
def main():
  config = Config()
  args = add_arguments(config)
  config.parse_arg(args)
  dset = Dataset(config)
  dset.build()
  # print('debug:')
  # print(dset.id2word[1])
  config.vocab_size = len(dset.word2id)

  # read the transferred sentences
  transfer_analysis = PivotTransferAnalysis(config)

  if (config.model == 'cmu'):
    transfer_analysis.pipeline_w_cmu(dset)
  else:
    transfer_analysis.pipeline(dset)
  return 
Example 4
def main():
    # load the configuration and close the file handle properly
    with open("config.json") as f:
        config = json.load(f)
    dset = Dataset(config)
    dset.build()
    return
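
Example 4 passes the raw JSON dict to Dataset, while the other examples access configuration as attributes (config.vocab_size, config.dataset, ...). If Dataset expects attribute access, the dict can be wrapped first. A minimal sketch using only the standard library, with the contents of config.json left unspecified:

import json
from types import SimpleNamespace

with open("config.json") as f:
    raw = json.load(f)
# Expose the JSON keys as attributes (e.g. config.dataset instead of raw["dataset"])
config = SimpleNamespace(**raw)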