Example #1
    "sentence": (True, True),
    "reiss": (False, False),
    "maslow": (False, False),
    "plutchik": (False, False),
    "plutchik16": (False, False),
    "entity": (True, False)
}

splits = ["train", "dev", "test"]

for experiment in ["emotion", "motivation"]:
    print("Making {} data for {} class of models".format(
        experiment, sys.argv[1]))

    opt = DD()
    opt.data = DD()
    opt.data.pruned = True

    # Make a memory model (EntNet, NPN) data loader for generation
    if sys.argv[1] == "memory":
        # Make save name
        name = "processed/{}/{}_{}_data_loader.pth".format(
            experiment, "gen_memory", "-".join(splits))
        print(name)

        # Initialize data loader and load vocabs and raw data
        data_loader = data.MemoryGenModelDataLoader()
        data_loader.load_vocabs(vocab_paths, vocab_text)
        data_loader.load_data(opt,
                              splits,
                              type_=experiment,
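Both snippets build their configuration with DD(), a "dot-dict" container that exposes dictionary keys as attributes (e.g. opt.data.pruned = True). Neither example shows where DD is imported from, so the following is a minimal sketch of the behavior these examples assume, not the project's actual implementation:

class DD(dict):
    # Minimal dot-dict: keys are readable and writable as attributes.
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

With this in scope, opt = DD(); opt.data = DD(); opt.data.pruned = True behaves like a nested dictionary with attribute access, which is all the code below relies on.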
Example #2
def get_parameters(opt, exp="class"):
    # Build the nested hyperparameter tree (params) and run metadata (meta)
    # from the parsed options in opt; exp selects classification vs. generation.
    params = DD()
    params.net = DD()
    params.net.enc = DD()
    get_encoder_parameters(params, opt)
    params.net.gendec = DD()
    params.net.classdec = DD()

    if exp == "class":
        get_class_parameters(params, opt)
    elif exp == "gen":
        get_gendec_parameters(params, opt)
    else:
        # Fallback: classification setup with encoder pretraining
        # disabled ("pt" presumably stands for pretraining)
        get_class_parameters(params, opt)
        params.net.enc.pt = "none"
    params.train = DD()
    params.train.static = DD()
    params.train.dynamic = DD()
    params.eval = DD()
    params.data = DD()

    # Which experiment to run: {class = classification; gen = generation}
    params.exp = opt.experiment

    # Task to run: {emotion, motivation}
    params.task = opt.task

    if params.exp == "class":
        # % of development set stories to keep as training data
        params.train.static.tr = opt.train_ratio

        # granularity of labels
        # motivation: {maslow, reiss}
        # emotion: {plutchik, plutchik16}
        params.granularity = opt.granularity

        # Labeling type (default = majority)
        params.data.label = opt.label

    elif params.exp == "gen":
        # Number of positive examples per negative example
        params.train.static.pnr = opt.pos_neg_ratio

        # Loss to use during training
        params.train.static.wcrit = "nll"

        # Prune uninformative words from motivation sequences,
        # e.g. the "to be" in "to be famous"
        params.data.pruned = opt.pruned

    # Max norm at which to clip gradients
    params.train.static.gc = opt.grad_clip

    # Random seed
    params.train.static.seed = opt.random_seed

    # Learning rate
    params.train.dynamic.lr = opt.learning_rate

    # Batch size
    params.train.dynamic.bs = opt.batch_size

    # Optimizer to use {adam, rmsprop, etc.};
    # only Adam is actually implemented
    params.train.dynamic.optim = opt.optimizer

    # Default parameters for the CNN sentence-classification
    # model from Kim (2014)
    if params.net.enc.model == "cnn+stock":
        params.net.enc.ks = "3,4,5"    # kernel (window) sizes
        params.net.enc.kn = 100        # feature maps per kernel size
        params.net.classdec.dpt = 0.5  # dropout
        params.train.dynamic.lr = 0.001
        params.train.dynamic.bs = 64
        params.data.shuffle = False
        params.train.static.l2 = 3     # l2 constraint of 3, as in Kim (2014)
        params.net.enc.iSize = 128
        params.net.classdec.hSize = 300

    meta = DD()
    meta.iterations = opt.iterations
    meta.epochs = opt.epochs
    meta.mark = opt.mark

    return params, meta
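A hypothetical end-to-end call might look like the sketch below. The opt field names simply mirror the attributes get_parameters reads; they are assumptions, not the repository's real argument parser, and the helpers it calls (get_encoder_parameters, get_class_parameters) must come from the same module:

opt = DD()
opt.experiment = "class"      # {class, gen}
opt.task = "emotion"          # {emotion, motivation}
opt.train_ratio = 0.8         # assumed value
opt.granularity = "plutchik"
opt.label = "majority"
opt.grad_clip = 1.0
opt.random_seed = 123
opt.learning_rate = 0.001
opt.batch_size = 32
opt.optimizer = "adam"        # only Adam is implemented
opt.iterations = 10000
opt.epochs = 50
opt.mark = False

params, meta = get_parameters(opt, exp="class")
print(params.train.dynamic.lr)   # 0.001

Since DD is a dict subclass, the resulting params tree can also be serialized or inspected like any nested dictionary.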