コード例 #1
0
def read_config(file_):
    """Recursively convert a nested mapping into a ``DD`` config object.

    String spellings of booleans ("True"/"T"/"true" and "False"/"F"/"false")
    are replaced by the real ``True``/``False``. Nested dictionaries are
    converted recursively, and every other value is copied unchanged.

    Args:
        file_: Mapping of option names to values; despite the name, it is
            used as a mapping (only ``.items()`` is called on it).

    Returns:
        A ``DD`` holding the converted entries.
    """
    true_spellings = ("True", "T", "true")
    false_spellings = ("False", "F", "false")

    config = DD()
    # Echo the raw mapping; this runs once per nesting level because of the
    # recursion below.
    print(file_)
    for k, v in file_.items():
        if v in true_spellings:
            config[k] = True
        elif v in false_spellings:
            config[k] = False
        elif isinstance(v, dict):
            # isinstance (rather than an exact type comparison) also
            # converts dict subclasses.
            config[k] = read_config(v)
        else:
            config[k] = v

    return config
コード例 #2
0
def get_data_parameters(opt, experiment, dataset):
    """Collect the data options for ``dataset`` into a ``DD``.

    Args:
        opt: Option object read via attribute access.
        experiment: Accepted for interface compatibility; not used here.
        dataset: ``"atomic"`` or ``"conceptnet"``. Any other value yields an
            empty ``DD``.

    Returns:
        A ``DD`` with the dataset-specific fields filled in.
    """
    params = DD()

    if dataset == "conceptnet":
        params.rel = opt.relation_format
        params.trainsize = opt.training_set_size
        params.devversion = opt.development_set_versions_to_use
        params.maxe1 = opt.max_event_1_size
        params.maxe2 = opt.max_event_2_size
        # hard-coded: 5 when relations use the "language" format, else 1
        params.maxr = 5 if params.rel == "language" else 1
    elif dataset == "atomic":
        params.categories = sorted(opt.categories)
        # hard-coded
        params.maxe1, params.maxe2, params.maxr = 17, 35, 1

    return params
コード例 #3
0
def get_eval_parameters(opt, force_categories=None):
    """Build the evaluation settings ``DD`` from ``opt``.

    Args:
        opt: Option object; read by attribute and tested with ``in``.
        force_categories: Categories to use for the "atomic" dataset. When
            it is ``None`` and ``opt`` contains ``"eval_categories"``, that
            option is used instead.

    Returns:
        A ``DD`` with sampler settings (``bs`` for "beam"/"greedy", ``k`` for
        "topk"), sequence settings, and, for "atomic" only, ``categories``.
    """
    eval_params = DD()

    # Sampler-specific field; other sampler names set neither bs nor k.
    sampler = opt.eval_sampler
    if sampler == "topk":
        eval_params.k = opt.topk_size
    elif sampler == "greedy":
        eval_params.bs = 1
    elif sampler == "beam":
        eval_params.bs = opt.beam_size

    eval_params.smax = opt.gen_seqlength
    eval_params.sample = opt.eval_sampler
    eval_params.numseq = opt.num_sequences
    eval_params.gs = opt.generate_sequences
    eval_params.es = opt.evaluate_sequences

    if opt.dataset == "atomic":
        take_from_opt = "eval_categories" in opt and force_categories is None
        eval_params.categories = (
            opt.eval_categories if take_from_opt else force_categories
        )

    return eval_params
コード例 #4
0
def get_net_parameters(opt):
    """Copy the network-related options from ``opt`` into a ``DD``.

    Args:
        opt: Option object read via attribute access.

    Returns:
        A ``DD`` whose short attribute names mirror the longer option names
        listed in the table below.
    """
    # (attribute set on the result, option attribute it is copied from)
    field_map = (
        ("model", "model"),
        ("nL", "num_layers"),
        ("nH", "num_heads"),
        ("hSize", "hidden_dim"),
        ("edpt", "embedding_dropout"),
        ("adpt", "attention_dropout"),
        ("rdpt", "residual_dropout"),
        ("odpt", "output_dropout"),
        ("pt", "pretrain"),
        ("afn", "activation"),
        # how to initialize parameters
        # format is gauss+{}+{}.format(mean, std)
        # n = the default initialization pytorch
        ("init", "init"),
    )

    net = DD()
    for short_name, option_name in field_map:
        setattr(net, short_name, getattr(opt, option_name))

    return net
コード例 #5
0
def get_training_parameters(opt):
    """Split the training options of ``opt`` into static and dynamic groups.

    Args:
        opt: Option object read via attribute access. It is also indexed
            with the optimizer name to fetch optimizer-specific settings.

    Returns:
        A ``DD`` with two members: ``static`` (experiment, seed,
        regularisation, schedule, clipping, loss, plus the
        optimizer-specific entries) and ``dynamic`` (learning rate, batch
        size, optimizer name).
    """
    fixed = DD()
    fixed.exp = opt.exp
    fixed.seed = opt.random_seed
    fixed.l2 = opt.l2  # weight decay
    fixed.vl2 = True
    fixed.lrsched = opt.learning_rate_schedule  # e.g. 'warmup_linear'
    fixed.lrwarm = opt.learning_rate_warmup  # e.g. 0.002
    fixed.clip = opt.clip  # gradient clipping
    fixed.loss = opt.loss  # which loss function to use

    tunable = DD()
    tunable.lr = opt.learning_rate
    tunable.bs = opt.batch_size
    tunable.optim = opt.optimizer  # optimizer to use {adam, rmsprop, etc.}

    # Merge the settings stored in opt under the chosen optimizer's name
    # (for rmsprop, e.g., alpha is the interpolation average).
    fixed.update(opt[tunable.optim])

    result = DD()
    result.static = fixed
    result.dynamic = tunable
    return result
コード例 #6
0
def get_parameters(opt, exp_type="model"):
    """Assemble the full parameter set and the meta settings from ``opt``.

    Besides building the return values, this assigns the module-level
    globals ``toy``, ``do_gen``, ``save``, ``test_save`` and
    ``save_strategy`` from the options of the same name, and prints the
    assembled parameters.

    Args:
        opt: Option object read via attribute access.
        exp_type: Accepted for interface compatibility; not used here.

    Returns:
        A ``(params, meta)`` tuple of ``DD`` objects. ``meta`` carries
        ``iterations`` and ``cycle``.
    """
    global toy, do_gen, save, test_save, save_strategy

    params = DD()
    # Overwritten a few lines below; kept so the sequence of assignments on
    # params stays exactly as before.
    params.net = DD()
    params.mle = 0
    params.dataset = opt.dataset

    params.net = get_net_parameters(opt)
    params.train = get_training_parameters(opt)
    params.model = params.net.model
    params.exp = opt.exp

    params.data = get_data_parameters(opt, params.exp, params.dataset)
    forced_categories = params.data.get("categories", None)
    params.eval = get_eval_parameters(opt, forced_categories)

    params.trainer = opt.trainer

    n_iterations = int(opt.iterations)

    meta = DD()
    meta.iterations = n_iterations
    meta.cycle = opt.cycle

    params.cycle = opt.cycle
    params.iters = n_iterations

    # Module-level run flags mirrored from the options.
    toy = opt.toy
    do_gen = opt.do_gen
    save = opt.save
    test_save = opt.test_save
    save_strategy = opt.save_strategy

    print(params)
    return params, meta
コード例 #7
0
import os
import sys

sys.path.append(os.getcwd())

import torch
import src.data.conceptnet as cdata
import src.data.data as data

from utils.utils import DD
import utils.utils as utils
import random
from src.data.utils import TextEncoder
from tqdm import tqdm

# Option container for this script, filled in attribute by attribute.
opt = DD()
opt.dataset = "conceptnet"
opt.exp = "generation"

# Nested container for the data-specific settings below.
opt.data = DD()

# How relations are represented:
#   "language" - split each relation into its component words
#   "relation" - use unlearned relation embeddings instead
opt.data.rel = "language"

# Use the 100k training set
opt.data.trainsize = 100

# Use both dev sets (v1 and v2)
コード例 #8
0
ファイル: bleu_atomic.py プロジェクト: Heidelberg-NLP/COINS
            data_params[case.split("_")[0]] = case.split("_")[1].split("#")
        else:
            data_params[case.split("_")[0]] = case.split("_")[1]
    return data_params


# Path of the generations file, taken from `args`.
gens_file = args.gens_file
# Last path component of that file, cut at its first "."
split = gens_file.split("/")[-1].split(".")[0]
# Value of the `n` attribute of `args`.
n = args.n


def flatten(outer):
    """Return one list holding the items of every iterable in ``outer``.

    Only a single level of nesting is removed; the items are kept in order.
    """
    flat = []
    for inner in outer:
        flat.extend(inner)
    return flat


# Option container for this script, with a nested container for data options.
opt = DD()
opt.data = DD()
opt.dataset = "atomic"
opt.exp = "generation"

# Parameters obtained from the generations file path via get_data_params.
data_params = get_data_params(gens_file)

# Categories come from the parsed parameters; the trailing comment appears to
# record a previously hard-coded category list.
categories = data_params[
    "categories"]  #sorted(["oReact", "oEffect", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xWant"])

opt.data.categories = data_params["categories"]

# Copy the size limits only when "maxe1" is present; "maxe2" and "maxr" are
# read without their own check, so they are assumed to accompany it.
if "maxe1" in data_params:
    opt.data.maxe1 = data_params["maxe1"]
    opt.data.maxe2 = data_params["maxe2"]
    opt.data.maxr = data_params["maxr"]
コード例 #9
0
# Earlier single-value form of --n_per_node, superseded by the 3-value form below:
#parser.add_argument("--n_per_node", type=int, default=3)
parser.add_argument("--n_per_node", type=int, nargs=3, default=[1, 1, 1], help='n per node for train, dev, test')
parser.add_argument("--comet", action='store_true', default=False, help='create data of the form s,r -> o, where sro are triples from ATOMIC')
parser.add_argument("--pathcomet", action='store_true', default=False, help='create data of the form path,s,r -> o, where sro are triples from ATOMIC')
# Earlier boolean form of --add_orig, superseded by the 3-value integer form below:
#parser.add_argument("--add_orig", action='store_true', default=False, help='when generating pathcomet, add the original comet triples as well (train)')
parser.add_argument("--add_orig", type=int, nargs=3, default=[0, 0, 0], help='when generating pathcomet, add the original comet triples as well (train)')

args = parser.parse_args()

# Show the parsed arguments for reference.
print("\n### ATOMIC Data Generation ###")
pprint.pprint(vars(args))
print('\n')

# --comet and --pathcomet are mutually exclusive.
# NOTE(review): this assert is skipped when Python runs with -O.
assert not (args.comet and args.pathcomet)

# Option container for this script.
opt = DD()
opt.dataset = "atomic"
opt.exp = "generation"
opt.data = DD()
# `categories` is defined outside this excerpt.
opt.data.categories = sorted(categories)

# Additionally add our options HERE
# (each command-line value is copied onto opt under the same name)
opt.pickled_data = args.pickled_data
opt.n_train = args.n_train
opt.n_dev = args.n_dev
opt.n_test = args.n_test
opt.max_path_len = args.max_path_len
opt.n_per_node = args.n_per_node
opt.comet = args.comet
opt.pathcomet = args.pathcomet
opt.add_orig = args.add_orig
コード例 #10
0
import src.data.data as data
from utils.utils import DD
import utils.utils as utils
import random
from src.data.utils import TextEncoder
from tqdm import tqdm
import torch

# Manually edit this list to choose the categories to include, e.g. to
# train on a separate set of categories.
categories = []
categories += ["Intent"]


# Option container for this script.
opt = DD()
opt.dataset = "motiv_sent"
opt.exp = "generation"
opt.data = DD()
opt.data.categories = sorted(categories)

# Files handed to TextEncoder (paths are relative to the working directory).
encoder_path = "model/encoder_bpe_40000.json"
bpe_path = "model/vocab_40000.bpe"

text_encoder = TextEncoder(encoder_path, bpe_path)

# Keep a handle on the encoder's `encoder` attribute and record its size.
encoder = text_encoder.encoder
n_vocab = len(text_encoder.encoder)

# Special tokens: the start/end tokens from the data module plus one
# "<category>" token per selected category.
special = [data.start_token, data.end_token]
special += ["<{}>".format(cat) for cat in categories]
コード例 #11
0
ファイル: config.py プロジェクト: langley/MultiCOMET
def get_data_parameters(opt, experiment, dataset):
    """Collect the data options for ``dataset`` into a ``DD``.

    Author's note carried over from the original header comment:
    ``opt.exp`` and ``opt.data`` are used for the data loader path.

    Args:
        opt: Option object read via attribute access.
        experiment: Accepted for interface compatibility; not used here.
        dataset: ``"atomic"`` or ``"conceptnet"``. Any other value yields an
            empty ``DD``.

    Returns:
        A ``DD`` with the dataset-specific fields filled in.
    """
    data = DD()
    if dataset == "atomic":
        data.categories = sorted(opt.categories)
        # Hard-coded size limits. Change them here to match the data loader
        # being used.
        #   maxe1 == max event (the original code used 17)
        #   maxe2 == max effect
        data.maxe1 = 50
        data.maxe2 = 35
        data.maxr = 1

    elif dataset == "conceptnet":
        data.rel = opt.relation_format
        data.trainsize = opt.training_set_size
        data.devversion = opt.development_set_versions_to_use
        data.maxe1 = opt.max_event_1_size
        data.maxe2 = opt.max_event_2_size
        if data.rel == "language":
            # hard-coded
            data.maxr = 5
        else:
            # hard-coded
            data.maxr = 1

    return data
コード例 #12
0
def load_default():
    """Load ``config_default.json`` and wrap its sections in ``DD`` objects.

    The top level and the ``net``, ``train``, ``train.static``,
    ``train.dynamic``, ``data`` and ``eval`` sections are each wrapped, and
    ``train.dynamic.epoch`` is set to 0.

    Returns:
        A ``DD`` built from the wrapped configuration.
    """
    with open(abs_path("config_default.json"), 'r') as handle:
        raw = json.load(handle)

    config = DD(raw)
    for section in ("net", "train"):
        setattr(config, section, DD(getattr(config, section)))
    for group in ("static", "dynamic"):
        setattr(config.train, group, DD(getattr(config.train, group)))
    for section in ("data", "eval"):
        setattr(config, section, DD(getattr(config, section)))

    config.train.dynamic.epoch = 0
    return DD(config)
コード例 #13
0
def get_meta(config):
    """Return a ``DD`` with ``iterations`` and ``cycle`` taken from ``config``.

    ``iterations`` is ``config.iters`` converted with ``int``; ``cycle`` is
    copied as-is.
    """
    iterations = int(config.iters)
    cycle = config.cycle

    meta = DD()
    meta.iterations = iterations
    meta.cycle = cycle
    return meta