def forward(self, incoming):
    inp = Storage()
    inp.embedding = incoming.resp.embedding
    inp.post = incoming.hidden.h
    inp.post_length = incoming.data.post_length
    inp.resp_length = incoming.data.resp_length
    incoming.gen = gen = Storage()
    inp.init_h = incoming.conn.init_h
    inp.embLayer = incoming.resp.embLayer
    inp.max_sent_length = self.args.max_sent_length
    inp.sampling_proba = incoming.args.sampling_proba
    inp.dm = self.param.volatile.dm
    inp.batch_size = incoming.data.batch_size

    # Decode with scheduled teacher forcing; outputs are written into `gen`.
    self.scheduledTeacherForcing(inp, gen)

    # Flatten padded outputs/targets down to the valid timesteps, then compute
    # token-level cross entropy and the corresponding perplexity.
    w_o_f = flattenSequence(gen.w_pro, incoming.data.resp_length - 1)
    data_f = flattenSequence(incoming.data.resp[1:], incoming.data.resp_length - 1)
    incoming.result.word_loss = self.lossCE(w_o_f, data_f)
    incoming.result.perplexity = torch.exp(incoming.result.word_loss)
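# `flattenSequence` is used above but not defined in this section. A minimal
# sketch of its assumed behaviour (not the repo's actual helper): pack only
# the valid timesteps of a padded (seq_len, batch, ...) tensor, so the loss
# above never sees padded positions.
def _flattenSequence_sketch(tensor, lengths):
    import torch
    # Keep the first `lengths[i]` steps of each batch column i and concatenate
    # them along the time axis.
    return torch.cat([tensor[:int(l), i] for i, l in enumerate(lengths)], dim=0)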
def run(*argv):
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A seq2seq model with GRU encoder and decoder. '
        'Attention, beamsearch, dropout and batchnorm are supported.')
    args = Storage()

    parser.add_argument('--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default=None,
        help='Checkpoint name to load. '
            '"NAME_last" for the last checkpoint of the model named NAME; "NAME_best" for its best checkpoint. '
            'You can also use "last" and "best" to load the last model you ran. '
            'Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with the same name run at the same time. '
            '"last" and "best" are not guaranteed to work when 2 models run at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train",
        help='"train" or "test". Default: train')
    parser.add_argument('--eh_size', type=int, default=384, help='Size of encoder GRU')
    parser.add_argument('--dh_size', type=int, default=200, help='Size of decoder GRU')
    parser.add_argument('--droprate', type=float, default=0,
        help='The probability of an element to be zeroed in dropout. 0 indicates no dropout.')
    parser.add_argument('--batchnorm', action='store_true', help='Use batchnorm')
    parser.add_argument('--decode_mode', type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'], default='beam',
        help='The decoding strategy when free-running. Choices: max, sample, gumbel(=sample), '
            'samplek (sample from top k), beam (beamsearch). Default: beam')
    parser.add_argument('--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument('--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. The penalty gets larger as this becomes smaller.')
    parser.add_argument('--dataset', type=str, default='OpenSubtitles',
        help='Dataloader class. Default: OpenSubtitles')
    parser.add_argument('--datapath', type=str, default='resources://OpenSubtitles',
        help='Directory for data set. Default: resources://OpenSubtitles')
    parser.add_argument('--epoch', type=int, default=100, help="Epoch for training. Default: 100")
    parser.add_argument('--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. Default: Glove")
    parser.add_argument('--wvpath', type=str, default="resources://Glove300d",
        help="Resources of pretrained wordvector. Default: resources://Glove300d")
    parser.add_argument('--bert_model', type=str, default="bert-base-uncased",
        help="Name of bert model. Default: bert-base-uncased")
    parser.add_argument('--bert_vocab', type=str, default="bert-base-uncased",
        help="Name of bert vocab. Default: bert-base-uncased")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    cargs = parser.parse_args(argv)

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.bert_model = cargs.bert_model
    args.bert_vocab = cargs.bert_vocab
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # The following arguments are not controlled by the command line.
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    args.batch_per_epoch = 500
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.dh_size = cargs.dh_size
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm

    args.lr = 1e-3
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args, load_exclude_set, restoreCallback)
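# Because run(*argv) forwards its arguments to parse_args(argv), the entry
# point can be driven programmatically as well as from a shell. A hypothetical
# smoke test (flag values are illustrative only, not the repo's test suite):
def _smoke_test():
    run("--mode", "test", "--restore", "best", "--cpu")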
def run():
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(description='A language model')
    args = Storage()

    parser.add_argument('--name', type=str, default='LM',
        help='The name of your model, used for variable scope and tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default='last',
        help='Checkpoint name to load. "last" for the last checkpoint, "best" for the best checkpoint on dev. '
            'Attention: "last" and "best" will cause unexpected behaviour when 2 models run in the same dir at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--dataset', type=str, default='MSCOCO', help='Dataloader class. Default: MSCOCO')
    parser.add_argument('--datapath', type=str, default='./data',
        help='Directory for data set. Default: ./data')
    parser.add_argument('--epoch', type=int, default=10, help="Epoch for training. Default: 10")
    parser.add_argument('--wvclass', type=str, default=None,
        help="Wordvector class, None for using Glove pretrained wordvec. Default: None")
    parser.add_argument('--wvpath', type=str, default=None,
        help="Path for pretrained wordvector. Default: None")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    cargs = parser.parse_args()

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    args.softmax_samples = 512
    args.embedding_size = 300
    args.dh_size = 200
    args.lr = 1e-1
    args.lr_decay = 0.995
    args.momentum = 0.9
    args.batch_size = 128
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.max_sen_length = 50
    args.checkpoint_steps = 1000
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args)
def run(argv):
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(description='A CVAE model')
    args = Storage()

    parser.add_argument('--name', type=str, default='CVAE',
        help='The name of your model, used for variable scope and tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default='last',
        help='Checkpoint name to load. "last" for the last checkpoint, "best" for the best checkpoint on dev. '
            'Attention: "last" and "best" will cause unexpected behaviour when 2 models run in the same dir at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--dataset', type=str, default='SwitchboardCorpus',
        help='Dataloader class. Default: SwitchboardCorpus')
    parser.add_argument('--datapath', type=str, default='resources://SwitchboardCorpus',
        help='Directory for data set. Default: resources://SwitchboardCorpus')
    parser.add_argument('--epoch', type=int, default=100, help="Epoch for training. Default: 100")
    parser.add_argument('--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. Default: Glove")
    parser.add_argument('--wvpath', type=str, default="resources://Glove200d",
        help="Directory for pretrained wordvector. Default: resources://Glove200d")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    cargs = parser.parse_args(argv)

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    args.softmax_samples = 512
    args.use_hcf = True
    args.full_kl_step = 10000
    args.latent_size = 200
    args.topic_embedding_size = 30
    args.da_embedding_size = 30
    args.word_embedding_size = 200
    args.session_window = 10
    args.repeat_N = 2
    args.eh_size = 300
    args.ch_size = 600
    args.dh_size = 400
    args.lr = 1e-3
    args.lr_decay = 0.995
    args.batch_size = 3
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.min_vocab_times = 5
    args.max_sen_length = 50
    args.max_turn_length = 1000
    args.checkpoint_steps = 1
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args)

    # In test mode, record how this run can be reproduced.
    if args.mode == 'test':
        import os
        import json
        res = {'working_dir': './', 'entry': 'run', 'args': argv}
        if os.path.exists("./result.json"):
            res.update(json.load(open("./result.json")))
        json.dump(res, open("result.json", "w"))
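# `args.full_kl_step = 10000` above suggests linear KL-cost annealing, a
# common CVAE trick against posterior collapse. A minimal sketch under that
# assumption (the actual schedule in the repo may differ):
def kl_annealing_weight(global_step, full_kl_step=10000):
    # Ramp the KL term's weight linearly from 0 to 1 over `full_kl_step`
    # training steps, then hold it at 1.
    return min(1.0, global_step / full_kl_step)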
def run(*argv):
    import argparse
    import random
    import time

    from utils import Storage
    from main import main

    parser = argparse.ArgumentParser(description='A hred model')
    args = Storage()

    parser.add_argument('--name', type=str, default='hred',
        help='The name of your model, used for variable scope and tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default='best',
        help='Checkpoint name to load. "last" for the last checkpoint, "best" for the best checkpoint on dev. '
            'Attention: "last" and "best" will cause unexpected behaviour when 2 models run in the same dir at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--dataset', type=str, default='MyHRED', help='Dataloader class. Default: MyHRED')
    parser.add_argument('--datapath', type=str, default='../data/film',
        help='Directory for data set. Default: ../data/film')
    parser.add_argument('--epoch', type=int, default=20, help="Epoch for training. Default: 20")
    parser.add_argument('--batch_size', type=int, default=32,
        help="The batch size of data when training or testing.")
    parser.add_argument('--max_sent_length', type=int, default=512,
        help="The max encoded sentence length when training.")
    parser.add_argument('--max_decoder_length', type=int, default=50,
        help="The max decoded sentence length at inference.")
    parser.add_argument('--num_turns', type=int, default=8,
        help="The max number of turns of the post field.")
    parser.add_argument('--wv_class', type=str, default='TencentChinese',
        help="Wordvector class, none for not using pretrained wordvec. Default: TencentChinese")
    parser.add_argument('--wv_path', type=str, default='wordvector/chinese',
        help="Directory for pretrained wordvector. Default: wordvector/chinese")
    parser.add_argument('--output_dir', type=str, default="./output/film",
        help='Output directory for test output. Default: ./output/film')
    parser.add_argument('--log_dir', type=str, default="./tensorboard/film",
        help='Log directory for tensorboard. Default: ./tensorboard/film')
    parser.add_argument('--model_dir', type=str, default="./model/film",
        help='Checkpoint directory for model. Default: ./model/film')
    parser.add_argument('--cache_dir', type=str, default="./cache/film",
        help='Checkpoint directory for cache. Default: ./cache/film')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    parser.add_argument('--seed', type=int, default=42, help="The random seed in the training process.")
    cargs = parser.parse_args(argv)

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.batch_size = cargs.batch_size
    args.wv_class = cargs.wv_class
    args.wv_path = cargs.wv_path
    args.output_dir = cargs.output_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu
    args.seed = cargs.seed
    args.max_sent_length = cargs.max_sent_length
    args.max_decoder_length = cargs.max_decoder_length
    args.num_turns = cargs.num_turns

    args.softmax_samples = 512
    args.embedding_size = 200
    args.eh_size = 200
    args.ch_size = 200
    args.dh_size = 200
    args.lr = 1e-3
    args.lr_decay = 0.99
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.checkpoint_steps = 100
    args.checkpoint_max_to_keep = 5

    random.seed(args.seed)
    main(args)
def run(*argv):
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(description='A seqGAN language generation model')
    args = Storage()

    parser.add_argument('--name', type=str, default='seqGAN',
        help='The name of your model, used for variable scope and tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default='last',
        help='Checkpoint name to load. "last" for the last checkpoint, "best" for the best checkpoint on dev. '
            'Attention: "last" and "best" will cause unexpected behaviour when 2 models run in the same dir at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--dataset', type=str, default='MSCOCO', help='Dataloader class. Default: MSCOCO')
    parser.add_argument('--datapath', type=str, default='MSCOCO#MSCOCO',
        help='Directory for data set. Default: MSCOCO#MSCOCO')
    parser.add_argument('--wvclass', type=str, default='Glove',
        help="Wordvector class, None for using Glove pretrained wordvec. Default: Glove")
    parser.add_argument('--wvpath', type=str, default="resources://Glove300d",
        help="Path for pretrained wordvector. Default: resources://Glove300d")
    parser.add_argument('--pre_train', type=str, default="True",
        help='Pre-train the generator and discriminator. Default: True')
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    cargs = parser.parse_args(argv)

    # Edit the following arguments to bypass the command line.
    args.global_step = 0
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.pre_train = cargs.pre_train == "True"
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    args.sample = 100
    args.test_sample = None
    args.softmax_samples = 512
    args.embedding_size = 300
    args.eh_size = 200
    args.dh_size = 200
    args.z_dim = 100
    args.min_kl = 10
    args.full_kl_step = 30000
    args.lr = 1e-1
    args.lr_decay = 0.995
    args.momentum = 0.9
    args.batch_size = 128
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.checkpoint_steps = 1000
    args.checkpoint_max_to_keep = 5

    args.teacher_forcing = True
    args.gen_pre_epoch_num = 25  # number of pretraining epochs for the generator (alternative: 120)
    args.dis_pre_epoch_num = 1  # pretraining epochs of the discriminator
    args.total_adv_batch = 200  # total batches used for adversarial training
    args.gen_adv_batch_num = 120  # update steps of the generator in adversarial training
    args.test_per_epoch = 5
    args.rollout_num = 5  # rollout number for reward estimation
    args.dis_adv_epoch_num = 1  # update steps of the discriminator in adversarial training (alternative: 5)
    args.dis_dropout_keep_prob = 0.75  # dropout keep probability of the discriminator
    args.num_classes = 2  # number of classes (real and fake)
    args.dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]  # convolutional kernel sizes of the discriminator
    args.dis_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160]  # number of filters for each kernel size
    args.dis_l2_reg_lambda = 0.2  # L2 regularization strength
    args.dis_lr = 1e-4  # learning rate of the discriminator
    args.pre_run_batch = None

    import random
    random.seed(0)

    from main import main
    main(args)
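# `args.rollout_num = 5` above points to SeqGAN-style Monte-Carlo rollout:
# each partial sequence is completed several times by the generator and scored
# by the discriminator, and the mean score serves as the reward for the last
# generated token. A schematic sketch (the function names are assumptions,
# not this repo's API):
def _estimate_reward(partial_seq, complete_fn, score_fn, rollout_num=5):
    # complete_fn: finishes a partial sequence with the generator.
    # score_fn: the discriminator's probability that a sequence is real.
    scores = [score_fn(complete_fn(partial_seq)) for _ in range(rollout_num)]
    return sum(scores) / rollout_num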
def run():
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(description='A seq2seq model')
    args = Storage()

    parser.add_argument('--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default=None,
        help='Checkpoint name to load. '
            '"NAME_last" for the last checkpoint of the model named NAME; "NAME_best" for its best checkpoint. '
            'You can also use "last" and "best" to load the last model you ran. '
            'Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with the same name run at the same time. '
            '"last" and "best" are not guaranteed to work when 2 models run at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--dataset', type=str, default='SkeletonToStory',
        help='Dataloader class. Default: SkeletonToStory')
    parser.add_argument('--datapath', type=str, default='./data',
        help='Directory for data set. Default: ./data')
    parser.add_argument('--epoch', type=int, default=100, help="Epoch for training. Default: 100")
    parser.add_argument('--wvclass', type=str, default=None,
        help="Wordvector class, none for not using pretrained wordvec. Default: None")
    parser.add_argument('--wvpath', type=str, default="./wordvec",
        help="Directory for pretrained wordvector. Default: ./wordvec")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    cargs = parser.parse_args()

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # The following arguments are not controlled by the command line.
    args.restore_optimizer = True
    args.load_exclude_set = []
    args.restoreCallback = None

    args.batch_per_epoch = 1500
    args.embedding_size = 300
    args.eh_size = 200
    args.dh_size = 200
    args.lr = 1e-3
    args.batch_size = 30
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sen_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args)
def run(*argv):
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A language model with GRU. Attention, beamsearch, '
        'dropout and batchnorm are supported.')
    args = Storage()

    parser.add_argument('--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default=None,
        help='Checkpoint name to load. '
            '"NAME_last" for the last checkpoint of the model named NAME; "NAME_best" for its best checkpoint. '
            'You can also use "last" and "best" to load the last model you ran. '
            'Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with the same name run at the same time. '
            '"last" and "best" are not guaranteed to work when 2 models run at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--dh_size', type=int, default=200, help='Size of decoder GRU')
    parser.add_argument('--droprate', type=float, default=0,
        help='The probability of an element to be zeroed in dropout. 0 indicates no dropout.')
    parser.add_argument('--decode_mode', type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'], default='samplek',
        help='The decoding strategy when free-running. Choices: max, sample, gumbel(=sample), '
            'samplek (sample from top k), beam (beamsearch). Default: samplek')
    parser.add_argument('--batchnorm', action='store_true', help='Use batchnorm')
    parser.add_argument('--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument('--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. The penalty gets larger as this becomes smaller.')
    parser.add_argument('--temperature', type=float, default=1, help='Temperature. Default: 1')
    parser.add_argument('--dataid', type=str, default='resources://MSCOCO',
        help='Resources/path for data set. Default: resources://MSCOCO')
    parser.add_argument('--epoch', type=int, default=100, help="Epoch for training. Default: 100")
    parser.add_argument('--batch_per_epoch', type=int, default=500,
        help="Batches per epoch. Default: 500")
    parser.add_argument('--wvid', type=str, default="resources://Glove300d",
        help="Resources/path for pretrained wordvector. Default: resources://Glove300d")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    parser.add_argument('--seed', type=int, default=0, help='Specify random seed. Default: 0')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate. Default: 0.001')
    cargs = parser.parse_args(argv)

    ## general settings
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    ## dataset settings
    args.dataid = cargs.dataid
    args.tokenizer = "space"
    args.max_sent_length = 50
    args.convert_to_lower_letter = False
    args.min_frequent_vocab_times = 10
    args.min_rare_vocab_times = 0
    args.wvid = cargs.wvid

    ## training settings
    args.epochs = cargs.epoch
    args.lr = cargs.lr
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    ## arguments for restoring checkpoints
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    ## architecture settings
    args.batch_per_epoch = cargs.batch_per_epoch
    args.embedding_size = 300
    args.dh_size = cargs.dh_size
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm

    ## decoding settings
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.temperature = cargs.temperature

    ## random seed
    args.seed = cargs.seed
    import random
    random.seed(cargs.seed)
    import torch
    torch.manual_seed(cargs.seed)
    import numpy as np
    np.random.seed(cargs.seed)

    from main import main
    main(args, load_exclude_set, restoreCallback)
    args.hist_weights = cargs.hist_weights
    if args.hist_len != len(args.hist_weights):
        raise ValueError('hist_len should be equal to the length of hist_weights')
    args.hist_weights = np.array(args.hist_weights) / sum(args.hist_weights)

    # The following arguments are not controlled by the command line.
    args.restore_optimizer = False
    args.load_exclude_set = []
    args.restoreCallback = None

    args.batch_num_per_gradient = 1
    args.embedding_size = 300
    args.eh_size = 200
    args.dh_size = 400
    args.lr = 5e-4
    args.batch_size = 8
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.checkpoint_steps = 3
    args.checkpoint_max_to_keep = 3
    args.checkpoint_epoch = 5

    # Seed every RNG in use (Python, NumPy, and Torch CPU/GPU) for reproducibility.
    random.seed(cargs.seed)
    np.random.seed(cargs.seed)
    torch.manual_seed(cargs.seed)
    torch.cuda.manual_seed(cargs.seed)
    torch.cuda.manual_seed_all(cargs.seed)

    main(args)
def run(*argv):
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A classification model with GRU encoder and MLP for prediction. '
        'Dropout and batchnorm are supported.')
    args = Storage()

    parser.add_argument('--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--restore', type=str, default=None,
        help='Checkpoint name to load. '
            '"NAME_last" for the last checkpoint of the model named NAME; "NAME_best" for its best checkpoint. '
            'You can also use "last" and "best" to load the last model you ran. '
            'Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with the same name run at the same time. '
            '"last" and "best" are not guaranteed to work when 2 models run at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--eh_size', type=int, default=200, help='Size of encoder GRU')
    parser.add_argument('--class_num', type=int, default=5, help='Number of classes')
    parser.add_argument('--droprate', type=float, default=0,
        help='The probability of an element to be zeroed in dropout. 0 indicates no dropout.')
    parser.add_argument('--batchnorm', action='store_true', help='Use batchnorm')
    parser.add_argument('--dataset', type=str, default='SST', help='Dataloader class. Default: SST')
    parser.add_argument('--datapath', type=str, default='resources://SST',
        help='Directory for data set. Default: resources://SST')
    parser.add_argument('--epoch', type=int, default=100, help="Epoch for training. Default: 100")
    parser.add_argument('--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. Default: Glove")
    parser.add_argument('--wvpath', type=str, default="resources://Glove300d",
        help="Directory for pretrained wordvector. Default: resources://Glove300d")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    cargs = parser.parse_args(argv)

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # The following arguments are not controlled by the command line.
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    args.batch_per_epoch = 500
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.class_num = cargs.class_num
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm
    args.lr = 1e-3
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args, load_exclude_set, restoreCallback)
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    args.softmax_samples = 512
    args.embedding_size = 300
    args.eh_size = 200
    args.dh_size = 200
    args.lr = 1e-3
    args.lr_decay = 0.995
    args.batch_size = 128
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.max_sen_length = 50
    args.checkpoint_steps = 1000
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args)
def run(*argv):
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A seq2seq model with GRU encoder and decoder. Attention, beamsearch, '
        'dropout and batchnorm are supported. It can be trained with RAML, Scheduled Sampling '
        'or Policy Gradient algorithms.')
    args = Storage()

    parser.add_argument('--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
            'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument('--model', type=str, default="basic",
        choices=["basic", "raml", "scheduled-sampling", "policy-gradient"],
        help='The type of algorithm. Choices: basic, raml, scheduled-sampling, '
            'policy-gradient. Default: basic')
    parser.add_argument('--restore', type=str, default=None,
        help='Checkpoint name to load. '
            '"NAME_last" for the last checkpoint of the model named NAME; "NAME_best" for its best checkpoint. '
            'You can also use "last" and "best" to load the last model you ran. '
            'It can also be a url starting with "http". '
            'Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with the same name run at the same time. '
            '"last" and "best" are not guaranteed to work when 2 models run at the same time. '
            'Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train", help='"train" or "test". Default: train')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate. Default: 0.001')
    parser.add_argument('--eh_size', type=int, default=200, help='Size of encoder GRU')
    parser.add_argument('--dh_size', type=int, default=200, help='Size of decoder GRU')
    parser.add_argument('--droprate', type=float, default=0,
        help='The probability of an element to be zeroed in dropout. 0 indicates no dropout.')
    parser.add_argument('--batchnorm', action='store_true', help='Use batchnorm')
    parser.add_argument('--decode_mode', type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'], default='beam',
        help='The decoding strategy when free-running. Choices: max, sample, gumbel(=sample), '
            'samplek (sample from top k), beam (beamsearch). Default: beam')
    parser.add_argument('--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument('--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. The penalty gets larger as this becomes smaller.')
    parser.add_argument('--dataset', type=str, default='OpenSubtitles',
        help='Dataloader class. Default: OpenSubtitles')
    parser.add_argument('--dataid', type=str, default='resources://OpenSubtitles#OpenSubtitles',
        help='Resource id for data set. It can be a resource name or a local path. '
            'Default: resources://OpenSubtitles#OpenSubtitles')
    parser.add_argument('--epoch', type=int, default=100, help="Epoch for training. Default: 100")
    parser.add_argument('--batch_per_epoch', type=int, default=1500,
        help="Batches per epoch. Default: 1500")
    parser.add_argument('--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. Default: Glove")
    parser.add_argument('--wvid', type=str, default="resources://Glove300d",
        help="Resource id for pretrained wordvector. Default: resources://Glove300d")
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoint directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoint directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--device', type=int, default=0,
        help='Index of the GPU device to use. Default: 0')
    parser.add_argument('--debug', action='store_true', help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up loading data and wordvec. (It may cause problems when you switch dataset.)')
    parser.add_argument('--seed', type=int, default=0, help='Specify random seed. Default: 0')

    # RAML parameters
    parser.add_argument('--raml_file', type=str, default='samples_iwslt14.txt',
        help='The samples and rewards described in RAML')
    parser.add_argument('--n_samples', type=int, default=10,
        help='Number of samples for every target sentence')
    parser.add_argument('--tau', type=float, default=0.4,
        help='The temperature in the RAML algorithm')

    # Scheduled sampling parameters
    parser.add_argument('--decay_factor', type=float, default=500.,
        help='The hyperparameter controlling the speed of increasing '
            'the probability of sampling from the model. Default: 500.')

    # Policy Gradient parameters
    parser.add_argument('--epoch_teacherForcing', type=int, default=10,
        help='How long to run teacher forcing before running policy gradient. Default: 10')
    parser.add_argument('--nb_sample_training', type=int, default=20,
        help='How many samples we take for each batch during policy gradient. Default: 20')
    parser.add_argument('--policy_gradient_reward_mode', type=str, default='mean',
        help='How the policy gradient reward is aggregated. Default: mean')

    cargs = parser.parse_args(argv)

    # Edit the following arguments to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.model = cargs.model
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.dataid
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvid
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu
    args.device = cargs.device

    # RAML parameters
    args.raml_file = cargs.raml_file
    args.n_samples = cargs.n_samples
    args.tau = cargs.tau
    # Scheduled sampling parameters
    args.decay_factor = cargs.decay_factor
    # Policy Gradient parameters
    args.epoch_teacherForcing = cargs.epoch_teacherForcing  # epochs of teacher forcing before policy gradient
    args.nb_sample_training = cargs.nb_sample_training  # samples taken per batch during policy gradient
    args.policy_gradient_reward_mode = cargs.policy_gradient_reward_mode  # how the reward is aggregated

    # The following arguments are not controlled by the command line.
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    args.batch_per_epoch = cargs.batch_per_epoch
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.dh_size = cargs.dh_size
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm

    args.lr = cargs.lr
    # RAML trains on n_samples augmented targets per example, so scale the batch.
    args.batch_size = 3 * args.n_samples if args.model == "raml" else 32
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5
    args.seed = cargs.seed

    import random
    random.seed(cargs.seed)
    import torch
    torch.manual_seed(cargs.seed)
    import numpy as np
    np.random.seed(cargs.seed)

    from main import main
    main(args, load_exclude_set, restoreCallback)
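# `--decay_factor` above controls how fast the probability of sampling from
# the model grows during scheduled sampling. A common choice (an assumption
# here; the repo may implement a different curve) is inverse-sigmoid decay of
# the teacher-forcing probability:
import math

def _sampling_proba(step, decay_factor=500.0):
    # Probability of feeding the model's own sample instead of the gold token.
    # The exponent is capped to avoid overflow for very large step counts.
    teacher_forcing = decay_factor / (decay_factor + math.exp(min(step / decay_factor, 700.0)))
    return 1.0 - teacher_forcing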