Beispiel #1
0
def run(*argv):
    """Parse the seq2seq command line, assemble the config and call ``main``.

    Args:
        *argv: Command-line tokens, forwarded unchanged to
            ``ArgumentParser.parse_args``.
    """
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A seq2seq model with GRU encoder and decoder. Attention, beamsearch,\
		dropout and batchnorm is supported.')
    args = Storage()

    # (flag, add_argument keyword arguments). Registration order is kept so
    # that the generated --help lists the options in the same sequence.
    option_table = (
        ('--name', dict(
            type=str,
            default=None,
            help='The name of your model, used for tensorboard, etc. Default: runXXXXXX_XXXXXX (initialized by current time)')),
        ('--restore', dict(
            type=str,
            default=None,
            help='Checkpoints name to load. \
			"NAME_last" for the last checkpoint of model named NAME. "NAME_best" means the best checkpoint. \
			You can also use "last" and "best", defaultly use last model you run. \
			Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with same name run in the same time. \
			"last" and "best" are not guaranteed to work when 2 models run in the same time.\
			Default: None (don\'t load anything)')),
        ('--mode', dict(
            type=str,
            default="train",
            help='"train" or "test". Default: train')),
        ('--eh_size', dict(
            type=int,
            default=384,
            help='Size of encoder GRU')),
        ('--dh_size', dict(
            type=int,
            default=200,
            help='Size of decoder GRU')),
        ('--droprate', dict(
            type=float,
            default=0,
            help="The probability to be zerod in dropout. 0 indicates for don't use dropout")),
        ('--batchnorm', dict(
            action='store_true',
            help='Use bathnorm')),
        ('--decode_mode', dict(
            type=str,
            choices=['max', 'sample', 'gumbel', 'samplek', 'beam'],
            default='beam',
            help='The decode strategy when freerun. Choices: max, sample, gumbel(=sample), \
			samplek(sample from topk), beam(beamsearch). Default: beam')),
        ('--top_k', dict(
            type=int,
            default=10,
            help='The top_k when decode_mode == "beam" or "samplek"')),
        ('--length_penalty', dict(
            type=float,
            default=0.7,
            help='The beamsearch penalty for short sentences. The penalty will get larger when this becomes smaller.')),
        ('--dataset', dict(
            type=str,
            default='OpenSubtitles',
            help='Dataloader class. Default: OpenSubtitles')),
        ('--datapath', dict(
            type=str,
            default='resources://OpenSubtitles',
            help='Directory for data set. Default: resources://OpenSubtitles')),
        ('--epoch', dict(
            type=int,
            default=100,
            help="Epoch for training. Default: 100")),
        ('--wvclass', dict(
            type=str,
            default='Glove',
            help="Wordvector class, none for not using pretrained wordvec. Default: Glove")),
        ('--wvpath', dict(
            type=str,
            default="resources://Glove300d",
            help="Resources of pretrained wordvector. Default: resources://Glove300d")),
        ('--bert_model', dict(
            type=str,
            default="bert-base-uncased",
            help="Name of bert model. Default: bert-base-uncased")),
        ('--bert_vocab', dict(
            type=str,
            default="bert-base-uncased",
            help="Name of bert vocab. Default: bert-base-uncased")),
        ('--out_dir', dict(
            type=str,
            default="./output",
            help='Output directory for test output. Default: ./output')),
        ('--log_dir', dict(
            type=str,
            default="./tensorboard",
            help='Log directory for tensorboard. Default: ./tensorboard')),
        ('--model_dir', dict(
            type=str,
            default="./model",
            help='Checkpoints directory for model. Default: ./model')),
        ('--cache_dir', dict(
            type=str,
            default="./cache",
            help='Checkpoints directory for cache. Default: ./cache')),
        ('--cpu', dict(
            action="store_true",
            help='Use cpu.')),
        ('--debug', dict(
            action='store_true',
            help='Enter debug mode (using ptvsd).')),
        ('--cache', dict(
            action='store_true',
            help='Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)')),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    parsed = parser.parse_args(argv)

    # Values taken from the command line; edit here to bypass it.
    if parsed.name:
        args.name = parsed.name
    else:
        # No name given: derive one from the current local time.
        args.name = time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    for key in ('restore', 'mode', 'dataset', 'datapath'):
        setattr(args, key, getattr(parsed, key))
    args.epochs = parsed.epoch  # CLI flag is singular, config key is plural
    for key in ('wvclass', 'wvpath', 'bert_model', 'bert_vocab', 'out_dir',
                'log_dir', 'model_dir', 'cache_dir', 'debug', 'cache'):
        setattr(args, key, getattr(parsed, key))
    args.cuda = not parsed.cpu

    # Settings below are not exposed on the command line unless noted.
    args.restore_optimizer = True
    args.batch_per_epoch = 500
    args.embedding_size = 300

    # Model / decoding options that do come from the command line.
    for key in ('eh_size', 'dh_size', 'decode_mode', 'top_k',
                'length_penalty', 'droprate', 'batchnorm'):
        setattr(args, key, getattr(parsed, key))

    fixed_settings = (
        ('lr', 1e-3),
        ('batch_size', 64),
        ('batch_num_per_gradient', 4),
        ('grad_clip', 5),
        ('show_sample', [0]),  # which batch to show when evaluating at tensorboard
        ('max_sent_length', 50),
        ('checkpoint_steps', 20),
        ('checkpoint_max_to_keep', 5),
    )
    for key, value in fixed_settings:
        setattr(args, key, value)

    import random
    random.seed(0)

    from main import main
    # Second and third arguments: empty load-exclude set, no restore callback.
    main(args, [], None)
Beispiel #2
0
def run(argv):
    """Parse the CVAE command line, assemble the config and call ``main``.

    In ``test`` mode a ``result.json`` file is written to the current
    directory afterwards; keys already present in an existing
    ``result.json`` take precedence over the defaults written here.

    Args:
        argv: Sequence of command-line tokens passed to
            ``ArgumentParser.parse_args``. It is also stored under the
            ``"args"`` key of ``result.json`` in test mode, so it has to be
            JSON-serialisable.
    """
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(description='A CVAE model')
    args = Storage()

    # Help strings use argparse's %(default)s so the advertised default can
    # never drift from the configured one.
    parser.add_argument(
        '--name',
        type=str,
        default='CVAE',
        help=
        'The name of your model, used for variable scope and tensorboard, etc. Default: %(default)s (an empty name falls back to runXXXXXX_XXXXXX, initialized by current time)'
    )
    parser.add_argument(
        '--restore',
        type=str,
        default='last',
        help=
        'Checkpoints name to load. "last" for last checkpoints, "best" for best checkpoints on dev. Attention: "last" and "best" will cause unexpected behaviour when run 2 models in the same dir at the same time. Default: %(default)s'
    )
    parser.add_argument('--mode',
                        type=str,
                        default="train",
                        help='"train" or "test". Default: train')
    parser.add_argument('--dataset',
                        type=str,
                        default='SwitchboardCorpus',
                        help='Dataloader class. Default: SwitchboardCorpus')
    parser.add_argument(
        '--datapath',
        type=str,
        default='resources://SwitchboardCorpus',
        help='Directory for data set. Default: %(default)s')
    parser.add_argument('--epoch',
                        type=int,
                        default=100,
                        help="Epoch for trainning. Default: 100")
    parser.add_argument(
        '--wvclass',
        type=str,
        default='Glove',
        help=
        "Wordvector class, none for not using pretrained wordvec. Default: Glove"
    )
    parser.add_argument(
        '--wvpath',
        type=str,
        default="resources://Glove200d",
        help=
        "Directory for pretrained wordvector. Default: resources://Glove200d")

    parser.add_argument(
        '--out_dir',
        type=str,
        default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir',
        type=str,
        default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir',
        type=str,
        default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir',
        type=str,
        default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache',
        action='store_true',
        help=
        'Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)'
    )
    cargs = parser.parse_args(argv)

    # Editing following arguments to bypass command line.
    # An empty --name falls back to a timestamp-based run name.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # The following settings are fixed here and not controlled by command line.
    args.softmax_samples = 512
    args.use_hcf = True
    args.full_kl_step = 10000
    args.latent_size = 200
    args.topic_embedding_size = 30
    args.da_embedding_size = 30
    args.word_embedding_size = 200
    args.session_window = 10
    args.repeat_N = 2
    args.eh_size = 300
    args.ch_size = 600
    args.dh_size = 400
    args.lr = 1e-3
    args.lr_decay = 0.995
    args.batch_size = 3
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.min_vocab_times = 5
    args.max_sen_length = 50
    args.max_turn_length = 1000
    args.checkpoint_steps = 1
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main

    main(args)
    if args.mode == 'test':
        import json
        import os

        result_path = "./result.json"
        res = {'working_dir': './', 'entry': 'run', 'args': argv}
        # Merge with an existing result file; its keys override the defaults
        # above. Context managers make sure both handles are closed (and the
        # written data flushed) deterministically.
        if os.path.exists(result_path):
            with open(result_path) as result_file:
                res.update(json.load(result_file))
        with open(result_path, "w") as result_file:
            json.dump(res, result_file)
Beispiel #3
0
def run(*argv):
    """Parse the language-model command line, build the config and call ``main``.

    Args:
        *argv: Optional command-line tokens. When none are given the parser
            falls back to ``sys.argv[1:]``, which is what the former
            zero-argument ``run()`` always did, so existing callers are
            unaffected.
    """
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(description='A language model')
    args = Storage()

    # %(default)s keeps the advertised default in sync with the real one.
    parser.add_argument(
        '--name',
        type=str,
        default='LM',
        help=
        'The name of your model, used for variable scope and tensorboard, etc. Default: %(default)s (an empty name falls back to runXXXXXX_XXXXXX, initialized by current time)'
    )
    parser.add_argument(
        '--restore',
        type=str,
        default='last',
        help=
        'Checkpoints name to load. "last" for last checkpoints, "best" for best checkpoints on dev. Attention: "last" and "best" will cause unexpected behaviour when run 2 models in the same dir at the same time. Default: %(default)s'
    )
    parser.add_argument('--mode',
                        type=str,
                        default="train",
                        help='"train" or "test". Default: train')
    parser.add_argument('--dataset',
                        type=str,
                        default='MSCOCO',
                        help='Dataloader class. Default: MSCOCO')
    parser.add_argument('--datapath',
                        type=str,
                        default='./data',
                        help='Directory for data set. Default: ./data')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help="Epoch for trainning. Default: 10")
    parser.add_argument(
        '--wvclass',
        type=str,
        default=None,
        help=
        "Wordvector class, None for using Glove pretrained wordvec. Default: None"
    )
    parser.add_argument('--wvpath',
                        type=str,
                        default=None,
                        help="Path for pretrained wordvector. Default: None")

    parser.add_argument(
        '--out_dir',
        type=str,
        default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir',
        type=str,
        default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir',
        type=str,
        default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir',
        type=str,
        default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache',
        action='store_true',
        help=
        'Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)'
    )
    # parse_args(None) reads sys.argv[1:]; an empty *argv therefore keeps the
    # original behaviour.
    cargs = parser.parse_args(argv or None)

    # Editing following arguments to bypass command line.
    # An empty --name falls back to a timestamp-based run name.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # The following settings are fixed here and not controlled by command line.
    args.softmax_samples = 512
    args.embedding_size = 300
    args.dh_size = 200
    args.lr = 1e-1
    args.lr_decay = 0.995
    args.momentum = 0.9
    args.batch_size = 128
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.max_sen_length = 50
    args.checkpoint_steps = 1000
    args.checkpoint_max_to_keep = 5

    import random
    random.seed(0)

    from main import main
    main(args)
Beispiel #4
0
def run(*argv):
    """Parse the seqGAN command line, assemble the config and call ``main``.

    Args:
        *argv: Command-line tokens, forwarded unchanged to
            ``ArgumentParser.parse_args``.
    """
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A seqGAN language generation model')
    args = Storage()

    # %(default)s keeps the advertised default in sync with the real one.
    parser.add_argument(
        '--name',
        type=str,
        default='seqGAN',
        help=
        'The name of your model, used for variable scope and tensorboard, etc. Default: %(default)s (an empty name falls back to runXXXXXX_XXXXXX, initialized by current time)'
    )
    parser.add_argument(
        '--restore',
        type=str,
        default='last',
        help=
        'Checkpoints name to load. "last" for last checkpoints, "best" for best checkpoints on dev. Attention: "last" and "best" will cause unexpected behaviour when run 2 models in the same dir at the same time. Default: %(default)s'
    )
    parser.add_argument('--mode',
                        type=str,
                        default="train",
                        help='"train" or "test". Default: train')
    parser.add_argument('--dataset',
                        type=str,
                        default='MSCOCO',
                        help='Dataloader class. Default: MSCOCO')
    parser.add_argument('--datapath',
                        type=str,
                        default='MSCOCO#MSCOCO',
                        help='Directory for data set. Default: MSCOCO#MSCOCO')
    parser.add_argument(
        '--wvclass',
        type=str,
        default='Glove',
        help=
        "Wordvector class, None for using Glove pretrained wordvec. Default: Glove"
    )
    parser.add_argument(
        '--wvpath',
        type=str,
        default="resources://Glove300d",
        help="Path for pretrained wordvector. Default: resources://Glove300d")
    parser.add_argument(
        '--pre_train',
        type=str,
        default="True",
        help='Pre-train the generator and discriminator. Default: True')

    parser.add_argument(
        '--out_dir',
        type=str,
        default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir',
        type=str,
        default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir',
        type=str,
        default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir',
        type=str,
        default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache',
        action='store_true',
        help=
        'Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)'
    )

    cargs = parser.parse_args(argv)

    # Editing following arguments to bypass command line.
    args.global_step = 0
    # An empty --name falls back to a timestamp-based run name.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    # --pre_train is a string flag: only the exact text "True" enables it.
    args.pre_train = cargs.pre_train == "True"
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # The following settings are fixed here and not controlled by command line.
    args.sample = 100
    args.test_sample = None
    args.softmax_samples = 512
    args.embedding_size = 300
    args.eh_size = 200
    args.dh_size = 200
    args.z_dim = 100
    args.min_kl = 10
    args.full_kl_step = 30000
    args.lr = 1e-1
    args.lr_decay = 0.995
    args.momentum = 0.9
    args.batch_size = 128
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.checkpoint_steps = 1000
    args.checkpoint_max_to_keep = 5
    args.teacher_forcing = True

    args.gen_pre_epoch_num = 25  # number of pretraining epochs (alternative: 120)
    args.dis_pre_epoch_num = 1  # pretraining times of discriminator
    args.total_adv_batch = 200  # total batches used for adversarial training
    args.gen_adv_batch_num = 120  # update times of generator in adversarial training
    args.test_per_epoch = 5
    args.rollout_num = 5  # rollout number for reward estimation
    args.dis_adv_epoch_num = 1  # update times of discriminator in adversarial training (alternative: 5)
    # Named as a keep probability for discriminator dropout; it used to be
    # assigned twice with the same value, now set once.
    args.dis_dropout_keep_prob = 0.75
    args.num_classes = 2  # number of classes (real and fake)
    args.dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15,
                             20]  # convolutional kernel sizes of discriminator
    args.dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]  # number of filters of each conv. kernel
    args.dis_l2_reg_lambda = 0.2  # L2 regularization strength
    args.dis_lr = 1e-4  # learning rate of discriminator
    args.pre_run_batch = None

    import random
    random.seed(0)

    from main import main
    main(args)
Beispiel #5
0
def run(*argv):
    """Parse the HRED command line, assemble the config and call ``main``.

    ``argparse``, ``time``, ``random``, ``Storage`` and ``main`` are not
    imported here; they are expected to be available from the enclosing
    module scope.

    Args:
        *argv: Command-line tokens, forwarded unchanged to
            ``ArgumentParser.parse_args``.
    """
    parser = argparse.ArgumentParser(description='A hred model')
    args = Storage()

    # Help strings use argparse's %(default)s so the advertised default can
    # never drift from the configured one.
    parser.add_argument(
        '--name',
        type=str,
        default='hred',
        help=
        'The name of your model, used for variable scope and tensorboard, etc. Default: %(default)s (an empty name falls back to runXXXXXX_XXXXXX, initialized by current time)'
    )
    parser.add_argument(
        '--restore',
        type=str,
        default='best',
        help=
        'Checkpoints name to load. "last" for last checkpoints, "best" for best checkpoints on dev. Attention: "last" and "best" will cause unexpected behaviour when run 2 models in the same dir at the same time. Default: %(default)s'
    )
    parser.add_argument('--mode',
                        type=str,
                        default="train",
                        help='"train" or "test". Default: train')
    parser.add_argument('--dataset',
                        type=str,
                        default='MyHRED',
                        help='Dataloader class. Default: %(default)s')
    parser.add_argument('--datapath',
                        type=str,
                        default='../data/film',
                        help='Directory for data set. Default: %(default)s')
    parser.add_argument('--epoch',
                        type=int,
                        default=20,
                        help="Epoch for training. Default: %(default)s")
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help="The batch size of data when train or test.")
    parser.add_argument('--max_sent_length',
                        type=int,
                        default=512,
                        help="The max encoded sent length when train.")
    parser.add_argument('--max_decoder_length',
                        type=int,
                        default=50,
                        help="The max decoded sent length when inference.")
    parser.add_argument('--num_turns',
                        type=int,
                        default=8,
                        help="The max number of turns of the post field.")
    parser.add_argument(
        '--wv_class',
        type=str,
        default='TencentChinese',
        help=
        "Wordvector class, none for not using pretrained wordvec. Default: %(default)s"
    )
    parser.add_argument(
        '--wv_path',
        type=str,
        default='wordvector/chinese',
        help="Directory for pretrained wordvector. Default: %(default)s")

    parser.add_argument(
        '--output_dir',
        type=str,
        default="./output/film",
        help='Output directory for test output. Default: %(default)s')
    parser.add_argument(
        '--log_dir',
        type=str,
        default="./tensorboard/film",
        help='Log directory for tensorboard. Default: %(default)s')
    parser.add_argument(
        '--model_dir',
        type=str,
        default="./model/film",
        help='Checkpoints directory for model. Default: %(default)s')
    parser.add_argument(
        '--cache_dir',
        type=str,
        default="./cache/film",
        help='Checkpoints directory for cache. Default: %(default)s')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache',
        action='store_true',
        help=
        'Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)'
    )
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="The random seed in the train process.")
    cargs = parser.parse_args(argv)

    # Editing following arguments to bypass command line.
    # An empty --name falls back to a timestamp-based run name.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.batch_size = cargs.batch_size
    args.wv_class = cargs.wv_class
    args.wv_path = cargs.wv_path
    args.output_dir = cargs.output_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu
    args.seed = cargs.seed
    args.max_sent_length = cargs.max_sent_length
    args.max_decoder_length = cargs.max_decoder_length
    args.num_turns = cargs.num_turns

    # The following settings are fixed here and not controlled by command line.
    args.softmax_samples = 512
    args.embedding_size = 200
    args.eh_size = 200
    args.ch_size = 200
    args.dh_size = 200
    args.lr = 1e-3
    args.lr_decay = 0.99
    args.grad_clip = 5.0
    args.show_sample = [0]
    args.checkpoint_steps = 100
    args.checkpoint_max_to_keep = 5

    # Seed Python's RNG from --seed before handing over to main.
    random.seed(args.seed)

    main(args)
Beispiel #6
0
def run():
	"""Parse the seq2seq options from ``sys.argv``, build the config and call ``main``."""
	import argparse
	import time

	from utils import Storage

	parser = argparse.ArgumentParser(description='A seq2seq model')
	args = Storage()

	# (flag, add_argument keyword arguments). Registration order is kept so
	# that the generated --help lists the options in the same sequence.
	option_table = (
		('--name', dict(
			type=str,
			default=None,
			help='The name of your model, used for tensorboard, etc. Default: runXXXXXX_XXXXXX (initialized by current time)')),
		('--restore', dict(
			type=str,
			default=None,
			help='Checkpoints name to load. \
			"NAME_last" for the last checkpoint of model named NAME. "NAME_best" means the best checkpoint. \
			You can also use "last" and "best", defaultly use last model you run. \
			Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with same name run in the same time. \
			"last" and "best" are not guaranteed to work when 2 models run in the same time.\
			Default: None (don\'t load anything)')),
		('--mode', dict(
			type=str,
			default="train",
			help='"train" or "test". Default: train')),
		('--dataset', dict(
			type=str,
			default='SkeletonToStory',
			help='Dataloader class. Default: SkeletonToStory')),
		('--datapath', dict(
			type=str,
			default='./data',
			help='Directory for data set. Default: ./data')),
		('--epoch', dict(
			type=int,
			default=100,
			help="Epoch for trainning. Default: 100")),
		('--wvclass', dict(
			type=str,
			default=None,
			help="Wordvector class, none for not using pretrained wordvec. Default: None")),
		('--wvpath', dict(
			type=str,
			default="./wordvec",
			help="Directory for pretrained wordvector. Default: ./wordvec")),
		('--out_dir', dict(
			type=str,
			default="./output",
			help='Output directory for test output. Default: ./output')),
		('--log_dir', dict(
			type=str,
			default="./tensorboard",
			help='Log directory for tensorboard. Default: ./tensorboard')),
		('--model_dir', dict(
			type=str,
			default="./model",
			help='Checkpoints directory for model. Default: ./model')),
		('--cache_dir', dict(
			type=str,
			default="./cache",
			help='Checkpoints directory for cache. Default: ./cache')),
		('--cpu', dict(
			action="store_true",
			help='Use cpu.')),
		('--debug', dict(
			action='store_true',
			help='Enter debug mode (using ptvsd).')),
		('--cache', dict(
			action='store_true',
			help='Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)')),
	)
	for flag, settings in option_table:
		parser.add_argument(flag, **settings)

	parsed = parser.parse_args()

	# Values taken from the command line; edit here to bypass it.
	if parsed.name:
		args.name = parsed.name
	else:
		# No name given: derive one from the current local time.
		args.name = time.strftime("run%Y%m%d_%H%M%S", time.localtime())
	for key in ('restore', 'mode', 'dataset', 'datapath'):
		setattr(args, key, getattr(parsed, key))
	args.epochs = parsed.epoch  # CLI flag is singular, config key is plural
	for key in ('wvclass', 'wvpath', 'out_dir', 'log_dir', 'model_dir',
				'cache_dir', 'debug', 'cache'):
		setattr(args, key, getattr(parsed, key))
	args.cuda = not parsed.cpu

	# Everything below is fixed here and not exposed on the command line.
	fixed_settings = (
		('restore_optimizer', True),
		('load_exclude_set', []),
		('restoreCallback', None),
		('batch_per_epoch', 1500),
		('embedding_size', 300),
		('eh_size', 200),
		('dh_size', 200),
		('lr', 1e-3),
		('batch_size', 30),
		('grad_clip', 5),
		('show_sample', [0]),  # which batch to show when evaluating at tensorboard
		('max_sen_length', 50),
		('checkpoint_steps', 20),
		('checkpoint_max_to_keep', 5),
	)
	for key, value in fixed_settings:
		setattr(args, key, value)

	import random
	random.seed(0)

	from main import main
	main(args)
Beispiel #7
0
def run(*argv):
    """Configure and launch the GRU language model from CLI-style arguments.

    Parses ``argv`` with argparse, copies the parsed values together with a
    set of fixed hyperparameters into a ``Storage`` object, seeds the
    ``random``, ``torch`` and ``numpy`` generators with ``--seed`` and finally
    calls ``main.main(args, load_exclude_set, restoreCallback)``.

    Args:
        *argv: Argument strings, e.g. ``run("--mode", "test")``. They are
            passed to ``parser.parse_args`` as a tuple, so calling ``run()``
            with nothing parses an empty argument list instead of falling
            back to ``sys.argv``.

    Raises:
        SystemExit: Raised by argparse for invalid arguments or ``--help``.
    """
    import argparse
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A language model with GRU. Attention, beamsearch,\
		dropout and batchnorm is supported.')
    args = Storage()

    # --- run identification / checkpoint restoring ---
    parser.add_argument(
        '--name',
        type=str,
        default=None,
        help=
        'The name of your model, used for tensorboard, etc. Default: runXXXXXX_XXXXXX (initialized by current time)'
    )
    parser.add_argument('--restore',
                        type=str,
                        default=None,
                        help='Checkpoints name to load. \
			"NAME_last" for the last checkpoint of model named NAME. "NAME_best" means the best checkpoint. \
			You can also use "last" and "best", by default use last model you run. \
			Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with same name run in the same time. \
			"last" and "best" are not guaranteed to work when 2 models run in the same time.\
			Default: None (don\'t load anything)')
    parser.add_argument('--mode',
                        type=str,
                        default="train",
                        help='"train" or "test". Default: train')

    # --- architecture and decoding ---
    parser.add_argument('--dh_size',
                        type=int,
                        default=200,
                        help='Size of decoder GRU')
    parser.add_argument(
        '--droprate',
        type=float,
        default=0,
        help=
        'The probability to be zeroed in dropout. 0 indicates for don\'t use dropout'
    )
    parser.add_argument(
        '--decode_mode',
        type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'],
        default='samplek',
        help=
        'The decode strategy when freerun. Choices: max, sample, gumbel(=sample), \
			samplek(sample from topk), beam(beamsearch). Default: samplek')
    parser.add_argument('--batchnorm',
                        action='store_true',
                        help='Use batchnorm')
    parser.add_argument(
        '--top_k',
        type=int,
        default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument(
        '--length_penalty',
        type=float,
        default=0.7,
        help=
        'The beamsearch penalty for short sentences. The penalty will get larger when this becomes smaller.'
    )
    parser.add_argument('--temperature',
                        type=float,
                        default=1,
                        help='Temperature. Default: 1')

    # --- data and training schedule ---
    parser.add_argument(
        '--dataid',
        type=str,
        default='resources://MSCOCO',
        help='Resources/path for data set. Default: resources://MSCOCO')
    parser.add_argument('--epoch',
                        type=int,
                        default=100,
                        help="Epoch for training. Default: 100")
    # The help text previously advertised 1500 although the default is 500.
    parser.add_argument('--batch_per_epoch',
                        type=int,
                        default=500,
                        help="Batches per epoch. Default: 500")
    parser.add_argument(
        '--wvid',
        type=str,
        default="resources://Glove300d",
        help=
        "Resources/path for pretrained wordvector. Default: resources://Glove300d"
    )

    # --- directories and runtime switches ---
    parser.add_argument(
        '--out_dir',
        type=str,
        default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir',
        type=str,
        default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir',
        type=str,
        default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir',
        type=str,
        default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache',
        action='store_true',
        help=
        'Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)'
    )
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Specify random seed. Default: 0')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-3,
                        help='Learning rate. Default: 0.001')
    cargs = parser.parse_args(argv)

    # general setting
    # A missing --name falls back to a timestamp such as run20200101_120000.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu  # CUDA is requested unless --cpu was given

    ## dataset settings
    args.dataid = cargs.dataid
    args.tokenizer = "space"
    args.max_sent_length = 50
    args.convert_to_lower_letter = False
    args.min_frequent_vocab_times = 10
    args.min_rare_vocab_times = 0
    args.wvid = cargs.wvid

    ## training settings
    args.epochs = cargs.epoch
    args.lr = cargs.lr
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    ## arguments for restoring checkpoints
    args.restore_optimizer = True
    # These two are not stored on args; they are passed to main() directly.
    load_exclude_set = []
    restoreCallback = None

    ## architecture settings
    args.batch_per_epoch = cargs.batch_per_epoch
    args.embedding_size = 300
    args.dh_size = cargs.dh_size
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm

    ## decoding settings
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.temperature = cargs.temperature

    ## random seed
    args.seed = cargs.seed

    # Seed every generator before main is imported and executed.
    import random
    random.seed(cargs.seed)
    import torch
    torch.manual_seed(cargs.seed)
    import numpy as np
    np.random.seed(cargs.seed)

    # Imported late so that argument errors / --help exit before main loads.
    from main import main

    main(args, load_exclude_set, restoreCallback)
Beispiel #8
0
# Script fragment: copies parsed command-line options onto the `args`
# container. `parser`, `args`, `time` and `np` are defined earlier, outside
# this excerpt (presumably an argparse parser, a Storage-like object and
# numpy -- confirm against the full file).
cargs = parser.parse_args()

# Editing following arguments to bypass command line.
# When --name is not given, fall back to a timestamp-based run name.
args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
args.restore = cargs.restore
args.mode = cargs.mode
args.dataset = cargs.dataset
args.datapath = cargs.datapath
args.epochs = cargs.epoch
args.wvclass = cargs.wvclass
args.wvpath = cargs.wvpath
args.out_dir = cargs.out_dir
args.model_dir = cargs.model_dir
args.cache_dir = cargs.cache_dir
args.debug = cargs.debug
args.cache = cargs.cache
# The raw --cuda option is kept as `cuda_num`; `args.cuda` itself is a
# boolean that is True unless --cpu was requested.
args.cuda_num = cargs.cuda
args.cuda = not cargs.cpu

args.disentangle = cargs.disentangle
args.droprate = cargs.droprate
args.hist_len = cargs.hist_len
args.hist_weights = cargs.hist_weights
# One weight is required per history step; reject mismatching input early.
if args.hist_len != len(args.hist_weights):
    raise ValueError('the hist_len should be equal to the length of weights')
# Rescale the weights by their total so that they sum to 1.
args.hist_weights = np.array(args.hist_weights) / sum(args.hist_weights)

# The following arguments are not controlled by command line.
args.restore_optimizer = False
args.load_exclude_set = []
args.restoreCallback = None
Beispiel #9
0
def run(*argv):
	"""Configure and launch the GRU classification model from CLI-style arguments.

	Parses ``argv`` with argparse, copies the parsed values together with a
	set of fixed hyperparameters into a ``Storage`` object, seeds ``random``
	with 0 and calls ``main.main(args, load_exclude_set, restoreCallback)``.

	Args:
		*argv: Argument strings, e.g. ``run("--mode", "test")``. They are
			passed to ``parser.parse_args`` as a tuple, so calling ``run()``
			with nothing parses an empty argument list instead of falling
			back to ``sys.argv``.

	Raises:
		SystemExit: Raised by argparse for invalid arguments or ``--help``.
	"""
	import argparse
	import time

	from utils import Storage

	parser = argparse.ArgumentParser(description='A classification model with GRU encoder and MLP for prediction. \
		Dropout and batchnorm is supported.')
	args = Storage()

	# --- run identification / checkpoint restoring ---
	parser.add_argument('--name', type=str, default=None,
		help='The name of your model, used for tensorboard, etc. Default: runXXXXXX_XXXXXX (initialized by current time)')
	parser.add_argument('--restore', type=str, default=None,
		help='Checkpoints name to load. \
			"NAME_last" for the last checkpoint of model named NAME. "NAME_best" means the best checkpoint. \
			You can also use "last" and "best", by default use last model you run. \
			Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with same name run in the same time. \
			"last" and "best" are not guaranteed to work when 2 models run in the same time.\
			Default: None (don\'t load anything)')
	parser.add_argument('--mode', type=str, default="train",
		help='"train" or "test". Default: train')

	# --- architecture ---
	parser.add_argument('--eh_size', type=int, default=200,
		help='Size of encoder GRU')
	parser.add_argument('--class_num', type=int, default=5,
		help='Number of classes')
	parser.add_argument('--droprate', type=float, default=0,
		help='The probability to be zeroed in dropout. 0 indicates for don\'t use dropout')
	parser.add_argument('--batchnorm', action='store_true',
		help='Use batchnorm')

	# --- data and training schedule ---
	parser.add_argument('--dataset', type=str, default='SST',
		help='Dataloader class. Default: SST')
	parser.add_argument('--datapath', type=str, default='resources://SST',
		help='Directory for data set. Default: resources://SST')
	parser.add_argument('--epoch', type=int, default=100,
		help="Epoch for training. Default: 100")
	parser.add_argument('--wvclass', type=str, default='Glove',
		help="Wordvector class, none for not using pretrained wordvec. Default: Glove")
	parser.add_argument('--wvpath', type=str, default="resources://Glove300d",
		help="Directory for pretrained wordvector. Default: resources://Glove300d")

	# --- directories and runtime switches ---
	parser.add_argument('--out_dir', type=str, default="./output",
		help='Output directory for test output. Default: ./output')
	parser.add_argument('--log_dir', type=str, default="./tensorboard",
		help='Log directory for tensorboard. Default: ./tensorboard')
	parser.add_argument('--model_dir', type=str, default="./model",
		help='Checkpoints directory for model. Default: ./model')
	parser.add_argument('--cache_dir', type=str, default="./cache",
		help='Checkpoints directory for cache. Default: ./cache')
	parser.add_argument('--cpu', action="store_true",
		help='Use cpu.')
	parser.add_argument('--debug', action='store_true',
		help='Enter debug mode (using ptvsd).')
	parser.add_argument('--cache', action='store_true',
		help='Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)')
	cargs = parser.parse_args(argv)


	# Editing following arguments to bypass command line.
	# A missing --name falls back to a timestamp such as run20200101_120000.
	args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
	args.restore = cargs.restore
	args.mode = cargs.mode
	args.dataset = cargs.dataset
	args.datapath = cargs.datapath
	args.epochs = cargs.epoch
	args.wvclass = cargs.wvclass
	args.wvpath = cargs.wvpath
	args.out_dir = cargs.out_dir
	args.log_dir = cargs.log_dir
	args.model_dir = cargs.model_dir
	args.cache_dir = cargs.cache_dir
	args.debug = cargs.debug
	args.cache = cargs.cache
	args.cuda = not cargs.cpu  # CUDA is requested unless --cpu was given

	# The following arguments are not controlled by command line.
	args.restore_optimizer = True
	# These two are not stored on args; they are passed to main() directly.
	load_exclude_set = []
	restoreCallback = None

	args.batch_per_epoch = 500
	args.embedding_size = 300
	args.eh_size = cargs.eh_size
	args.class_num = cargs.class_num

	args.droprate = cargs.droprate
	args.batchnorm = cargs.batchnorm

	args.lr = 1e-3
	args.batch_size = 64
	args.batch_num_per_gradient = 4
	args.grad_clip = 5
	args.show_sample = [0]  # show which batch when evaluating at tensorboard
	args.max_sent_length = 50
	args.checkpoint_steps = 20
	args.checkpoint_max_to_keep = 5

	# Fixed seed for Python's random module (only this generator is seeded here).
	import random
	random.seed(0)

	# Imported late so that argument errors / --help exit before main loads.
	from main import main
	main(args, load_exclude_set, restoreCallback)
Beispiel #10
0
def run(*argv):
    """Configure and launch the seq2seq model from CLI-style arguments.

    Parses ``argv`` with argparse (including the RAML, scheduled-sampling and
    policy-gradient options), copies the parsed values together with a set of
    fixed hyperparameters into a ``Storage`` object, seeds the ``random``,
    ``torch`` and ``numpy`` generators with ``--seed`` and finally calls
    ``main.main(args, load_exclude_set, restoreCallback)``.

    Args:
        *argv: Argument strings, e.g. ``run("--model", "raml")``. They are
            passed to ``parser.parse_args`` as a tuple, so calling ``run()``
            with nothing parses an empty argument list instead of falling
            back to ``sys.argv``.

    Raises:
        SystemExit: Raised by argparse for invalid arguments or ``--help``.
    """
    import argparse
    import time
    from utils import Storage

    parser = argparse.ArgumentParser(description='A seq2seq model with GRU encoder and decoder. Attention, beamsearch,\
        dropout and batchnorm is supported. It can train using RAML, Scheduled Sampling or Policy Gradient algorithms.')
    args = Storage()

    # --- run identification / algorithm / checkpoint restoring ---
    parser.add_argument('--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. Default: runXXXXXX_XXXXXX (initialized by current time)')
    # The help text now spells the choice exactly as argparse accepts it.
    parser.add_argument('--model', type=str, default="basic",choices=["basic","raml","scheduled-sampling","policy-gradient"],
        help='The type of algorithm. Choices: basic, raml, scheduled-sampling, policy-gradient. Default: basic Seq2seq')
    parser.add_argument('--restore', type=str, default=None,
        help='Checkpoints name to load. \
            "NAME_last" for the last checkpoint of model named NAME. "NAME_best" means the best checkpoint. \
            You can also use "last" and "best", by default use last model you run. \
            It can also be an url started with "http". \
            Attention: "NAME_last" and "NAME_best" are not guaranteed to work when 2 models with same name run in the same time. \
            "last" and "best" are not guaranteed to work when 2 models run in the same time.\
            Default: None (don\'t load anything)')
    parser.add_argument('--mode', type=str, default="train",
        help='"train" or "test". Default: train')

    # --- optimisation, architecture and decoding ---
    parser.add_argument('--lr', type=float, default=1e-3,
        help='Learning rate. Default: 0.001')
    parser.add_argument('--eh_size', type=int, default=200,
        help='Size of encoder GRU')
    parser.add_argument('--dh_size', type=int, default=200,
        help='Size of decoder GRU')
    parser.add_argument('--droprate', type=float, default=0,
        help='The probability to be zeroed in dropout. 0 indicates for don\'t use dropout')
    parser.add_argument('--batchnorm', action='store_true',
        help='Use batchnorm')
    parser.add_argument('--decode_mode', type=str, choices=['max', 'sample', 'gumbel', 'samplek', 'beam'], default='beam',
        help='The decode strategy when freerun. Choices: max, sample, gumbel(=sample), \
            samplek(sample from topk), beam(beamsearch). Default: beam')
    parser.add_argument('--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument('--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. The penalty will get larger when this becomes smaller.')

    # --- data and training schedule ---
    parser.add_argument('--dataset', type=str, default='OpenSubtitles',
        help='Dataloader class. Default: OpenSubtitles')
    parser.add_argument('--dataid', type=str, default='resources://OpenSubtitles#OpenSubtitles',
        help='Resource id for data set. It can be a resource name or a local path. Default: resources://OpenSubtitles#OpenSubtitles')
    parser.add_argument('--epoch', type=int, default=100,
        help="Epoch for training. Default: 100")
    parser.add_argument('--batch_per_epoch', type=int, default=1500,
        help="Batches per epoch. Default: 1500")
    parser.add_argument('--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. Default: Glove")
    parser.add_argument('--wvid', type=str, default="resources://Glove300d",
        help="Resource id for pretrained wordvector. Default: resources://Glove300d")

    # --- directories and runtime switches ---
    parser.add_argument('--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument('--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument('--model_dir', type=str, default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument('--cache_dir', type=str, default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true",
        help='Use cpu.')
    # The help text used to be a copy of the --cpu one.
    parser.add_argument('--device', type=int, default=0,
        help='Index of the device to use. Default: 0')
    parser.add_argument('--debug', action='store_true',
        help='Enter debug mode (using ptvsd).')
    parser.add_argument('--cache', action='store_true',
        help='Use cache for speeding up load data and wordvec. (It may cause problems when you switch dataset.)')
    parser.add_argument('--seed', type=int, default=0,
        help='Specify random seed. Default: 0')

    # RAML parameters
    parser.add_argument('--raml_file', type=str, default='samples_iwslt14.txt',
                        help='the samples and rewards described in RAML')
    parser.add_argument('--n_samples', type=int, default=10,
                        help='number of samples for every target sentence')
    parser.add_argument('--tau', type=float, default=0.4,
                        help='the temperature in RAML algorithm')

    # Scheduled sampling parameters
    parser.add_argument('--decay_factor', type=float, default=500.,
        help='The hyperparameter controlling the speed of increasing '
                   'the probability of sampling from model. Default: 500.')

    # Policy Gradient parameters
    parser.add_argument('--epoch_teacherForcing', type=int, default=10,
        help='How long to run teacherForcing before running policy gradient. Default: 10')
    parser.add_argument('--nb_sample_training', type=int, default=20,
        help='How many samples we take for each batch during policy gradient. Default: 20')
    parser.add_argument('--policy_gradient_reward_mode', type=str, default='mean',
        help='How the policy gradient is applied. Default: mean')

    cargs = parser.parse_args(argv)


    # Editing following arguments to bypass command line.
    # A missing --name falls back to a timestamp such as run20200101_120000.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S", time.localtime())
    args.model = cargs.model
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.dataid  # CLI calls it --dataid, config calls it datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvid  # CLI calls it --wvid, config calls it wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu  # CUDA is requested unless --cpu was given
    args.device = cargs.device

    # RAML parameters
    args.raml_file = cargs.raml_file
    args.n_samples = cargs.n_samples
    args.tau = cargs.tau

    # Scheduled sampling parameters
    args.decay_factor = cargs.decay_factor

    # Policy Gradient parameters
    args.epoch_teacherForcing = cargs.epoch_teacherForcing # How long to run teacherForcing before running policy gradient
    args.nb_sample_training   = cargs.nb_sample_training # How many samples we take for each batch during policy gradient
    args.policy_gradient_reward_mode = cargs.policy_gradient_reward_mode # How the policy gradient is applied

    # The following arguments are not controlled by command line.
    args.restore_optimizer = True
    # These two are not stored on args; they are passed to main() directly.
    load_exclude_set = []
    restoreCallback = None

    args.batch_per_epoch = cargs.batch_per_epoch
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.dh_size = cargs.dh_size

    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty

    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm

    args.lr = cargs.lr
    # RAML uses a batch of 3 * n_samples; every other algorithm uses 32.
    args.batch_size = 3*args.n_samples if args.model=="raml" else 32
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    args.seed = cargs.seed

    # Seed every generator before main is imported and executed.
    import random
    random.seed(cargs.seed)
    import torch
    torch.manual_seed(cargs.seed)
    import numpy as np
    np.random.seed(cargs.seed)

    # Imported late so that argument errors / --help exit before main loads.
    from main import main

    main(args, load_exclude_set, restoreCallback)