コード例 #1
0
ファイル: test_dict.py プロジェクト: sawravchy/ParlAI
    def test_basic_parse(self):
        """
        Verify that a short sentence is added to the dictionary and parsed.

        After observing 'hello world' the dictionary must grow by two
        entries, and parsing the same text must yield the two ids that
        directly follow the entries present beforehand. The parse check is
        repeated for the default vector type, ``list`` and ``tuple``.
        """
        parser = ParlaiParser()
        DictionaryAgent.add_cmdline_args(parser)
        agent = DictionaryAgent(parser.parse_args([], print_args=False))
        base_size = len(agent)

        # Feed one utterance through observe/act.
        agent.observe({'text': 'hello world'})
        agent.act()
        assert len(agent) - base_size == 2

        for parse_kwargs in ({}, {'vec_type': list}, {'vec_type': tuple}):
            ids = agent.parse('hello world', **parse_kwargs)
            assert len(ids) == 2
            assert ids[0] == base_size
            assert ids[1] == base_size + 1
コード例 #2
0
ファイル: ir_baseline.py プロジェクト: Taekyung2/MichinAI
 def add_cmdline_args(parser):
     """
     Add command line args specific to this agent.

     The agent's own options go into an 'IrBaseline Arguments' group; the
     dictionary options are then registered through
     ``DictionaryAgent.add_cmdline_args``.

     :param parser: parser the arguments are added to.
     """
     # Keep ``parser`` bound to the real parser. Rebinding it to the group
     # would hand DictionaryAgent a group instead of a parser, and argparse
     # deprecates creating argument groups on a group (Python 3.11+).
     group = parser.add_argument_group('IrBaseline Arguments')
     group.add_argument(
         '-lp',
         '--length_penalty',
         type=float,
         default=0.5,
         help='length penalty for responses',
     )
     group.add_argument(
         '-hsz',
         '--history_size',
         type=int,
         default=1,
         help='number of utterances from the dialogue history to take use '
         'as the query',
     )
     group.add_argument(
         '--label_candidates_file',
         type=str,
         default=None,
         help='file of candidate responses to choose from',
     )
     DictionaryAgent.add_cmdline_args(parser)
コード例 #3
0
ファイル: coopgame_agent.py プロジェクト: yyzreal/ParlAI
 def add_cmdline_args(argparser):
     """Add command-line arguments specifically for this agent.

     Default values are according to (Kottur et al. 2017).

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     # Register the options on the group so they are listed under their own
     # heading in the help output instead of among the parser's loose options.
     group = argparser.add_argument_group('Questioner Agent Arguments')
     group.add_argument(
         '--q-in-vocab',
         default=13,
         type=int,
         help='Input vocabulary for questioner. Usually includes total '
         'distinct words spoken by answerer, questioner itself, '
         'and words by which the goal is described.')
     group.add_argument('--q-embed-size',
                        default=20,
                        type=int,
                        help='Size of word embeddings for questioner')
     group.add_argument('--q-state-size',
                        default=100,
                        type=int,
                        help='Size of hidden state of questioner')
     group.add_argument('--q-out-vocab',
                        default=3,
                        type=int,
                        help='Output vocabulary for questioner')
     group.add_argument(
         '--q-num-pred',
         default=12,
         type=int,
         help='Size of output to be predicted (for goal).')
     # NOTE(review): zero-argument super() needs a bound first argument. If
     # this function is decorated with @staticmethod (the decorator is not
     # visible here) the call raises RuntimeError -- confirm and, if so, call
     # the parent class explicitly.
     super().add_cmdline_args(argparser)
コード例 #4
0
ファイル: test_dict.py プロジェクト: sawravchy/ParlAI
    def test_save_reload(self):
        """
        Round-trip a byte-level BPE dictionary through save and reload.

        The reloaded dictionary must encode "hello" exactly like the
        dictionary it was saved from.
        """
        parser = ParlaiParser()
        DictionaryAgent.add_cmdline_args(parser)
        original_args = [
            '--dict-tokenizer',
            'bytelevelbpe',
            '--bpe-merge',
            DEFAULT_BYTELEVEL_BPE_MERGE,
            '--bpe-vocab',
            DEFAULT_BYTELEVEL_BPE_VOCAB,
        ]
        original = DictionaryAgent(parser.parse_args(original_args))
        # An empty encoding would mean the dictionary failed to load.
        assert original.txt2vec("hello") != []

        with testing_utils.tempdir() as workdir:
            saved_path = os.path.join(workdir, "dict")
            original.save(saved_path)

            # Build a second agent from the file that was just written.
            reload_args = [
                '--dict-tokenizer',
                'bytelevelbpe',
                '--dict-file',
                saved_path,
            ]
            reloaded = DictionaryAgent(parser.parse_args(reload_args))
            assert reloaded.txt2vec("hello") == original.txt2vec("hello")
コード例 #5
0
ファイル: seq2seq.py プロジェクト: thebearer696/ParlAI
 def add_cmdline_args(argparser):
     """Register the dictionary options and a 'Seq2Seq Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     seq2seq_group = argparser.add_argument_group('Seq2Seq Arguments')
     # (flags, keyword settings) for each option, in registration order.
     option_specs = (
         (('-hs', '--hiddensize'),
          dict(type=int, default=64,
               help='size of the hidden layers and embeddings')),
         (('-nl', '--numlayers'),
          dict(type=int, default=2, help='number of hidden layers')),
         (('-lr', '--learningrate'),
          dict(type=float, default=0.5, help='learning rate')),
         (('-dr', '--dropout'),
          dict(type=float, default=0.1, help='dropout rate')),
         (('--no-cuda',),
          dict(action='store_true', default=False,
               help='disable GPUs even if available')),
         (('--gpu',),
          dict(type=int, default=-1, help='which GPU device to use')),
     )
     for flags, settings in option_specs:
         seq2seq_group.add_argument(*flags, **settings)
コード例 #6
0
ファイル: test_dict.py プロジェクト: ahiroto/ParlAI
    def test_basic_parse(self):
        """Check that the dictionary is correctly adding and parsing short
        sentence.

        After observing 'hello world' the dictionary must grow by two
        entries, and parsing the same text must yield the two ids that
        directly follow the entries present beforehand, for the default
        vector type as well as ``list`` and ``tuple``.
        """
        from parlai.core.dict import DictionaryAgent
        from parlai.core.params import ParlaiParser

        argparser = ParlaiParser()
        DictionaryAgent.add_cmdline_args(argparser)
        # Parse an explicit empty list: without it argparse falls back to
        # sys.argv, so the test would depend on (and could fail because of)
        # whatever flags the test runner itself was started with.
        opt = argparser.parse_args([])
        dictionary = DictionaryAgent(opt)
        num_builtin = len(dictionary)

        dictionary.observe({'text': 'hello world'})
        dictionary.act()
        assert len(dictionary) - num_builtin == 2

        for parse_kwargs in ({}, {'vec_type': list}, {'vec_type': tuple}):
            vec = dictionary.parse('hello world', **parse_kwargs)
            assert len(vec) == 2
            assert vec[0] == num_builtin
            assert vec[1] == num_builtin + 1
コード例 #7
0
 def add_cmdline_args(argparser):
     """
     Register the 'Cooperative Game Agent Arguments' group, then the
     dictionary options.

     :param argparser: parser the arguments are added to.
     """
     coop_args = argparser.add_argument_group('Cooperative Game Agent Arguments')
     optimizer_help = (
         'Choose between pytorch optimizers. Any member of '
         'torch.optim is valid and will be used with '
         'default params except learning rate (as specified '
         'by -lr).'
     )
     coop_args.add_argument(
         '--optimizer',
         default='adam',
         choices=CooperativeGameAgent.OPTIM_OPTS.keys(),
         help=optimizer_help,
     )
     coop_args.add_argument(
         '--learning-rate', default=1e-2, type=float, help='Initial learning rate'
     )
     coop_args.add_argument(
         '--no-cuda',
         action='store_true',
         default=False,
         help='disable GPUs even if available',
     )
     coop_args.add_argument(
         '--gpuid',
         type=int,
         default=-1,
         help='which GPU device to use (defaults to cpu)',
     )
     DictionaryAgent.add_cmdline_args(argparser)
コード例 #8
0
def setup_args(parser=None, hidden=True):
    """
    Build (or extend) the parser with dictionary-loop and dictionary options.

    :param parser: parser to extend; when None a new
        ``ParlaiParser(True, True, 'Build a dictionary.')`` is created.
    :param hidden: passed as the ``hidden`` keyword of every loop argument.
    :return: the parser with all arguments added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Build a dictionary.')
    loop_group = parser.add_argument_group('Dictionary Loop Arguments')
    # (flags, keyword settings) for each loop option, in registration order.
    loop_options = (
        (('--dict-maxexs',),
         dict(default=-1, type=int,
              help='max number of examples to build dict on')),
        (('--dict-include-valid',),
         dict(default=False, type='bool',
              help='Include validation set in dictionary building '
                   'for task.')),
        (('--dict-include-test',),
         dict(default=False, type='bool',
              help='Include test set in dictionary building for task.')),
        (('-ltim', '--log-every-n-secs'),
         dict(type=float, default=10)),
    )
    for flags, settings in loop_options:
        loop_group.add_argument(*flags, hidden=hidden, **settings)
    DictionaryAgent.add_cmdline_args(parser)
    return parser
コード例 #9
0
ファイル: fairseq.py プロジェクト: wajustinzhang/ParlAI
 def add_cmdline_args(argparser):
     """Register dictionary, 'Fairseq Arguments' and fairseq option sets.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     fairseq_group = argparser.add_argument_group('Fairseq Arguments')
     truncate_help = (
         'truncate input & output lengths to speed up training (may '
         'reduce accuracy). This fixes all input and output to have a '
         'maximum length. This reduces the total amount of padding in '
         'the batches.'
     )
     fairseq_group.add_argument(
         '-tr', '--truncate', type=int, default=-1, help=truncate_help
     )
     fairseq_group.add_argument(
         '--max-positions',
         default=1024,
         type=int,
         metavar='N',
         help='max number of tokens in the sequence',
     )
     fairseq_group.add_argument(
         '--seed',
         default=1,
         type=int,
         metavar='N',
         help='pseudo random number generator seed',
     )
     # Optimization, generation and model options come from ``options``.
     for register in (
         options.add_optimization_args,
         options.add_generation_args,
         options.add_model_args,
     ):
         register(argparser)
コード例 #10
0
def setup_args(parser=None):
    """
    Build (or extend) the parser with dictionary and statistics options.

    :param parser: parser to extend; when None a new
        ``ParlaiParser(True, True)`` is created.
    :return: the parser, with ``datatype`` defaulting to 'valid' and
        ``model`` defaulting to 'repeat_label'.
    """
    if parser is None:
        parser = ParlaiParser(True, True)
    DictionaryAgent.add_cmdline_args(parser)
    # Get command line arguments
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument('-ed',
                        '--external-dict',
                        type=str,
                        default=None,
                        help='External dictionary for stat computation')
    parser.add_argument('-fb',
                        '--freq-bins',
                        type=str,
                        default='0,100,1000,10000',
                        help='Bins boundaries for rare words stat')
    parser.add_argument('-dup',
                        '--dump-predictions-path',
                        type=str,
                        default=None,
                        help='Dump predictions into file')
    # type='bool' (ParlAI's string-to-boolean type) instead of the builtin
    # ``bool``: bool('False') is True, so the flag could never be turned off.
    # The literal percent sign is doubled because argparse %-formats help
    # strings; a lone '% o' is read as a conversion and breaks --help.
    parser.add_argument('-cun',
                        '--compute-unique',
                        type='bool',
                        default=True,
                        help='Compute %% of unique responses from the model')
    parser.set_defaults(datatype='valid', model='repeat_label')
    return parser
コード例 #11
0
ファイル: memnn.py プロジェクト: jojonki/ParlAI
 def add_cmdline_args(argparser):
     """Register the dictionary options and a 'MemNN Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     memnn_group = argparser.add_argument_group('MemNN Arguments')
     # (flags, keyword settings) for each option, in registration order.
     option_specs = (
         (('-lr', '--learning-rate'),
          dict(type=float, default=0.01, help='learning rate')),
         (('--embedding-size',),
          dict(type=int, default=128, help='size of token embeddings')),
         (('--hops',),
          dict(type=int, default=3, help='number of memory hops')),
         (('--mem-size',),
          dict(type=int, default=100, help='size of memory')),
         (('--time-features',),
          dict(type='bool', default=True,
               help='use time features for memory embeddings')),
         (('--position-encoding',),
          dict(type='bool', default=False,
               help='use position encoding instead of bag of words embedding')),
         (('--output',),
          dict(type=str, default='rank',
               help='type of output (rank|generate)')),
         (('--rnn-layers',),
          dict(type=int, default=2,
               help='number of hidden layers in RNN decoder for generative output')),
         (('--dropout',),
          dict(type=float, default=0.1,
               help='dropout probability for RNN decoder training')),
         (('--optimizer',),
          dict(default='adam', help='optimizer type (sgd|adam)')),
         (('--no-cuda',),
          dict(action='store_true', default=False,
               help='disable GPUs even if available')),
         (('--gpu',),
          dict(type=int, default=-1, help='which GPU device to use')),
     )
     for flags, settings in option_specs:
         memnn_group.add_argument(*flags, **settings)
コード例 #12
0
def setup_args(parser=None):
    """
    Build (or extend) the parser with dictionary-loop and dictionary options.

    The dictionary options come from the class named by ``dict_class`` when
    a preliminary parse finds a truthy value for it, otherwise from
    ``DictionaryAgent``.

    :param parser: parser to extend; when None a new
        ``ParlaiParser(True, True, 'Build a dictionary.')`` is created.
    :return: the parser with all arguments added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Build a dictionary.')
    loop_group = parser.add_argument_group('Dictionary Loop Arguments')
    loop_group.add_argument(
        '--dict-maxexs',
        default=-1,
        type=int,
        help='max number of examples to build dict on',
    )
    loop_group.add_argument(
        '--dict-include-valid',
        default=False,
        type='bool',
        help='Include validation set in dictionary building for task.',
    )
    loop_group.add_argument(
        '--dict-include-test',
        default=False,
        type='bool',
        help='Include test set in dictionary building for task.',
    )
    loop_group.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)

    # Peek at the options known so far to learn which dictionary class to use.
    known_opts, _ = parser.parse_known_args(nohelp=True)
    dict_class_name = vars(known_opts).get('dict_class')
    if dict_class_name:
        dict_cls = str2class(dict_class_name)
    else:
        dict_cls = DictionaryAgent
    dict_cls.add_cmdline_args(parser)
    return parser
コード例 #13
0
 def add_cmdline_args(argparser):
     """Register the dictionary options and this agent's options via ``add_arg``.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     # (flags, keyword settings) for each option, in registration order.
     option_specs = (
         (('-hs', '--hiddensize'),
          dict(type=int, default=64,
               help='size of the hidden layers and embeddings')),
         (('-nl', '--numlayers'),
          dict(type=int, default=2, help='number of hidden layers')),
         (('-lr', '--learningrate'),
          dict(type=float, default=0.5, help='learning rate')),
         (('-dr', '--dropout'),
          dict(type=float, default=0.1, help='dropout rate')),
         (('--no-cuda',),
          dict(action='store_true', default=False,
               help='disable GPUs even if available')),
         (('--gpu',),
          dict(type=int, default=-1, help='which GPU device to use')),
     )
     for flags, settings in option_specs:
         argparser.add_arg(*flags, **settings)
コード例 #14
0
 def add_cmdline_args(argparser):
     """Register the dictionary options and a 'MemNN Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     add = argparser.add_argument_group('MemNN Arguments').add_argument

     add('-lr', '--learning-rate', type=float, default=0.01, help='learning rate')
     add('--embedding-size', type=int, default=128, help='size of token embeddings')
     add('--hops', type=int, default=3, help='number of memory hops')
     add('--mem-size', type=int, default=100, help='size of memory')
     add(
         '--time-features',
         type='bool',
         default=True,
         help='use time features for memory embeddings',
     )
     add(
         '--position-encoding',
         type='bool',
         default=False,
         help='use position encoding instead of bag of words embedding',
     )
     add('--output', type=str, default='rank', help='type of output (rank|generate)')
     add(
         '--rnn-layers',
         type=int,
         default=2,
         help='number of hidden layers in RNN decoder for generative output',
     )
     add(
         '--dropout',
         type=float,
         default=0.1,
         help='dropout probability for RNN decoder training',
     )
     add('--optimizer', default='adam', help='optimizer type (sgd|adam)')
     add(
         '--no-cuda',
         action='store_true',
         default=False,
         help='disable GPUs even if available',
     )
     add('--gpu', type=int, default=-1, help='which GPU device to use')
     add(
         '-hist',
         '--history-length',
         default=100000,
         type=int,
         help='Number of past tokens to remember. Default remembers 100000 tokens.',
     )
     add(
         '-histr',
         '--history-replies',
         default='label',
         type=str,
         choices=['none', 'model', 'label'],
         help='Keep replies in the history, or not.',
     )
コード例 #15
0
 def add_cmdline_args(argparser):
     """Register the dictionary options plus ``--pretrained_words``.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     pretrained_words_settings = {
         'type': 'bool',
         'default': True,
         'help': 'Use only words found in provided embedding_file',
     }
     argparser.add_arg('--pretrained_words', **pretrained_words_settings)
コード例 #16
0
ファイル: fairseq.py プロジェクト: ahiroto/ParlAI
 def add_cmdline_args(argparser):
     """Register dictionary, 'Fairseq Arguments' and fairseq option sets.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     fairseq_group = argparser.add_argument_group('Fairseq Arguments')
     # (flags, keyword settings) for each option, in registration order.
     option_specs = (
         (('-tr', '--truncate'),
          dict(type=int, default=-1,
               help='truncate input & output lengths to speed up training (may '
                    'reduce accuracy). This fixes all input and output to have a '
                    'maximum length. This reduces the total amount of padding in '
                    'the batches.')),
         (('--max-positions',),
          dict(default=1024, type=int, metavar='N',
               help='max number of tokens in the sequence')),
         (('--seed',),
          dict(default=1, type=int, metavar='N',
               help='pseudo random number generator seed')),
     )
     for flags, settings in option_specs:
         fairseq_group.add_argument(*flags, **settings)

     # Optimization, generation and model options come from ``options``.
     options.add_optimization_args(argparser)
     options.add_generation_args(argparser)
     options.add_model_args(argparser)
コード例 #17
0
 def add_cmdline_args(argparser):
     """Add command line args.

     Registers the ``TransresnetModel`` options, this agent's own options
     (in the 'Transresnet Arguments' group) and the dictionary options.

     :param argparser: parser the arguments are added to.
     :return: the 'Transresnet Arguments' argument group.
     """
     arg_group = argparser.add_argument_group('Transresnet Arguments')
     TransresnetModel.add_cmdline_args(argparser)
     # Register on the group rather than on the parser, so the group handed
     # back to the caller actually contains this agent's options.
     arg_group.add_argument(
         '--freeze-patience',
         type=int,
         default=-1,
         help='How long to freeze text encoders',
     )
     arg_group.add_argument(
         '--one-cand-set',
         type='bool',
         default=False,
         help='True if each example has one set of shared '
         'label candidates',
     )
     arg_group.add_argument(
         '--fixed-cands-path',
         type=str,
         default=None,
         help='path to text file with candidates',
     )
     arg_group.add_argument('--pretrained',
                            type='bool',
                            default=False,
                            help='True if pretrained model')
     DictionaryAgent.add_cmdline_args(argparser)
     return arg_group
コード例 #18
0
ファイル: data_stats.py プロジェクト: youngshingjun/ParlAI
def setup_args(parser=None):
    """
    Build (or extend) the parser with data-statistics and dictionary options.

    :param parser: parser to extend; when None a new
        ``ParlaiParser(True, False, 'Lint for ParlAI tasks')`` is created.
    :return: the parser, with ``datatype`` defaulting to 'train:ordered'.
    """
    if parser is None:
        parser = ParlaiParser(True, False, 'Lint for ParlAI tasks')
    parser.add_pytorch_datateacher_args()

    # (flags, keyword settings) for each option, in registration order.
    option_specs = (
        (('-ltim', '--log-every-n-secs'),
         dict(type=float, default=2)),
        (('--agent',),
         dict(type=int, default=0, choices=[0, 1],
              help='Use teacher (agent 0) or model (agent 1)')),
        (('--new_line_new_utt',),
         dict(type='bool', default=False,
              help='New lines treat substrings as separate utterances.')),
        (('--ignore_tokens',),
         dict(type=str, default='',
              help='ignore tokens containings these substrings (comma-separated)')),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    parser.set_defaults(datatype='train:ordered')
    DictionaryAgent.add_cmdline_args(parser)
    return parser
コード例 #19
0
ファイル: coopgame_agent.py プロジェクト: yyzreal/ParlAI
 def add_cmdline_args(argparser):
     """Add command-line arguments specifically for this agent.

     Default values are according to (Kottur et al. 2017).

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     # NOTE(review): the group title and several help texts say "questioner"
     # although every flag is prefixed ``--a-``; this looks copied from the
     # questioner agent -- confirm the intended wording before changing it.
     group = argparser.add_argument_group('Questioner Agent Arguments')
     # The options are registered on ``group``: the only parser name in scope
     # is ``argparser``, so referring to a bare ``parser`` raises NameError.
     group.add_argument(
         '--a-in-vocab',
         default=13,
         type=int,
         help='Input vocabulary for questioner. Usually includes total '
         'distinct words spoken by answerer, questioner itself, '
         'and words by which the goal is described.')
     group.add_argument('--a-embed-size',
                        default=20,
                        type=int,
                        help='Size of word embeddings for questioner')
     group.add_argument('--a-state-size',
                        default=100,
                        type=int,
                        help='Size of hidden state of questioner')
     group.add_argument('--a-out-vocab',
                        default=3,
                        type=int,
                        help='Output vocabulary for questioner')
     group.add_argument('--a-img-feat-size',
                        default=12,
                        type=int,
                        help='Size of output to be predicted (for goal).')
     group.add_argument(
         '--a-memoryless',
         default=False,
         action='store_true',
         help='Whether to remember previous questions/answers encountered.')
     # NOTE(review): zero-argument super() needs a bound first argument. If
     # this function is decorated with @staticmethod (the decorator is not
     # visible here) the call raises RuntimeError -- confirm and, if so, call
     # the parent class explicitly.
     super().add_cmdline_args(argparser)
コード例 #20
0
 def add_cmdline_args(parser):
     """Register the dictionary options plus length-penalty and history-size.

     :param parser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(parser)
     # (flags, keyword settings) for each option, in registration order.
     option_specs = (
         (('-lp', '--length_penalty'),
          dict(type=float, default=0.5,
               help='length penalty for responses')),
         (('-hsz', '--history_size'),
          dict(type=int, default=1,
               help='number of utterances from the dialogue history '
                    'to take use as the query')),
     )
     for flags, settings in option_specs:
         parser.add_argument(*flags, **settings)
コード例 #21
0
 def add_cmdline_args(argparser):
     """Register the dictionary options plus the optional ``--tracker`` choice.

     :param argparser: parser the arguments are added to.
     :return: the same ``argparser``.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     tracker_help = (
         'Type of entity tracker to use. Implemented only '
         'for dialog_babi5 and dialog_babi6.'
     )
     argparser.add_argument(
         '--tracker',
         required=False,
         choices=['babi5', 'babi6'],
         help=tracker_help,
     )
     return argparser
コード例 #22
0
 def add_cmdline_args(argparser):
     """Register the dictionary options and a 'Seq2Seq Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     seq2seq_group = argparser.add_argument_group('Seq2Seq Arguments')
     seq2seq_group.add_argument(
         '-hs', '--hiddensize', type=int, default=128,
         help='size of the hidden layers and embeddings',
     )
     seq2seq_group.add_argument(
         '-nl', '--numlayers', type=int, default=2,
         help='number of hidden layers',
     )
     seq2seq_group.add_argument(
         '-lr', '--learningrate', type=float, default=0.5,
         help='learning rate',
     )
     seq2seq_group.add_argument(
         '-dr', '--dropout', type=float, default=0.1,
         help='dropout rate',
     )
     # Disabled options, kept for reference:
     # seq2seq_group.add_argument('-att', '--attention', type='bool', default=False,
     #     help='whether to use attention over the context during decoding')
     # seq2seq_group.add_argument('-bi', '--bidirectional', type='bool', default=False,
     #     help='whether to encode the context with a bidirectional RNN')
     seq2seq_group.add_argument(
         '--no-cuda', action='store_true', default=False,
         help='disable GPUs even if available',
     )
     seq2seq_group.add_argument(
         '--gpu', type=int, default=-1,
         help='which GPU device to use',
     )
     rank_help = (
         'rank candidates if available. this is done by computing the'
         ' mean score per token for each candidate and selecting the '
         'highest scoring one.'
     )
     seq2seq_group.add_argument(
         '-rc', '--rank-candidates', type='bool', default=False, help=rank_help
     )
     truncate_help = (
         'truncate input & output lengths to speed up training '
         '(may reduce accuracy). This fixes all input and output '
         'to have a maximum length and to be similar in length to '
         'one another by throwing away extra tokens. This reduces '
         'the total amount of padding in the batches.'
     )
     seq2seq_group.add_argument(
         '-tr', '--truncate', type='bool', default=True, help=truncate_help
     )
コード例 #23
0
ファイル: test_dict.py プロジェクト: Taekyung2/MichinAI
    def test_nofile(self):
        """
        Check that building a byte-level BPE dictionary raises IOError when
        the merge and/or vocab file is not given or does not exist.
        """
        parser = ParlaiParser()
        DictionaryAgent.add_cmdline_args(parser)
        tokenizer_args = ['--dict-tokenizer', 'bytelevelbpe']
        failing_extras = [
            # did not specify bpe merge or vocab
            [],
            # specified one
            ['--bpe-merge', DEFAULT_BYTELEVEL_BPE_MERGE],
            # specified the other
            ['--bpe-vocab', DEFAULT_BYTELEVEL_BPE_VOCAB],
            # intentionally wrong merge file
            ['--bpe-merge', 'foobar', '--bpe-vocab', DEFAULT_BYTELEVEL_BPE_VOCAB],
            # intentionally wrong vocab file
            ['--bpe-merge', DEFAULT_BYTELEVEL_BPE_MERGE, '--bpe-vocab', 'foobar'],
        ]
        for extra_args in failing_extras:
            with self.assertRaises(IOError):
                DictionaryAgent(parser.parse_args(tokenizer_args + extra_args))
コード例 #24
0
ファイル: seq2seq.py プロジェクト: zhongyunuestc/convai
 def add_cmdline_args(argparser):
     """Register the dictionary options and a 'Seq2Seq Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     add = argparser.add_argument_group('Seq2Seq Arguments').add_argument

     # Model size and training hyper-parameters.
     add('-hs', '--hiddensize', type=int, default=128,
         help='size of the hidden layers')
     add('-emb', '--embeddingsize', type=int, default=128,
         help='size of the token embeddings')
     add('-nl', '--numlayers', type=int, default=2,
         help='number of hidden layers')
     add('-lr', '--learning_rate', type=float, default=0.5,
         help='learning rate')
     add('-dr', '--dropout', type=float, default=0.1,
         help='dropout rate')
     add('-att', '--attention', type=int, default=0,
         help='if greater than 0, use attention of specified'
              ' length while decoding')

     # Device selection.
     add('--no-cuda', action='store_true', default=False,
         help='disable GPUs even if available')
     add('--gpu', type=int, default=-1,
         help='which GPU device to use')

     # Candidate ranking and input handling.
     add('-rc', '--rank-candidates', type='bool', default=False,
         help='rank candidates if available. this is done by'
              ' computing the mean score per token for each '
              'candidate and selecting the highest scoring.')
     add('-tr', '--truncate', type='bool', default=True,
         help='truncate input & output lengths to speed up '
              'training (may reduce accuracy). This fixes all '
              'input and output to have a maximum length and to '
              'be similar in length to one another by throwing '
              'away extra tokens. This reduces the total amount '
              'of padding in the batches.')

     # Module and optimizer choices.
     add('-enc', '--encoder', default='gru',
         choices=Seq2seqAgent.ENC_OPTS.keys(),
         help='Choose between different encoder modules.')
     decoder_choices = ['same', 'shared', *Seq2seqAgent.ENC_OPTS.keys()]
     add('-dec', '--decoder', default='same',
         choices=decoder_choices,
         help='Choose between different decoder modules. '
              'Default "same" uses same class as encoder, '
              'while "shared" also uses the same weights.')
     add('-opt', '--optimizer', default='sgd',
         choices=Seq2seqAgent.OPTIM_OPTS.keys(),
         help='Choose between pytorch optimizers. '
              'Any member of torch.optim is valid and will '
              'be used with default params except learning '
              'rate (as specified by -lr).')
     add('-epi', '--episode-concat', type='bool', default=False,
         help='If multiple observations are from the same episode, '
              'concatenate them.')
コード例 #25
0
ファイル: seq2seq.py プロジェクト: analyticlaks/ParlAI
 def add_cmdline_args(argparser):
     """Register the dictionary options and a 'Seq2Seq Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     add = argparser.add_argument_group('Seq2Seq Arguments').add_argument
     add(
         '-hs',
         '--hiddensize',
         type=int,
         default=64,
         help='size of the hidden layers and embeddings',
     )
     add('-nl', '--numlayers', type=int, default=2, help='number of hidden layers')
     add('-lr', '--learningrate', type=float, default=0.5, help='learning rate')
     add('-dr', '--dropout', type=float, default=0.1, help='dropout rate')
     add(
         '--no-cuda',
         action='store_true',
         default=False,
         help='disable GPUs even if available',
     )
     add('--gpu', type=int, default=-1, help='which GPU device to use')
コード例 #26
0
ファイル: coopgame_agent.py プロジェクト: ahiroto/ParlAI
 def add_cmdline_args(argparser):
     """Add command-line arguments specifically for this agent.

     Registers the dictionary options and then the options of the
     'Cooperative Game Agent Arguments' group.

     :param argparser: parser the arguments are added to.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     group = argparser.add_argument_group('Cooperative Game Agent Arguments')
     # Registered on ``group`` like the other options: no ``agent`` name
     # exists in this scope, so using it raises NameError.
     group.add_argument('--optimizer', default='adam',
                        choices=CooperativeGameAgent.OPTIM_OPTS.keys(),
                        help='Choose between pytorch optimizers. Any member of torch.optim '
                             'is valid and will be used with default params except learning '
                             'rate (as specified by -lr).')
     group.add_argument('--learning-rate', default=1e-2, type=float,
                        help='Initial learning rate')
     group.add_argument('--no-cuda', action='store_true', default=False,
                        help='disable GPUs even if available')
     group.add_argument('--gpuid', type=int, default=-1,
                        help='which GPU device to use (defaults to cpu)')
コード例 #27
0
ファイル: drqa.py プロジェクト: ahiroto/ParlAI
 def add_cmdline_args(argparser):
     """Extend the dictionary options with ``--pretrained_words``.

     The option is added to the object returned by
     ``DictionaryAgent.add_cmdline_args``, whose ``dict_tokenizer`` default
     is then set to ``'spacy'``.
     """
     dict_group = DictionaryAgent.add_cmdline_args(argparser)
     dict_group.add_argument(
         '--pretrained_words',
         type='bool',
         default=True,
         help='Use only words found in provided embedding_file',
     )
     dict_group.set_defaults(dict_tokenizer='spacy')
コード例 #28
0
ファイル: drqa.py プロジェクト: hjnnewton/ParlAI2
 def add_cmdline_args(argparser):
     """Add ``--pretrained_words`` to the dictionary options.

     Also changes the ``dict_tokenizer`` default to ``'spacy'`` on the
     object returned by ``DictionaryAgent.add_cmdline_args``.
     """
     dictionary_args = DictionaryAgent.add_cmdline_args(argparser)
     dictionary_args.add_argument('--pretrained_words',
                                  type='bool',
                                  default=True,
                                  help='Use only words found in provided embedding_file')
     dictionary_args.set_defaults(dict_tokenizer='spacy')
コード例 #29
0
 def add_cmdline_args(argparser):
     """Add the dictionary options plus a ``--dict_class`` option.

     ``--dict_class`` defaults to the string form of ``NERDictionaryAgent``
     as produced by ``class2str``.
     """
     dict_group = DictionaryAgent.add_cmdline_args(argparser)
     default_class = class2str(NERDictionaryAgent)
     dict_group.add_argument('--dict_class',
                             default=default_class,
                             help='Sets the dictionary\'s class')
コード例 #30
0
ファイル: hciae.py プロジェクト: wolegechu/ParlAI
 def add_cmdline_args(argparser):
     """Register the dictionary options and the 'HCIAE Arguments' group.

     Covers dropout, embedding/hidden sizes, GPU selection, RNN depth,
     optimizer choice and learning rate.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     hciae = argparser.add_argument_group('HCIAE Arguments')

     # Network dimensions and regularisation.
     hciae.add_argument('--dropout', type=float, default=0.1, help='')
     hciae.add_argument('--embedding-size', type=int, default=512, help='')
     hciae.add_argument('--hidden-size', type=int, default=512, help='')

     # Device selection.
     hciae.add_argument(
         '--no-cuda',
         action='store_true',
         default=False,
         help='disable GPUs even if available',
     )
     hciae.add_argument(
         '--gpu',
         type=int,
         default=-1,
         help='which GPU device to use',
     )

     # Decoder depth and optimisation.
     hciae.add_argument(
         '--rnn-layers',
         type=int,
         default=2,
         help='number of hidden layers in RNN decoder for generative output',
     )
     hciae.add_argument(
         '--optimizer',
         default='adam',
         help='optimizer type (sgd|adam)',
     )
     hciae.add_argument(
         '-lr',
         '--learning-rate',
         type=float,
         default=0.01,
         help='learning rate',
     )
コード例 #31
0
ファイル: seq2seq.py プロジェクト: jojonki/ParlAI
 def add_cmdline_args(argparser):
     """Register the dictionary options and the 'Seq2Seq Arguments' group.

     The group covers layer sizes, learning rate, dropout, attention
     length, GPU selection, candidate ranking, truncation, and the
     encoder/decoder/optimizer choices taken from ``Seq2seqAgent``.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     seq2seq_args = argparser.add_argument_group('Seq2Seq Arguments')

     # Sizes and optimisation hyper-parameters.
     seq2seq_args.add_argument(
         '-hs', '--hiddensize', type=int, default=128,
         help='size of the hidden layers')
     seq2seq_args.add_argument(
         '-emb', '--embeddingsize', type=int, default=128,
         help='size of the token embeddings')
     seq2seq_args.add_argument(
         '-nl', '--numlayers', type=int, default=2,
         help='number of hidden layers')
     seq2seq_args.add_argument(
         '-lr', '--learningrate', type=float, default=0.5,
         help='learning rate')
     seq2seq_args.add_argument(
         '-dr', '--dropout', type=float, default=0.1,
         help='dropout rate')
     seq2seq_args.add_argument(
         '-att', '--attention', type=int, default=0,
         help='if greater than 0, use attention of specified'
              ' length while decoding')

     # Device selection.
     seq2seq_args.add_argument(
         '--no-cuda', action='store_true', default=False,
         help='disable GPUs even if available')
     seq2seq_args.add_argument(
         '--gpu', type=int, default=-1,
         help='which GPU device to use')

     # Candidate ranking and truncation.
     seq2seq_args.add_argument(
         '-rc', '--rank-candidates', type='bool', default=False,
         help='rank candidates if available. this is done by'
              ' computing the mean score per token for each '
              'candidate and selecting the highest scoring.')
     seq2seq_args.add_argument(
         '-tr', '--truncate', type='bool', default=True,
         help='truncate input & output lengths to speed up '
              'training (may reduce accuracy). This fixes all '
              'input and output to have a maximum length and to '
              'be similar in length to one another by throwing '
              'away extra tokens. This reduces the total amount '
              'of padding in the batches.')

     # Module and optimizer choices.
     seq2seq_args.add_argument(
         '-enc', '--encoder', default='gru',
         choices=Seq2seqAgent.ENC_OPTS.keys(),
         help='Choose between different encoder modules.')
     seq2seq_args.add_argument(
         '-dec', '--decoder', default='same',
         choices=['same', 'shared'] + list(Seq2seqAgent.ENC_OPTS.keys()),
         help='Choose between different decoder modules. '
              'Default "same" uses same class as encoder, '
              'while "shared" also uses the same weights.')
     seq2seq_args.add_argument(
         '-opt', '--optimizer', default='sgd',
         choices=Seq2seqAgent.OPTIM_OPTS.keys(),
         help='Choose between pytorch optimizers. '
              'Any member of torch.optim is valid and will '
              'be used with default params except learning '
              'rate (as specified by -lr).')
コード例 #32
0
 def add_cmdline_args(argparser):
     """Add the dictionary options plus the ``--pretrained_words`` switch.

     The switch is registered on the object returned by
     ``DictionaryAgent.add_cmdline_args`` and defaults to ``True``.
     """
     dict_group = DictionaryAgent.add_cmdline_args(argparser)
     dict_group.add_argument('--pretrained_words', type='bool', default=True,
                             help='Use only words found in provided embedding_file')
コード例 #33
0
 def add_cmdline_args(argparser):
     """Register the dictionary options and the 'Fairseq Arguments' group.

     After adding ``--max-positions`` and ``--seed``, the optimization,
     generation and model arguments from ``options`` are added to the
     same parser, in that order.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     fairseq_group = argparser.add_argument_group('Fairseq Arguments')
     fairseq_group.add_argument(
         '--max-positions', default=1024, type=int, metavar='N',
         help='max number of tokens in the sequence')
     fairseq_group.add_argument(
         '--seed', default=1, type=int, metavar='N',
         help='pseudo random number generator seed')

     # Remaining options are registered by the ``options`` helpers.
     options.add_optimization_args(argparser)
     options.add_generation_args(argparser)
     options.add_model_args(argparser)
コード例 #34
0
 def add_cmdline_args(argparser):
     """Register the dictionary options and this model's hyper-parameters.

     Adds embedding size, encoder/decoder layer counts, kernel size,
     learning rate, dropout and device flags directly to ``argparser``.
     Most defaults are read from ``constants``.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     argparser.add_arg('-hs', '--embedding_size', type=int,
                       default=constants.EMBEDDING_SIZE,
                       help='size of the embeddings')
     argparser.add_arg('-nel', '--num_encoder_layers', type=int,
                       default=constants.NUM_ENCODER_LAYERS,
                       help='number of encoder layers')
     argparser.add_arg('-ndl', '--num_decoder_layers', type=int,
                       default=constants.NUM_DECODER_LAYERS,
                       help='number of decoder layers')
     argparser.add_arg('-ks', '--kernel_size', type=int,
                       default=constants.KERNEL_SIZE,
                       help='size of the convolution kernel')
     argparser.add_arg('-lr', '--learning_rate', type=float,
                       default=constants.LEARNING_RATE,
                       help='learning rate')
     argparser.add_arg('-dr', '--dropout', type=float, default=0.1,
                       help='dropout rate')
     # ``store_true`` switches CUDA on when the flag is passed, so the help
     # text describes enabling GPUs.
     # NOTE(review): if constants.USE_CUDA is True this flag cannot turn
     # CUDA off -- confirm that is intended.
     argparser.add_arg('--cuda', action='store_true',
                       default=constants.USE_CUDA,
                       help='use GPUs if available')
     argparser.add_arg('--gpu', type=int, default=-1,
                       help='which GPU device to use')
コード例 #35
0
ファイル: coopgame_agent.py プロジェクト: ahiroto/ParlAI
 def add_cmdline_args(argparser):
     """Add command-line arguments specifically for this agent.

     Default values are according to (Kottur et al. 2017). The dictionary
     options are registered first, then the questioner options under the
     'Questioner Agent Arguments' group, and finally the parent's
     ``add_cmdline_args`` is invoked on the same parser.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     group = argparser.add_argument_group('Questioner Agent Arguments')
     # All questioner options are registered on ``group``, the only argument
     # group bound in this function.
     group.add_argument('--q-in-vocab', default=13, type=int,
                        help='Input vocabulary for questioner. Usually includes total '
                             'distinct words spoken by answerer, questioner itself, '
                             'and words by which the goal is described.')
     group.add_argument('--q-embed-size', default=20, type=int,
                        help='Size of word embeddings for questioner')
     group.add_argument('--q-state-size', default=100, type=int,
                        help='Size of hidden state of questioner')
     group.add_argument('--q-out-vocab', default=3, type=int,
                        help='Output vocabulary for questioner')
     group.add_argument('--q-num-pred', default=12, type=int,
                        help='Size of output to be predicted (for goal).')
     # NOTE(review): zero-argument super() depends on how this method is
     # bound in the enclosing class (not visible here) -- confirm it resolves.
     super().add_cmdline_args(argparser)
コード例 #36
0
 def __init__(self, opt, shared=None):
     """Set up the agent from the space-separated ``model_params`` option.

     A fresh ``ParlaiParser`` with the dictionary options and
     ``--length_penalty`` parses ``opt['model_params']`` (or an empty
     argument list when that entry is missing or empty). The parsed
     options replace ``self.opt`` and are used to build the dictionary.
     ``shared`` is accepted but not used here.
     """
     super().__init__(opt)
     self.id = 'IRBaselineAgent'

     model_parser = ParlaiParser(True)
     DictionaryAgent.add_cmdline_args(model_parser)
     model_parser.add_argument(
         '-lp', '--length_penalty', default=0.5,
         help='length penalty for responses')

     # Split the packed parameter string into argv-style tokens.
     raw_params = opt.get('model_params', None)
     param_tokens = raw_params.split(' ') if raw_params else []
     model_opts = model_parser.parse_args(param_tokens)

     # The option has no declared type, so coerce it explicitly.
     self.length_penalty = float(model_opts['length_penalty'])
     self.dictionary = DictionaryAgent(model_opts)
     self.opt = model_opts
コード例 #37
0
ファイル: seq2seq.py プロジェクト: swkrueger/ParlAI
 def add_cmdline_args(argparser):
     """Register the dictionary options and the 'Seq2Seq Arguments' group.

     The group holds hidden size, layer count, learning rate, dropout,
     GPU selection and the candidate-ranking switch.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     seq2seq = argparser.add_argument_group('Seq2Seq Arguments')
     seq2seq.add_argument('-hs', '--hiddensize', type=int, default=128,
                          help='size of the hidden layers and embeddings')
     seq2seq.add_argument('-nl', '--numlayers', type=int, default=2,
                          help='number of hidden layers')
     seq2seq.add_argument('-lr', '--learningrate', type=float, default=0.5,
                          help='learning rate')
     seq2seq.add_argument('-dr', '--dropout', type=float, default=0.1,
                          help='dropout rate')
     # NOTE: the attention (-att) and bidirectional (-bi) options are not
     # registered by this version of the agent.
     seq2seq.add_argument('--no-cuda', action='store_true', default=False,
                          help='disable GPUs even if available')
     seq2seq.add_argument('--gpu', type=int, default=-1,
                          help='which GPU device to use')
     seq2seq.add_argument('-rc', '--rank-candidates', type='bool',
                          default=False,
                          help='rank candidates if available. this is done by computing the'
                               ' mean score per token for each candidate and selecting the '
                               'highest scoring one.')
コード例 #38
0
def setup_args(parser=None):
    """Build (or extend) the parser used to compute statistics from model predictions.

    Args:
        parser: an existing parser to extend. When ``None``, a new
            ``ParlaiParser`` is created.

    Returns:
        The parser, with the dictionary, word-statistics and Tensorboard
        options added and the evaluation parameters set.
    """
    if parser is None:
        parser = ParlaiParser(True, True,
                              'compute statistics from model predictions')
    DictionaryAgent.add_cmdline_args(parser)

    # These defaults can be overridden by both .opt file and user's command line flags
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument(
        '-ed',
        '--external-dict',
        type=str,
        default=None,
        help='External dictionary for stat computation',
    )
    parser.add_argument(
        '-fb',
        '--freq-bins',
        type=str,
        default='0,100,1000,10000',
        help='Bins boundaries for rare words stat',
    )
    # Use the parser's registered 'bool' type: the builtin ``bool`` treats any
    # non-empty string (including "False") as True.
    parser.add_argument(
        '-gr',
        '--gold-response',
        type='bool',
        default=False,
        help='Compute stats for gold response',
    )

    # These settings override .opt file but not user's command line flags
    parser.set_params(
        datatype='valid',
        task='projects.controllable_dialogue.tasks.agents',
        model=
        'projects.controllable_dialogue.controllable_seq2seq.controllable_seq2seq:ControllableSeq2seqAgent',  # noqa: E501
        batchsize=64,
        beam_size=20,
        beam_min_n_best=10,
        use_reply='model',
    )
    TensorboardLogger.add_cmdline_args(parser)
    return parser
コード例 #39
0
ファイル: fairseq.py プロジェクト: youlei5898/ParlAI
    def add_cmdline_args(argparser):
        """Register the dictionary options and the 'Fairseq Arguments' group.

        The group covers sequence limits, optimisation, beam-search
        generation, the architecture choice (taken from
        ``models.arch_model_map``), per-model layer options, dropout and
        label smoothing.
        """
        DictionaryAgent.add_cmdline_args(argparser)
        fairseq = argparser.add_argument_group('Fairseq Arguments')

        # General settings.
        fairseq.add_argument('--max-positions', default=1024, type=int,
                             metavar='N',
                             help='max number of tokens in the sequence')
        fairseq.add_argument('--seed', default=1, type=int, metavar='N',
                             help='pseudo random number generator seed')

        # Optimisation.
        fairseq.add_argument('--lr', '--learning-rate', default=0.25,
                             type=float, metavar='LR',
                             help='initial learning rate')
        fairseq.add_argument('--momentum', default=0.99, type=float,
                             metavar='M', help='momentum factor')
        fairseq.add_argument('--weight-decay', '--wd', default=0.0,
                             type=float, metavar='WD', help='weight decay')
        fairseq.add_argument('--force-anneal', '--fa', default=0, type=int,
                             metavar='N',
                             help='force annealing at specified epoch')

        # Generation.
        fairseq.add_argument('--beam', default=5, type=int, metavar='N',
                             help='beam size')
        fairseq.add_argument('--no-early-stop', action='store_true',
                             help=('continue searching even after finalizing k=beam '
                                   'hypotheses; this is more correct, but increases '
                                   'generation time by 50%%'))
        fairseq.add_argument('--unnormalized', action='store_true',
                             help='compare unnormalized hypothesis scores')
        fairseq.add_argument('--lenpen', default=1, type=float,
                             help='length penalty: <1.0 favors shorter, >1.0 favors longer sentences')
        fairseq.add_argument('--clip-norm', default=25, type=float,
                             metavar='NORM',
                             help='clip threshold of gradients')

        # Architecture selection; the help text lists the known names.
        arch_names = models.arch_model_map.keys()
        fairseq.add_argument('--arch', '-a', default='fconv', metavar='ARCH',
                             choices=arch_names,
                             help='model architecture ({})'.format(
                                 ', '.join(arch_names)))

        # Options registered without an explicit default.
        fairseq.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
        fairseq.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
        fairseq.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
        fairseq.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
        fairseq.add_argument('--decoder-out-embed-dim', type=int,
                             metavar='N',
                             help='decoder output embedding dimension')
        fairseq.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')

        # These arguments have default values independent of the model:
        fairseq.add_argument('--dropout', default=0.1, type=float,
                             metavar='D', help='dropout probability')
        fairseq.add_argument('--label-smoothing', default=0, type=float,
                             metavar='D',
                             help='epsilon for label smoothing, 0 means no label smoothing')
コード例 #40
0
ファイル: build_dict.py プロジェクト: youlei5898/ParlAI
def main():
    """Parse the dictionary command-line options and build the dictionary."""
    parser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(parser)
    # Hand the parsed options straight to the dictionary builder.
    build_dict(parser.parse_args())
コード例 #41
0
ファイル: seq2seq.py プロジェクト: youlei5898/ParlAI
 def add_cmdline_args(argparser):
     """Add command-line arguments specifically for this agent.

     Registers the dictionary options, then the 'Seq2Seq Arguments' group
     covering layer sizes, learning rate, dropout, bidirectionality,
     attention, GPU selection, candidate ranking, truncation, module and
     optimizer choices, embedding sharing/initialisation and language
     modeling.
     """
     DictionaryAgent.add_cmdline_args(argparser)
     agent = argparser.add_argument_group('Seq2Seq Arguments')
     agent.add_argument('-hs', '--hiddensize', type=int, default=128,
                        help='size of the hidden layers')
     agent.add_argument('-esz', '--embeddingsize', type=int, default=128,
                        help='size of the token embeddings')
     agent.add_argument('-nl', '--numlayers', type=int, default=2,
                        help='number of hidden layers')
     agent.add_argument('-lr', '--learningrate', type=float, default=0.005,
                        help='learning rate')
     agent.add_argument('-dr', '--dropout', type=float, default=0.1,
                        help='dropout rate')
     agent.add_argument('-bi', '--bidirectional', type='bool',
                        default=False,
                        help='whether to encode the context with a '
                             'bidirectional rnn')
     # The help text lists every value accepted by ``choices``.
     agent.add_argument('-att', '--attention', default='none',
                        choices=['none', 'concat', 'general', 'dot', 'local'],
                        help='Choices: none, concat, general, dot, local. '
                             'If set local, also set attention-length. '
                             'For more details see: '
                             'https://arxiv.org/pdf/1508.04025.pdf')
     agent.add_argument('-attl', '--attention-length', default=48, type=int,
                        help='Length of local attention.')
     agent.add_argument('--no-cuda', action='store_true', default=False,
                        help='disable GPUs even if available')
     agent.add_argument('--gpu', type=int, default=-1,
                        help='which GPU device to use')
     agent.add_argument('-rc', '--rank-candidates', type='bool',
                        default=False,
                        help='rank candidates if available. this is done by'
                             ' computing the mean score per token for each '
                             'candidate and selecting the highest scoring.')
     agent.add_argument('-tr', '--truncate', type=int, default=-1,
                        help='truncate input & output lengths to speed up '
                        'training (may reduce accuracy). This fixes all '
                        'input and output to have a maximum length and to '
                        'be similar in length to one another by throwing '
                        'away extra tokens. This reduces the total amount '
                        'of padding in the batches.')
     agent.add_argument('-enc', '--encoder', default='gru',
                        choices=Seq2seqAgent.ENC_OPTS.keys(),
                        help='Choose between different encoder modules.')
     agent.add_argument('-dec', '--decoder', default='same',
                        choices=['same', 'shared'] + list(Seq2seqAgent.ENC_OPTS.keys()),
                        help='Choose between different decoder modules. '
                             'Default "same" uses same class as encoder, '
                             'while "shared" also uses the same weights. '
                             'Note that shared disabled some encoder '
                             'options--in particular, bidirectionality.')
     agent.add_argument('-lt', '--lookuptable', default='all',
                        choices=['unique', 'enc_dec', 'dec_out', 'all'],
                        help='The encoder, decoder, and output modules can '
                             'share weights, or not. '
                             'Unique has independent embeddings for each. '
                             'Enc_dec shares the embedding for the encoder '
                             'and decoder. '
                             'Dec_out shares decoder embedding and output '
                             'weights. '
                             'All shares all three weights.')
     agent.add_argument('-opt', '--optimizer', default='adam',
                        choices=Seq2seqAgent.OPTIM_OPTS.keys(),
                        help='Choose between pytorch optimizers. '
                             'Any member of torch.optim is valid and will '
                             'be used with default params except learning '
                             'rate (as specified by -lr).')
     # Each help fragment ends with a space so adjacent sentences do not run
     # together after implicit string concatenation.
     agent.add_argument('-emb', '--embedding-type', default='random',
                        choices=['random', 'glove', 'glove-fixed'],
                        help='Choose between different strategies '
                             'for word embeddings. Default is random, '
                             'but can also preinitialize from Glove. '
                             'Preinitialized embeddings can also be fixed '
                             'so they are not updated during training.')
     agent.add_argument('-lm', '--language-model', default='none',
                        choices=['none', 'only', 'both'],
                        help='Enabled language modeling training on the '
                             'concatenated input and label data.')
コード例 #42
0
ファイル: ir_baseline.py プロジェクト: ahiroto/ParlAI
 def add_cmdline_args(parser):
     """Add the dictionary options and ``--length_penalty`` to ``parser``."""
     DictionaryAgent.add_cmdline_args(parser)
     parser.add_argument('-lp',
                         '--length_penalty',
                         default=0.5,
                         help='length penalty for responses')
コード例 #43
0
def main():
    """Train a remote luatorch memnn agent and report validation results.

    Steps, in order:
      1. Parse the command line, appending default ``--remote-cmd`` /
         ``--remote-args`` values to ``sys.argv`` when they are absent.
         Requires the ``PARLAI_HOME`` environment variable.
      2. Build a dictionary from the ordered training data when no
         ``dict_file`` option is set, and save it to ``/tmp/dict.txt``.
      3. Alternate training and validation for up to ``num_its`` rounds,
         stopping early once validation accuracy exceeds 0.95.
      4. Display three example validation dialogs and the elapsed time.
    """
    # Get command line arguments
    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    ParsedRemoteAgent.add_cmdline_args(argparser)
    argparser.add_argument('--num-examples', default=1000, type=int)
    argparser.add_argument('--num-its', default=100, type=int)
    argparser.add_argument('--dict-max-exs', default=10000, type=int)

    # Fill in remote command defaults before the arguments are parsed.
    parlai_home = os.environ['PARLAI_HOME']
    if '--remote-cmd' not in sys.argv:
        luajit_missing = os.system('which luajit') != 0
        if luajit_missing:
            raise RuntimeError(
                'Could not detect torch luajit installed: '
                'please install torch from http://torch.ch '
                'or manually set --remote-cmd for this example.'
            )
        remote_cmd = (
            'luajit {}/parlai/agents/memnn_luatorch_cpu/memnn_zmq_parsed.lua'
        ).format(parlai_home)
        sys.argv.append('--remote-cmd')
        sys.argv.append(remote_cmd)
    if '--remote-args' not in sys.argv:
        remote_args = (
            '{}/examples/memnn_luatorch_cpu/params_default.lua'
        ).format(parlai_home)
        sys.argv.append('--remote-args')
        sys.argv.append(remote_args)

    opt = argparser.parse_args()

    # set up dictionary
    print('Setting up dictionary.')
    dictionary = DictionaryAgent(opt)
    if not opt.get('dict_file'):
        # build dictionary since we didn't load it
        ordered_opt = copy.deepcopy(opt)
        ordered_opt['datatype'] = 'train:ordered'
        ordered_opt['numthreads'] = 1
        world_dict = create_task(ordered_opt, dictionary)

        print('Dictionary building on training data.')
        # pass examples to dictionary
        for seen, _ in enumerate(world_dict, start=1):
            max_exs = opt['dict_max_exs']
            if seen > max_exs and max_exs > 0:
                print('Processed {} exs, moving on.'.format(max_exs))
                # don't wait too long...
                break

            world_dict.parley()

        # we need to save the dictionary to load it in memnn (sort it by freq)
        dictionary.sort()
        dictionary.save('/tmp/dict.txt', sort=True)

    print('Dictionary ready, moving on to training.')

    # The same agent is shared by the training and validation worlds.
    opt['datatype'] = 'train'
    shared = {'dictionary_shared': dictionary.share()}
    agent = ParsedRemoteAgent(opt, shared)
    world_train = create_task(opt, agent)
    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    start = time.time()
    with world_train:
        for _round in range(opt['num_its']):
            print('[ training ]')
            steps = opt['num_examples'] * opt.get('numthreads', 1)
            for _step in range(steps):
                world_train.parley()
            world_train.synchronize()

            print('[ validating ]')
            world_valid.reset()
            for _example in world_valid:  # check valid accuracy
                world_valid.parley()

            print('[ validation summary. ]')
            report_valid = world_valid.report()
            print(report_valid)
            if report_valid['accuracy'] > 0.95:
                break

        # show some example dialogs after training:
        world_valid = create_task(opt, agent)
        for _sample in range(3):
            world_valid.parley()
            print(world_valid.display())

    elapsed = round(time.time() - start, 2)
    print('finished in {} s'.format(elapsed))