Example #1
    def create_batches(self, name, dialogues, batch_size):
        """Convert dialogues to integer ids, batch them, and cache the batches as a pickle."""
        if not os.path.isdir(self.cache):
            os.makedirs(self.cache)
        cache_file = os.path.join(self.cache, '%s_batches.pkl' % name)
        if (not os.path.exists(cache_file)) or self.ignore_cache:
            for dialogue in dialogues:
                dialogue.convert_to_int()

            dialogue_batches = self.create_dialogue_batches(
                dialogues, batch_size)
            print 'Write %d batches to cache %s' % (len(dialogue_batches),
                                                    cache_file)
            start_time = time.time()
            write_pickle(dialogue_batches, cache_file)
            print '[%d s]' % (time.time() - start_time)
        else:
            start_time = time.time()
            dialogue_batches = read_pickle(cache_file)
            print 'Read %d batches from cache %s' % (len(dialogue_batches),
                                                     cache_file)
            print '[%d s]' % (time.time() - start_time)
        return dialogue_batches
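
Every example on this page funnels data through write_pickle and read_pickle. A minimal sketch of such helpers, assuming they are thin wrappers around the standard pickle module (the real utility functions may differ), could look like this:

import pickle

def write_pickle(obj, path):
    # Serialize obj to path using the highest protocol available.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def read_pickle(path):
    # Load and return an object previously written with write_pickle.
    with open(path, 'rb') as f:
        return pickle.load(f)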
Example #2
    def create_trie(self, batches, path):
        """Build a prefix trie over the target sequences in batches, or load it from a cached pickle at path."""
        if path is None:
            return None

        def seq_iter(batches):
            for batch in batches:
                for b in batch['batch_seq']:
                    targets = b['targets']
                    for target in targets:
                        yield target

        if not os.path.exists(path):
            trie = Trie()
            print 'Build prefix trie of length', trie.max_prefix_len
            start_time = time.time()
            trie.build_trie(seq_iter(batches))
            print '[%d s]' % (time.time() - start_time)
            print 'Write trie to', path
            write_pickle(trie, path)
        else:
            print 'Read trie from', path
            trie = read_pickle(path)
        return trie
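
Example #2 only relies on the Trie exposing a max_prefix_len attribute and a build_trie method that consumes an iterator of target sequences. A rough sketch of an interface with that shape, purely as an assumption about what the real class provides, might be:

from collections import defaultdict

class Trie(object):
    def __init__(self, max_prefix_len=5):
        self.max_prefix_len = max_prefix_len
        # Map each prefix (a tuple of tokens) to the set of tokens observed after it.
        self.allowed_tokens = defaultdict(set)

    def build_trie(self, seqs):
        # For every prefix up to max_prefix_len, record which token may follow it.
        for seq in seqs:
            for i in range(1, min(len(seq), self.max_prefix_len)):
                self.allowed_tokens[tuple(seq[:i])].add(seq[i])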
Example #3
    parser.add_argument('--max-examples', default=-1, type=int)
    parser.add_argument('--output', help='Path to save templates')
    parser.add_argument('--output-transcripts',
                        help='Path to JSON examples with templates')
    parser.add_argument('--templates', help='Path to load templates')
    parser.add_argument('--debug', default=False, action='store_true')
    args = parser.parse_args()

    if args.templates:
        templates = Templates.from_pickle(args.templates)
    else:
        price_tracker = PriceTracker(args.price_tracker_model)
        template_extractor = TemplateExtractor(price_tracker)
        template_extractor.extract_templates(args.transcripts,
                                             args.max_examples)
        write_pickle(template_extractor.templates, args.output)
        templates = Templates(template_extractor.templates)

    t = templates.templates
    response_tags = set(t.response_tag.values)
    tag_counts = []
    for tag in response_tags:
        tag_counts.append(
            (tag, t[t.response_tag == tag].shape[0] / float(t.shape[0])))
    tag_counts = sorted(tag_counts, key=lambda x: x[1], reverse=True)
    for x in tag_counts:
        print x

    import sys
    sys.exit()
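
Assuming t here is a pandas DataFrame, the tag-frequency loop above can be written more compactly with value_counts, which already returns normalized counts sorted in descending order:

    # Fraction of templates per response_tag, from most to least frequent.
    tag_freqs = t.response_tag.value_counts(normalize=True)
    for tag, freq in tag_freqs.items():
        print tag, freq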
Example #4
    def save(self, output):
        """Pickle the finalized templates to output."""
        assert self.finalized
        write_pickle(self.templates, output)
Example #5
    def save_pickle(self, path):
        print 'Dump lexicon to {}'.format(path)
        write_pickle({'entities': self.entities, 'lsh': self.lsh}, path)
Example #6
    def save(self, output):
        """Pickle the model together with its action set."""
        data = {'model': self.model, 'actions': self.actions}
        write_pickle(data, output)
Example #7
    def save_pickle(self, path):
        print 'Dump templates to {}'.format(path)
        write_pickle(self.tables, path)
Example #8
        mappings = read_pickle(vocab_path)
        for k, v in mappings.iteritems():
            print k, v.size

    schema = Schema(model_args.schema_path, None)

    data_generator = get_data_generator(args, model_args, mappings, schema)

    for d, n in data_generator.num_examples.iteritems():
        logstats.add('data', d, 'num_dialogues', n)

    # Save mappings
    if not mappings:
        mappings = data_generator.mappings
        vocab_path = os.path.join(args.mappings, 'vocab.pkl')
        write_pickle(mappings, vocab_path)
    for name, m in mappings.iteritems():
        logstats.add('mappings', name, 'size', m.size)

    # Build the model
    logstats.add_args('model_args', model_args)
    model = build_model(schema, mappings, data_generator.trie, model_args)

    # Tensorflow config
    if args.gpu == 0:
        print 'GPU is disabled'
        config = tf.ConfigProto(device_count={'GPU': 0})
    else:
        print 'Using GPU'
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5,
                                    allow_growth=True)
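
The GPU branch above stops before the gpu_options object is used. In TensorFlow 1.x such options are normally passed into a ConfigProto and then into the session; a sketch of that standard pattern (not necessarily the exact code that followed here) is:

import tensorflow as tf

# Limit this process to half of the GPU memory and let the allocation grow on demand.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5,
                            allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=config)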