def create_batches(self, name, dialogues, batch_size):
    if not os.path.isdir(self.cache):
        os.makedirs(self.cache)
    cache_file = os.path.join(self.cache, '%s_batches.pkl' % name)
    if (not os.path.exists(cache_file)) or self.ignore_cache:
        # Convert dialogues to integer indices, batch them, then cache the result
        for dialogue in dialogues:
            dialogue.convert_to_int()
        dialogue_batches = self.create_dialogue_batches(dialogues, batch_size)
        print 'Write %d batches to cache %s' % (len(dialogue_batches), cache_file)
        start_time = time.time()
        write_pickle(dialogue_batches, cache_file)
        print '[%d s]' % (time.time() - start_time)
    else:
        # Reuse previously cached batches
        start_time = time.time()
        dialogue_batches = read_pickle(cache_file)
        print 'Read %d batches from cache %s' % (len(dialogue_batches), cache_file)
        print '[%d s]' % (time.time() - start_time)
    return dialogue_batches
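# Usage sketch (assumption, not from this file): create_batches would typically
# be called once per data split during preprocessing, e.g.
#
#   train_batches = data_generator.create_batches('train', train_dialogues, 32)
#   dev_batches = data_generator.create_batches('dev', dev_dialogues, 32)
#
# where data_generator, train_dialogues, dev_dialogues, and the batch size of 32
# are hypothetical names/values. The first call pickles the batches under
# self.cache; later calls read the pickle unless ignore_cache is set.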
def create_trie(self, batches, path):
    if path is None:
        return None

    def seq_iter(batches):
        # Yield every target sequence from every batch
        for batch in batches:
            for b in batch['batch_seq']:
                targets = b['targets']
                for target in targets:
                    yield target

    if not os.path.exists(path):
        # Build the prefix trie from target sequences and cache it
        trie = Trie()
        print 'Build prefix trie of length', trie.max_prefix_len
        start_time = time.time()
        trie.build_trie(seq_iter(batches))
        print '[%d s]' % (time.time() - start_time)
        print 'Write trie to', path
        write_pickle(trie, path)
    else:
        print 'Read trie from', path
        trie = read_pickle(path)
    return trie
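# Note (assumption, for illustration only): create_trie relies on a Trie object
# that exposes a max_prefix_len attribute, a build_trie(sequences) method, and
# that can be pickled. A minimal stand-in consistent with that usage could be:
#
#   class Trie(object):
#       def __init__(self, max_prefix_len=8):  # the actual default is not shown here
#           self.max_prefix_len = max_prefix_len
#           self.prefix_counts = defaultdict(int)
#
#       def build_trie(self, sequences):
#           for seq in sequences:
#               for i in xrange(1, min(len(seq), self.max_prefix_len) + 1):
#                   self.prefix_counts[tuple(seq[:i])] += 1
#
# The real Trie in this codebase may differ; only the attribute and method used
# above are confirmed by create_trie itself.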
parser.add_argument('--max-examples', default=-1, type=int)
parser.add_argument('--output', help='Path to save templates')
parser.add_argument('--output-transcripts', help='Path to JSON examples with templates')
parser.add_argument('--templates', help='Path to load templates')
parser.add_argument('--debug', default=False, action='store_true')
args = parser.parse_args()

if args.templates:
    templates = Templates.from_pickle(args.templates)
else:
    price_tracker = PriceTracker(args.price_tracker_model)
    template_extractor = TemplateExtractor(price_tracker)
    template_extractor.extract_templates(args.transcripts, args.max_examples)
    write_pickle(template_extractor.templates, args.output)
    templates = Templates(template_extractor.templates)

t = templates.templates
response_tags = set(t.response_tag.values)
tag_counts = []
for tag in response_tags:
    tag_counts.append((tag, t[t.response_tag == tag].shape[0] / float(t.shape[0])))
tag_counts = sorted(tag_counts, key=lambda x: x[1], reverse=True)
for x in tag_counts:
    print x
import sys
sys.exit()
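# Example invocation (assumption: the script name and the --transcripts /
# --price-tracker-model flags are inferred from the args referenced above and
# are not shown in this snippet):
#
#   python extract_templates.py --transcripts data/train.json \
#       --price-tracker-model price_tracker.pkl --output templates.pkl
#
# With --templates pointing to an existing pickle, extraction is skipped and the
# script only prints the response-tag frequency table before exiting.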
def save(self, output):
    assert self.finalized
    write_pickle(self.templates, output)
def save_pickle(self, path):
    print 'Dump lexicon to {}'.format(path)
    write_pickle({'entities': self.entities, 'lsh': self.lsh}, path)
def save(self, output):
    data = {'model': self.model, 'actions': self.actions}
    write_pickle(data, output)
def save_pickle(self, path):
    print 'Dump templates to {}'.format(path)
    write_pickle(self.tables, path)
mappings = read_pickle(vocab_path)
for k, v in mappings.iteritems():
    print k, v.size

schema = Schema(model_args.schema_path, None)
data_generator = get_data_generator(args, model_args, mappings, schema)
for d, n in data_generator.num_examples.iteritems():
    logstats.add('data', d, 'num_dialogues', n)

# Save mappings
if not mappings:
    mappings = data_generator.mappings
    vocab_path = os.path.join(args.mappings, 'vocab.pkl')
    write_pickle(mappings, vocab_path)
for name, m in mappings.iteritems():
    logstats.add('mappings', name, 'size', m.size)

# Build the model
logstats.add_args('model_args', model_args)
model = build_model(schema, mappings, data_generator.trie, model_args)

# Tensorflow config
if args.gpu == 0:
    print 'GPU is disabled'
    config = tf.ConfigProto(device_count={'GPU': 0})
else:
    print 'Using GPU'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5, allow_growth=True)
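# Sketch (assumption, not part of this snippet): the config built above would
# normally be handed to a TensorFlow 1.x session that drives training, e.g.
#
#   with tf.Session(config=config) as sess:
#       sess.run(tf.global_variables_initializer())
#       # ... run the training loop ...
#
# tf.Session(config=...) and tf.global_variables_initializer() are standard TF1
# APIs; note that in the GPU branch above, construction of config from
# gpu_options is truncated in this excerpt.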