# Assumed module-level imports for the functions below. get_params,
# PrepareData and the special-token indices (start_symbol_index,
# end_symbol_index, unk_symbol_index, pad_symbol_index, kb_pad_idx, nkb)
# come from the repo's own modules, which are not shown in this section.
import os
import sys
import pickle as pkl
import numpy as np
from os import path


def prepare_data(root_path):
    # Prepare the text-only "test_smallest" split from the dialog history.
    param = get_params(root_path, os.path.join(root_path, 'text_task_resnet'),
                       test_state=None)
    test_dir_loc = os.path.join(root_path, "history")
    dump_dir_loc = param['dump_dir_loc']
    vocab_file = param['vocab_file']
    vocab_stats_file = param['vocab_stats_file']
    vocab_freq_cutoff = param['vocab_freq_cutoff']
    test_data_file = param['test_data_file']
    max_utter = param['max_utter']
    max_len = param['max_len']
    max_images = param['max_images']
    preparedata = PrepareData(max_utter, max_len, max_images,
                              start_symbol_index, end_symbol_index,
                              unk_symbol_index, pad_symbol_index,
                              "text", cutoff=vocab_freq_cutoff)
    if os.path.isfile(vocab_file):
        print('found existing vocab file in ' + str(vocab_file) +
              ', ... reading from there')
    preparedata.prepare_data(test_dir_loc, vocab_file, vocab_stats_file,
                             os.path.join(dump_dir_loc, "test_smallest"),
                             test_data_file, isTrain=False, isTest=True)
def get_dialog_dict_for_test(param):
    # Prepare the test split for the KB-grounded variant: this PrepareData
    # additionally takes stopword lists, Lucene/TransE/Wikidata/GloVe paths,
    # and memory/target sizes.
    test_dir_loc = param['test_dir_loc']
    dump_dir_loc = param['dump_dir_loc']
    vocab_file = param['vocab_file']
    vocab_stats_file = param['vocab_stats_file']
    response_vocab_file = param['response_vocab_file']
    vocab_freq_cutoff = param['vocab_freq_cutoff']
    test_data_file = param['test_data_file']
    max_utter = param['max_utter']
    max_len = param['max_len']
    stopwords = param['stopwords']
    stopwords_histogram = param['stopwords_histogram']
    max_mem_size = param['memory_size']
    max_target_size = param['gold_target_size']
    ques_type_id = param['ques_type_id']
    ques_type_name = param['ques_type_name']
    vocab_max_len = param['vocab_max_len']
    wikidata_dir = param['wikidata_dir']
    lucene_dir = param['lucene_dir']
    transe_dir = param['transe_dir']
    glove_dir = param['glove_dir']
    preparedata = PrepareData(max_utter, max_len,
                              start_symbol_index, end_symbol_index,
                              unk_symbol_index, pad_symbol_index,
                              kb_pad_idx, nkb, stopwords, stopwords_histogram,
                              lucene_dir, transe_dir, wikidata_dir, glove_dir,
                              max_mem_size, max_target_size, vocab_max_len,
                              True, cutoff=vocab_freq_cutoff)
    if os.path.isfile(vocab_file):
        print('found existing vocab file in ' + str(vocab_file) +
              ', ... reading from there')
    preparedata.prepare_data(test_dir_loc, vocab_file, vocab_stats_file,
                             response_vocab_file,
                             os.path.join(dump_dir_loc, "test"),
                             test_data_file, ques_type_id, ques_type_name)
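
# Hedged sketch (not from the repo): the extra param keys this KB-grounded
# entry point reads on top of the text/image variants. Every value below is
# an illustrative placeholder, not the project's real configuration.
EXAMPLE_KB_PARAM_EXTRAS = {
    'response_vocab_file': 'dump/response_vocab.pkl',
    'stopwords': 'data/stopwords.pkl',
    'stopwords_histogram': 'data/stopwords_histogram.txt',
    'memory_size': 10,         # consumed as max_mem_size
    'gold_target_size': 10,    # consumed as max_target_size
    'vocab_max_len': 40000,
    'wikidata_dir': 'kb/wikidata',
    'lucene_dir': 'kb/lucene_index',
    'transe_dir': 'kb/transe',
    'glove_dir': 'embeddings/glove',
    'ques_type_id': 1,         # question-type filter; semantics live in PrepareData
    'ques_type_name': 'simple',
}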
def read_data(root_path):
    # Load the image-URL -> feature-row map and the precomputed feature
    # matrix, then prepare the reduced "test_smallest" split.
    ImageUrlToIndex = pkl.load(
        open(path.join(root_path, 'data', 'Img_Fea_Dic.pkl'), 'rb'))
    ImageFea = np.load(path.join(root_path, 'data', 'Img_Fea.npy'))
    param = get_params(root_path)  # was get_params(sys.argv[1]); use the argument
    train_dir_loc = param['train_dir_loc']
    valid_dir_loc = param['valid_dir_loc']
    test_dir_loc = param['test_dir_loc'].replace('test', 'test_smallest')
    dump_dir_loc = param['dump_dir_loc']
    vocab_file = param['vocab_file']
    vocab_stats_file = param['vocab_stats_file']
    vocab_freq_cutoff = param['vocab_freq_cutoff']
    train_data_file = param['train_data_file']
    valid_data_file = param['valid_data_file']
    test_data_file = param['test_data_file'].replace('test', 'test_smallest')
    max_utter = param['max_utter']
    max_len = param['max_len']
    max_images = param['max_images']
    preparedata = PrepareData(max_utter, max_len, max_images,
                              start_symbol_index, end_symbol_index,
                              unk_symbol_index, pad_symbol_index,
                              "text", cutoff=vocab_freq_cutoff)
    if os.path.isfile(vocab_file):
        print('found existing vocab file in ' + str(vocab_file) +
              ', ... reading from there')
    preparedata.prepare_data(test_dir_loc, vocab_file, vocab_stats_file,
                             os.path.join(dump_dir_loc, "test_smallest"),
                             test_data_file)
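
# Hedged sketch (an assumption from the file names, not repo-confirmed):
# Img_Fea_Dic.pkl maps an image URL to a row index of Img_Fea.npy, so a
# feature lookup for one image would go roughly like this. The zero-vector
# fallback for unseen URLs is this sketch's choice, not the repo's behaviour.
def lookup_image_feature(url, ImageUrlToIndex, ImageFea):
    idx = ImageUrlToIndex.get(url)
    if idx is None:
        return np.zeros(ImageFea.shape[1], dtype=ImageFea.dtype)
    return ImageFea[idx]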
def get_dialog_dict(param, is_test=False):
    # Prepare train/valid/test for the image-grounded variant; an optional
    # param['test_state'] routes test dumps into a per-state subdirectory.
    train_dir_loc = param['train_dir_loc']
    valid_dir_loc = param['valid_dir_loc']
    test_dir_loc = param['test_dir_loc']
    dump_dir_loc = param['dump_dir_loc']
    vocab_file = param['vocab_file']
    vocab_stats_file = param['vocab_stats_file']
    vocab_freq_cutoff = param['vocab_freq_cutoff']
    train_data_file = param['train_data_file']
    valid_data_file = param['valid_data_file']
    test_data_file = param['test_data_file']
    max_utter = param['max_utter']
    max_len = param['max_len']
    max_images = param['max_images']
    max_negs = param['max_negs']
    test_state = param.get('test_state')
    preparedata = PrepareData(max_utter, max_len, max_images, max_negs,
                              start_symbol_index, end_symbol_index,
                              unk_symbol_index, pad_symbol_index,
                              "image", cutoff=vocab_freq_cutoff)
    if os.path.isfile(vocab_file):
        print('found existing vocab file in ' + str(vocab_file) +
              ', ... reading from there')
    if not is_test:
        preparedata.prepare_data(train_dir_loc, vocab_file, vocab_stats_file,
                                 os.path.join(dump_dir_loc, "train"),
                                 train_data_file, True, False, None)
        preparedata.prepare_data(valid_dir_loc, vocab_file, vocab_stats_file,
                                 os.path.join(dump_dir_loc, "valid"),
                                 valid_data_file, False, False, None)
    if test_state is not None:
        preparedata.prepare_data(test_dir_loc, vocab_file, vocab_stats_file,
                                 os.path.join(dump_dir_loc, "test_data_file_state",
                                              "test_" + test_state),
                                 test_data_file, False, True, test_state)
    else:
        preparedata.prepare_data(test_dir_loc, vocab_file, vocab_stats_file,
                                 os.path.join(dump_dir_loc, "test"),
                                 test_data_file, False, True, test_state)
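
# Hedged usage sketch: a minimal param dict carrying exactly the keys that
# get_dialog_dict reads. All paths and sizes are made-up placeholders; in the
# repo this dict presumably comes from get_params.
if __name__ == '__main__':
    example_param = {
        'train_dir_loc': 'data/train',
        'valid_dir_loc': 'data/valid',
        'test_dir_loc': 'data/test',
        'dump_dir_loc': 'dump',
        'vocab_file': 'dump/vocab.pkl',
        'vocab_stats_file': 'dump/vocab_stats.txt',
        'vocab_freq_cutoff': 5,
        'train_data_file': 'dump/train.pkl',
        'valid_data_file': 'dump/valid.pkl',
        'test_data_file': 'dump/test.pkl',
        'max_utter': 2,
        'max_len': 20,
        'max_images': 5,
        'max_negs': 5,
        # 'test_state': 'like',  # optional; routes test dumps per state
    }
    get_dialog_dict(example_param, is_test=True)  # prepares the test split only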