r_dict[r[i]] = count count += 1 if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--repository_path", type=str, required=True) parser.add_argument("--data_path", type=str, required=True) parser.add_argument("--output_path", type=str, required=True) args = parser.parse_args() repository_path = args.repository_path data_path = args.data_path tr, v, tst = read_data(data_path) ratios = utils.read_qprobs(repository_path) create_ratio_dict(ratios) print("Processing training set") tr_inp, tr_m_out, tr_q_out, tr_r_out, tr_inp_names, tr_inp_years = load_data( tr) print("Processing validation set") v_inp, v_m_out, v_q_out, v_r_out, v_inp_names, v_inp_years = load_data(v) print("Processing test set") t_inp, t_m_out, t_q_out, t_r_out, t_inp_names, t_inp_years = load_data(tst) token2id = {} id2token = {}
# NOTE(review): this chunk begins mid-context — `in_file` here comes from a
# `with open(...)` earlier in the file, and `args` / `token2id` are defined
# above; confirm against the full file.
# SECURITY: pickle.load executes arbitrary code from the file — only load
# pickles produced by this project, never untrusted input.
valid = pickle.load(in_file)
dataset_v = valid["dataset_v"]
v_m_out = valid["v_m_out"]
v_q_out = valid["v_q_out"]
v_r_out = valid["v_r_out"]

# Load word vectors (GloVe-style text format: token followed by its
# coefficients, one word per line) into an in-memory index.
print("Loading filename: {}".format(args.embeddings_filename))
embeddings_index = {}
with open(args.embeddings_filename) as in_file:
    for line in in_file:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

# Build the embedding matrix: row i holds the 300-d vector for the token with
# id i; row 0 and any out-of-vocabulary tokens stay all-zero.
embedding_matrix = np.zeros((len(token2id) + 1, 300))
for word, i in token2id.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# Load visual data
tr_inds, v_inds, t_inds = read_indices(args.vision_dataset_path)
ratios = utils.read_qprobs(args.vision_dataset_path)
# Fixed split sizes for the vision dataset — presumably match the dataset on
# disk; TODO confirm.
tr_size = 11900
v_size = 1700
create_ratio_dict(ratios)
tr_inp, tr_m_out, tr_q_out, tr_r_out = read_images(tr_inds, tr_size)
v_inp, v_m_out, v_q_out, v_r_out = read_images(v_inds, v_size)