class Experiment:
    """Run CCA / GCCA fitting and retrieval-accuracy experiments.

    The class wraps a ``Joint`` instance built from the paths stored in
    ``CONFIG_YAML``.  Its methods fit the CCA and GCCA models for several
    sample sizes or regularisation parameters, measure nearest-neighbour
    retrieval precision between the projected views and plot the results.

    The code is written so that it runs unchanged on Python 2 and Python 3.
    """

    # Passed to Joint as the word / image compression sizes
    # (presumably PCA output dimensions -- confirm against Joint).
    PCA_COMPRESS_WORD_DIM = 100
    PCA_COMPRESS_IMG_DIM = 100
    # Seed for numpy's global RNG; None leaves the RNG unseeded.
    SEED_NUM = None
    # Range (inclusive) and stride of the dimensions swept by the
    # calc_accuracy_changing_* methods; also used for the plot x-axis.
    MAX_DIM = 300
    MIN_DIM = 10
    DIM_STEP = 10
    # Configuration file, resolved relative to the working directory.
    CONFIG_YAML = 'config.yml'

    def __init__(self, line_flag=False):
        """Read the configuration file and build the ``Joint`` object.

        :param line_flag: forwarded unchanged to ``Joint`` as its last
            positional argument.
        :raises IOError: if ``CONFIG_YAML`` cannot be opened.
        :raises KeyError: if a required path is missing from the config.
        """
        # log setting
        program = os.path.basename(__name__)
        self.logger = logging.getLogger(program)
        logging.basicConfig(
            format='%(asctime)s : %(name)s : %(levelname)s : %(message)s')

        # load config file; the context manager closes the handle even
        # when parsing raises.
        # NOTE(review): yaml.load() without an explicit Loader is unsafe on
        # untrusted input and is rejected by recent PyYAML releases.  If the
        # config only contains plain scalars, switch to yaml.safe_load().
        with open(Experiment.CONFIG_YAML, 'r') as f:
            self.config = yaml.load(f)

        self.english_corpus_dir = self.config['english_corpus_dir']
        self.japanese_corpus_dir = self.config['japanese_corpus_dir']
        self.japanese_original_corpus_dir = self.config[
            'japanese_original_corpus_dir']
        self.img_features_npy = self.config['img_features_npy']
        self.img_original_dir = self.config['img_original_dir']
        self.img_correspondence_path = self.config['img_correspondence_path']

        self.joint = Joint(
            self.english_corpus_dir,
            self.img_features_npy,
            self.japanese_corpus_dir,
            self.img_original_dir,
            self.img_correspondence_path,
            self.japanese_original_corpus_dir,
            Experiment.PCA_COMPRESS_WORD_DIM,
            Experiment.PCA_COMPRESS_IMG_DIM,
            line_flag,
        )

        self.logger.info("<Initilalizing Experiment>")
        if Experiment.SEED_NUM is not None:
            self.logger.info("seed: %s", Experiment.SEED_NUM)
            np.random.seed(Experiment.SEED_NUM)

    def process_features(self):
        """Create the features and run the PCA train/test step on ``Joint``."""
        self.joint.create_features()
        self.joint.pca_train_and_test_data()

    def fit_changing_sample_num(self, sample_num_list, reg_param=0.1):
        """Fit GCCA and CCA once per sample size.

        :param sample_num_list: iterable of sample sizes to fit with.
        :param reg_param: regularisation parameter used for every fit
            (defaults to the previously hard-coded 0.1).
        """
        data_num = self.joint.english_feature.get_train_data_num()
        for s in sample_num_list:
            # Both models are fitted on the same sampled indices.
            sampled_indices = feat.BaseFeature.sample_indices(data_num, s)
            self.joint.gcca_fit(s, reg_param, sampled_indices)
            self.joint.cca_fit(s, reg_param, sampled_indices)

    def calc_accuracy(self, start_dim=1, end_dim=100, dim_step=1):
        """Compute CCA and GCCA retrieval precision over a range of dims.

        A Markdown-style table row is printed for every dimension.

        :param start_dim: first dimension evaluated.
        :param end_dim: exclusive upper bound of the dimension range.
        :param dim_step: stride between evaluated dimensions.
        :returns: ``(res_cca_list, res_gcca_list)``, two lists of precision
            percentages, one entry per evaluated dimension.
        """
        res_cca_list = []
        res_gcca_list = []
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("|dim|CCA|GCCA|")
        for i in range(start_dim, end_dim, dim_step):
            res_cca = self.cca_calc_search_precision(i)
            res_gcca = self.gcca_calc_search_precision(i)
            print("|%d|%f|%f|" % (i, res_cca, res_gcca))
            res_cca_list.append(res_cca)
            res_gcca_list.append(res_gcca)
        return res_cca_list, res_gcca_list

    def plot_result(self, sample_num=500, reg_param=0.1):
        """Transform with the given settings and draw the CCA / GCCA plots.

        :param sample_num: sample size passed to the transforms.
        :param reg_param: regularisation parameter passed to the transforms.
        """
        self.joint.gcca_transform(sample_num, reg_param)
        self.joint.cca_transform(sample_num, reg_param)
        self.joint.cca_plot()
        self.joint.gcca_plot()

    def calc_accuracy_changing_sample_num(self, sample_num_list,
                                          reg_param=0.1):
        """Measure precision over the dimension sweep for each sample size.

        Unlike ``calc_accuracy_changing_reg_params`` nothing is saved to
        disk or plotted; the result arrays are returned to the caller
        instead of being discarded.

        :param sample_num_list: iterable of sample sizes to evaluate.
        :param reg_param: regularisation parameter passed to the transforms.
        :returns: ``(res_cca_arr, res_gcca_arr)``, numpy arrays with one row
            per sample size and one column per evaluated dimension.
        """
        res_cca_data = []
        res_gcca_data = []
        for sample_num in sample_num_list:
            self.joint.gcca_transform(sample_num, reg_param)
            self.joint.cca_transform(sample_num, reg_param)
            res_cca_list, res_gcca_list = self.calc_accuracy(
                Experiment.MIN_DIM, Experiment.MAX_DIM + 1,
                Experiment.DIM_STEP)
            res_cca_data.append(res_cca_list)
            res_gcca_data.append(res_gcca_list)
        res_cca_arr = np.array(res_cca_data)
        res_gcca_arr = np.array(res_gcca_data)
        return res_cca_arr, res_gcca_arr

    def fit_chenging_regparam(self, reg_params, sample_num=500):
        """Fit GCCA and CCA once per regularisation parameter.

        :param reg_params: iterable of regularisation parameters.
        :param sample_num: sample size used for every fit.
        """
        data_num = self.joint.english_feature.get_train_data_num()
        for r in reg_params:
            # Indices are re-sampled for every regularisation parameter.
            sampled_indices = feat.BaseFeature.sample_indices(
                data_num, sample_num)
            self.joint.gcca_fit(sample_num, r, sampled_indices)
            self.joint.cca_fit(sample_num, r, sampled_indices)

    # Correctly spelled alias; the original name is kept for existing callers.
    fit_changing_regparam = fit_chenging_regparam

    def calc_accuracy_changing_reg_params(self, sample_num, reg_list,
                                          col_num=5):
        """Measure, save and plot precision for each regularisation value.

        The result arrays are written to ``output/results/`` and then shown
        with ``plot_results`` in ``'REG'`` mode.

        :param sample_num: sample size passed to the transforms.
        :param reg_list: sequence of regularisation parameters to evaluate.
        :param col_num: number of subplot columns in the figure.
        """
        res_cca_data = []
        res_gcca_data = []
        for reg in reg_list:
            self.joint.gcca_transform(sample_num, reg)
            self.joint.cca_transform(sample_num, reg)
            res_cca_list, res_gcca_list = self.calc_accuracy(
                Experiment.MIN_DIM, Experiment.MAX_DIM + 1,
                Experiment.DIM_STEP)
            res_cca_data.append(res_cca_list)
            res_gcca_data.append(res_gcca_list)
        res_cca_arr = np.array(res_cca_data)
        res_gcca_arr = np.array(res_gcca_data)
        np.save('output/results/res_cca_reg_arr.npy', res_cca_arr)
        np.save('output/results/res_gcca_reg_arr.npy', res_gcca_arr)

        self.plot_results(res_cca_arr, res_gcca_arr, reg_list, col_num, 'REG')

    def plot_original_data(self):
        """Delegate to ``Joint.plot_original_data``."""
        self.joint.plot_original_data()

    def cca_calc_search_precision(self, min_dim, neighbor_num=1):
        """Return the retrieval precision (percent) in the CCA space.

        :param min_dim: number of leading projected dimensions to use.
        :param neighbor_num: how many nearest neighbours count as a hit.
        :returns: percentage of rows of the second view whose same-index row
            of the first view is among the nearest neighbours.
        """
        z_list = self.joint.cca.z_list
        en_mat = z_list[0][:, :min_dim]
        jp_mat = z_list[1][:, :min_dim]
        return self._search_precision(en_mat, jp_mat, neighbor_num)

    def gcca_calc_search_precision(self, min_dim, neighbor_num=1):
        """Return the retrieval precision (percent) in the GCCA space.

        Only views 0 and 2 of ``z_list`` take part in the search; view 1 is
        not used here.

        :param min_dim: number of leading projected dimensions to use.
        :param neighbor_num: how many nearest neighbours count as a hit.
        :returns: percentage of rows of view 2 whose same-index row of
            view 0 is among the nearest neighbours.
        """
        z_list = self.joint.gcca.z_list
        en_mat = z_list[0][:, :min_dim]
        jp_mat = z_list[2][:, :min_dim]
        return self._search_precision(en_mat, jp_mat, neighbor_num)

    def _search_precision(self, en_mat, jp_mat, neighbor_num):
        """Query ``en_mat`` with every row of ``jp_mat`` and score the hits."""
        nn = NearestNeighbors(n_neighbors=neighbor_num,
                              algorithm='ball_tree').fit(en_mat)
        # Distances are never used, so only the indices are requested.
        nn_indices = nn.kneighbors(jp_mat, return_distance=False)
        return self._hit_percentage(nn_indices)

    def _hit_percentage(self, nn_indices):
        """Return the percentage of rows that contain their own row index."""
        if len(nn_indices) == 0:
            # No queries: report zero instead of dividing by zero.
            return 0.0
        hit_count = sum(
            1 for query_idx, neighbor_row in enumerate(nn_indices)
            if query_idx in neighbor_row)
        return float(hit_count) / len(nn_indices) * 100

    def plot_results(self, res_cca, res_gcca, title_list, col_num=2,
                     mode='SAMPLE'):
        """Plot CCA (red) and GCCA (blue) precision curves on a subplot grid.

        The x-axis assumes the curves were produced by a sweep that starts
        at ``MIN_DIM`` and advances by ``DIM_STEP``.

        :param res_cca: sequence of CCA precision curves, one per subplot.
        :param res_gcca: sequence of GCCA precision curves, one per subplot.
        :param title_list: one label value per subplot.
        :param col_num: number of subplot columns.
        :param mode: ``'SAMPLE'`` labels each subplot ``sample:<n>``,
            ``'REG'`` labels it ``reg:<value>``; other values add no label.
        """
        data_num = len(res_cca)
        # Integer ceiling division: enough rows to hold every subplot.
        row_num = (data_num + col_num - 1) // col_num

        plt.figure()
        panels = enumerate(zip(title_list, res_cca, res_gcca))
        for i, (title, row_cca, row_gcca) in panels:
            plt.subplot(row_num, col_num, i + 1)
            plt.plot(
                np.arange(len(row_cca)) * Experiment.DIM_STEP
                + Experiment.MIN_DIM,
                row_cca, '-r')
            plt.plot(
                np.arange(len(row_gcca)) * Experiment.DIM_STEP
                + Experiment.MIN_DIM,
                row_gcca, '-b')
            # Put the label in the centre of the current axes limits.
            x_min, x_max = plt.gca().get_xlim()
            y_min, y_max = plt.gca().get_ylim()
            if mode == 'SAMPLE':
                plt.text(0.5 * (x_min + x_max), 0.5 * (y_min + y_max),
                         'sample:%d' % title,
                         ha='center', va='center', color='gray')
            elif mode == 'REG':
                plt.text(0.5 * (x_min + x_max), 0.5 * (y_min + y_max),
                         'reg:%s' % title,
                         ha='center', va='center', color='gray')
        plt.tight_layout()
        plt.show()
class Experiment:
    """Run CCA / GCCA fitting and retrieval-accuracy experiments.

    The class wraps a ``Joint`` instance built from the paths stored in
    ``CONFIG_YAML``.  Its methods fit the CCA and GCCA models for several
    sample sizes or regularisation parameters, measure nearest-neighbour
    retrieval precision between the projected views and plot the results.

    The code is written so that it runs unchanged on Python 2 and Python 3.
    """

    # Passed to Joint as the word / image compression sizes
    # (presumably PCA output dimensions -- confirm against Joint).
    PCA_COMPRESS_WORD_DIM = 100
    PCA_COMPRESS_IMG_DIM = 100
    # Seed for numpy's global RNG; None leaves the RNG unseeded.
    SEED_NUM = None
    # Range (inclusive) and stride of the dimensions swept by the
    # calc_accuracy_changing_* methods; also used for the plot x-axis.
    MAX_DIM = 300
    MIN_DIM = 10
    DIM_STEP = 10
    # Configuration file, resolved relative to the working directory.
    CONFIG_YAML = 'config.yml'

    def __init__(self, line_flag=False):
        """Read the configuration file and build the ``Joint`` object.

        :param line_flag: forwarded unchanged to ``Joint`` as its last
            positional argument.
        :raises IOError: if ``CONFIG_YAML`` cannot be opened.
        :raises KeyError: if a required path is missing from the config.
        """
        # log setting
        program = os.path.basename(__name__)
        self.logger = logging.getLogger(program)
        logging.basicConfig(
            format='%(asctime)s : %(name)s : %(levelname)s : %(message)s')

        # load config file; the context manager closes the handle even
        # when parsing raises.
        # NOTE(review): yaml.load() without an explicit Loader is unsafe on
        # untrusted input and is rejected by recent PyYAML releases.  If the
        # config only contains plain scalars, switch to yaml.safe_load().
        with open(Experiment.CONFIG_YAML, 'r') as f:
            self.config = yaml.load(f)

        self.english_corpus_dir = self.config['english_corpus_dir']
        self.japanese_corpus_dir = self.config['japanese_corpus_dir']
        self.japanese_original_corpus_dir = self.config[
            'japanese_original_corpus_dir']
        self.img_features_npy = self.config['img_features_npy']
        self.img_original_dir = self.config['img_original_dir']
        self.img_correspondence_path = self.config['img_correspondence_path']

        self.joint = Joint(
            self.english_corpus_dir,
            self.img_features_npy,
            self.japanese_corpus_dir,
            self.img_original_dir,
            self.img_correspondence_path,
            self.japanese_original_corpus_dir,
            Experiment.PCA_COMPRESS_WORD_DIM,
            Experiment.PCA_COMPRESS_IMG_DIM,
            line_flag,
        )

        self.logger.info("<Initilalizing Experiment>")
        if Experiment.SEED_NUM is not None:
            self.logger.info("seed: %s", Experiment.SEED_NUM)
            np.random.seed(Experiment.SEED_NUM)

    def process_features(self):
        """Create the features and run the PCA train/test step on ``Joint``."""
        self.joint.create_features()
        self.joint.pca_train_and_test_data()

    def fit_changing_sample_num(self, sample_num_list, reg_param=0.1):
        """Fit GCCA and CCA once per sample size.

        :param sample_num_list: iterable of sample sizes to fit with.
        :param reg_param: regularisation parameter used for every fit
            (defaults to the previously hard-coded 0.1).
        """
        data_num = self.joint.english_feature.get_train_data_num()
        for s in sample_num_list:
            # Both models are fitted on the same sampled indices.
            sampled_indices = feat.BaseFeature.sample_indices(data_num, s)
            self.joint.gcca_fit(s, reg_param, sampled_indices)
            self.joint.cca_fit(s, reg_param, sampled_indices)

    def calc_accuracy(self, start_dim=1, end_dim=100, dim_step=1):
        """Compute CCA and GCCA retrieval precision over a range of dims.

        A Markdown-style table row is printed for every dimension.

        :param start_dim: first dimension evaluated.
        :param end_dim: exclusive upper bound of the dimension range.
        :param dim_step: stride between evaluated dimensions.
        :returns: ``(res_cca_list, res_gcca_list)``, two lists of precision
            percentages, one entry per evaluated dimension.
        """
        res_cca_list = []
        res_gcca_list = []
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("|dim|CCA|GCCA|")
        for i in range(start_dim, end_dim, dim_step):
            res_cca = self.cca_calc_search_precision(i)
            res_gcca = self.gcca_calc_search_precision(i)
            print("|%d|%f|%f|" % (i, res_cca, res_gcca))
            res_cca_list.append(res_cca)
            res_gcca_list.append(res_gcca)
        return res_cca_list, res_gcca_list

    def plot_result(self, sample_num=500, reg_param=0.1):
        """Transform with the given settings and draw the CCA / GCCA plots.

        :param sample_num: sample size passed to the transforms.
        :param reg_param: regularisation parameter passed to the transforms.
        """
        self.joint.gcca_transform(sample_num, reg_param)
        self.joint.cca_transform(sample_num, reg_param)
        self.joint.cca_plot()
        self.joint.gcca_plot()

    def calc_accuracy_changing_sample_num(self, sample_num_list,
                                          reg_param=0.1):
        """Measure precision over the dimension sweep for each sample size.

        Unlike ``calc_accuracy_changing_reg_params`` nothing is saved to
        disk or plotted; the result arrays are returned to the caller
        instead of being discarded.

        :param sample_num_list: iterable of sample sizes to evaluate.
        :param reg_param: regularisation parameter passed to the transforms.
        :returns: ``(res_cca_arr, res_gcca_arr)``, numpy arrays with one row
            per sample size and one column per evaluated dimension.
        """
        res_cca_data = []
        res_gcca_data = []
        for sample_num in sample_num_list:
            self.joint.gcca_transform(sample_num, reg_param)
            self.joint.cca_transform(sample_num, reg_param)
            res_cca_list, res_gcca_list = self.calc_accuracy(
                Experiment.MIN_DIM, Experiment.MAX_DIM + 1,
                Experiment.DIM_STEP)
            res_cca_data.append(res_cca_list)
            res_gcca_data.append(res_gcca_list)
        res_cca_arr = np.array(res_cca_data)
        res_gcca_arr = np.array(res_gcca_data)
        return res_cca_arr, res_gcca_arr

    def fit_chenging_regparam(self, reg_params, sample_num=500):
        """Fit GCCA and CCA once per regularisation parameter.

        :param reg_params: iterable of regularisation parameters.
        :param sample_num: sample size used for every fit.
        """
        data_num = self.joint.english_feature.get_train_data_num()
        for r in reg_params:
            # Indices are re-sampled for every regularisation parameter.
            sampled_indices = feat.BaseFeature.sample_indices(
                data_num, sample_num)
            self.joint.gcca_fit(sample_num, r, sampled_indices)
            self.joint.cca_fit(sample_num, r, sampled_indices)

    # Correctly spelled alias; the original name is kept for existing callers.
    fit_changing_regparam = fit_chenging_regparam

    def calc_accuracy_changing_reg_params(self, sample_num, reg_list,
                                          col_num=5):
        """Measure, save and plot precision for each regularisation value.

        The result arrays are written to ``output/results/`` and then shown
        with ``plot_results`` in ``'REG'`` mode.

        :param sample_num: sample size passed to the transforms.
        :param reg_list: sequence of regularisation parameters to evaluate.
        :param col_num: number of subplot columns in the figure.
        """
        res_cca_data = []
        res_gcca_data = []
        for reg in reg_list:
            self.joint.gcca_transform(sample_num, reg)
            self.joint.cca_transform(sample_num, reg)
            res_cca_list, res_gcca_list = self.calc_accuracy(
                Experiment.MIN_DIM, Experiment.MAX_DIM + 1,
                Experiment.DIM_STEP)
            res_cca_data.append(res_cca_list)
            res_gcca_data.append(res_gcca_list)
        res_cca_arr = np.array(res_cca_data)
        res_gcca_arr = np.array(res_gcca_data)
        np.save('output/results/res_cca_reg_arr.npy', res_cca_arr)
        np.save('output/results/res_gcca_reg_arr.npy', res_gcca_arr)

        self.plot_results(res_cca_arr, res_gcca_arr, reg_list, col_num, 'REG')

    def plot_original_data(self):
        """Delegate to ``Joint.plot_original_data``."""
        self.joint.plot_original_data()

    def cca_calc_search_precision(self, min_dim, neighbor_num=1):
        """Return the retrieval precision (percent) in the CCA space.

        :param min_dim: number of leading projected dimensions to use.
        :param neighbor_num: how many nearest neighbours count as a hit.
        :returns: percentage of rows of the second view whose same-index row
            of the first view is among the nearest neighbours.
        """
        z_list = self.joint.cca.z_list
        en_mat = z_list[0][:, :min_dim]
        jp_mat = z_list[1][:, :min_dim]
        return self._search_precision(en_mat, jp_mat, neighbor_num)

    def gcca_calc_search_precision(self, min_dim, neighbor_num=1):
        """Return the retrieval precision (percent) in the GCCA space.

        Only views 0 and 2 of ``z_list`` take part in the search; view 1 is
        not used here.

        :param min_dim: number of leading projected dimensions to use.
        :param neighbor_num: how many nearest neighbours count as a hit.
        :returns: percentage of rows of view 2 whose same-index row of
            view 0 is among the nearest neighbours.
        """
        z_list = self.joint.gcca.z_list
        en_mat = z_list[0][:, :min_dim]
        jp_mat = z_list[2][:, :min_dim]
        return self._search_precision(en_mat, jp_mat, neighbor_num)

    def _search_precision(self, en_mat, jp_mat, neighbor_num):
        """Query ``en_mat`` with every row of ``jp_mat`` and score the hits."""
        nn = NearestNeighbors(n_neighbors=neighbor_num,
                              algorithm='ball_tree').fit(en_mat)
        # Distances are never used, so only the indices are requested.
        nn_indices = nn.kneighbors(jp_mat, return_distance=False)
        return self._hit_percentage(nn_indices)

    def _hit_percentage(self, nn_indices):
        """Return the percentage of rows that contain their own row index."""
        if len(nn_indices) == 0:
            # No queries: report zero instead of dividing by zero.
            return 0.0
        hit_count = sum(
            1 for query_idx, neighbor_row in enumerate(nn_indices)
            if query_idx in neighbor_row)
        return float(hit_count) / len(nn_indices) * 100

    def plot_results(self, res_cca, res_gcca, title_list, col_num=2,
                     mode='SAMPLE'):
        """Plot CCA (red) and GCCA (blue) precision curves on a subplot grid.

        The x-axis assumes the curves were produced by a sweep that starts
        at ``MIN_DIM`` and advances by ``DIM_STEP``.

        :param res_cca: sequence of CCA precision curves, one per subplot.
        :param res_gcca: sequence of GCCA precision curves, one per subplot.
        :param title_list: one label value per subplot.
        :param col_num: number of subplot columns.
        :param mode: ``'SAMPLE'`` labels each subplot ``sample:<n>``,
            ``'REG'`` labels it ``reg:<value>``; other values add no label.
        """
        data_num = len(res_cca)
        # Integer ceiling division: enough rows to hold every subplot.
        row_num = (data_num + col_num - 1) // col_num

        plt.figure()
        panels = enumerate(zip(title_list, res_cca, res_gcca))
        for i, (title, row_cca, row_gcca) in panels:
            plt.subplot(row_num, col_num, i + 1)
            plt.plot(
                np.arange(len(row_cca)) * Experiment.DIM_STEP
                + Experiment.MIN_DIM,
                row_cca, '-r')
            plt.plot(
                np.arange(len(row_gcca)) * Experiment.DIM_STEP
                + Experiment.MIN_DIM,
                row_gcca, '-b')
            # Put the label in the centre of the current axes limits.
            x_min, x_max = plt.gca().get_xlim()
            y_min, y_max = plt.gca().get_ylim()
            if mode == 'SAMPLE':
                plt.text(0.5 * (x_min + x_max), 0.5 * (y_min + y_max),
                         'sample:%d' % title,
                         ha='center', va='center', color='gray')
            elif mode == 'REG':
                plt.text(0.5 * (x_min + x_max), 0.5 * (y_min + y_max),
                         'reg:%s' % title,
                         ha='center', va='center', color='gray')
        plt.tight_layout()
        plt.show()
__author__ = 'rupy'

import logging

from joint import Joint

if __name__ == "__main__":
    # Let INFO-level records through the root logger.
    logging.getLogger().setLevel(logging.INFO)

    # Text corpora.
    english_corpus_dir = '../PascalSentenceDataset/english/'
    japanese_corpus_dir = '../PascalSentenceDataset/line_wakati/'
    japanese_original_corpus_dir = '../PascalSentenceDataset/japanese/'

    # Image data and the correspondence table.
    img_features_npy = 'pascal_features.npy'
    img_original_dir = '../PascalSentenceDataset/dataset/'
    img_correspondence_path = "../PascalSentenceDataset/correspondence.csv"

    joint = Joint(
        english_corpus_dir,
        img_features_npy,
        japanese_corpus_dir,
        img_original_dir,
        img_correspondence_path,
        japanese_original_corpus_dir,
    )

    # Feature preparation.
    joint.create_features()
    joint.pca_train_and_test_data()

    # Transform and plot, CCA first and GCCA second.
    joint.cca_transform(line_flag=False, step=1, reg_param=0.1)
    joint.cca_plot()
    joint.gcca_transform(line_flag=False, step=1, reg_param=0.1)
    joint.gcca_plot()

    # Retrieval: only the GCCA Japanese-to-English query is enabled.
    # joint.retrieval_j2e_by_cca(3)
    joint.retrieval_j2e_by_gcca(3)
    # joint.retrieval_j2i_by_gcca(3)
if __name__ == "__main__":
    # Let INFO-level records through the root logger.
    logging.getLogger().setLevel(logging.INFO)

    # Text corpora.
    english_corpus_dir = '../PascalSentenceDataset/english/'
    japanese_corpus_dir = '../PascalSentenceDataset/line_wakati/'
    japanese_original_corpus_dir = '../PascalSentenceDataset/japanese/'

    # Image data and the correspondence table.
    img_features_npy = 'pascal_features.npy'
    img_original_dir = '../PascalSentenceDataset/dataset/'
    img_correspondence_path = "../PascalSentenceDataset/correspondence.csv"

    joint = Joint(english_corpus_dir, img_features_npy, japanese_corpus_dir,
                  img_original_dir, img_correspondence_path,
                  japanese_original_corpus_dir)

    # Feature preparation.
    joint.create_features()
    joint.pca_train_and_test_data()

    # Transform and plot, CCA first and GCCA second.
    joint.cca_transform(line_flag=False, step=1, reg_param=0.1)
    joint.cca_plot()
    joint.gcca_transform(line_flag=False, step=1, reg_param=0.1)
    joint.gcca_plot()

    # Retrieval: only the GCCA Japanese-to-English query is enabled.
    # joint.retrieval_j2e_by_cca(3)
    joint.retrieval_j2e_by_gcca(3)
    # joint.retrieval_j2i_by_gcca(3)