def testSubsetFiles(meta_val_file, meta_synset_words_file, sub_val_file,
                    sub_synset_words_file, excluded_ids_all_file, removePath):
    """Run the three consistency checks of a subset label list against the full one.

    Args:
        meta_val_file: labels file of the full ("meta") list, read with
            ``imagenet.readLabelsFile`` as (file name, class index) entries.
        meta_synset_words_file: labels file read the same way; the first
            field of each entry is used as the synset id of that class index.
        sub_val_file: labels file of the subset list, same format.
        sub_synset_words_file: accepted but not used by this function.
        excluded_ids_all_file: file read with ``imagenet.readSynsetsFile``
            giving the ids that must be absent from the subset.
        removePath: accepted but not used by this function.

    Returns:
        ``(check_exclude, check_include, check_labeling)``, the results of
        ``testFilesExcludedProperly``, ``testFilesIncludedProperly`` and
        ``testFilesLabeledProperly`` respectively.

    Side effects:
        Prints the seconds spent reading the synset-words file and building
        the per-file id list.
    """
    full_files, full_idx_text = zip(*imagenet.readLabelsFile(meta_val_file))
    full_idx = np.array([int(text) for text in full_idx_text])

    started = time.time()
    synset_ids, _ = zip(*imagenet.readLabelsFile(meta_synset_words_file))
    synset_ids = np.array(synset_ids)

    # One synset id per full-list file, filled class by class via a mask.
    ids_per_file = np.empty((len(full_files),), dtype='object')
    for class_idx in np.unique(full_idx):
        ids_per_file[full_idx == class_idx] = synset_ids[class_idx]
    ids_per_file = list(ids_per_file)
    # Single-argument parenthesised print: same output under Python 2.
    print(time.time() - started)

    subset_files, subset_idx_text = zip(*imagenet.readLabelsFile(sub_val_file))
    subset_idx = [int(text) for text in subset_idx_text]

    excluded_ids = imagenet.readSynsetsFile(excluded_ids_all_file)

    # all excluded files are excluded
    check_exclude = testFilesExcludedProperly(
        subset_files, full_files, ids_per_file, excluded_ids)
    # all included files are included
    check_include = testFilesIncludedProperly(
        subset_files, full_files, ids_per_file, excluded_ids)
    # all indices labeling is consistent
    check_labeling = testFilesLabeledProperly(
        subset_idx, subset_files, full_idx, full_files)

    return check_exclude, check_include, check_labeling
def testSubsetFiles(meta_val_file, meta_synset_words_file, sub_val_file,
                    sub_synset_words_file, excluded_ids_all_file, removePath):
    """Validate a subset label list against the full ("meta") label list.

    The full list's class indices are translated to synset ids, then three
    helper checks are run: excluded ids are absent from the subset, the
    remaining files are present, and the labelling is consistent.

    Args:
        meta_val_file: full labels file for ``imagenet.readLabelsFile``;
            entries are unpacked as (file name, class index).
        meta_synset_words_file: file for ``imagenet.readLabelsFile`` whose
            entries are unpacked as (synset id, label); only ids are used.
        sub_val_file: subset labels file, same format as ``meta_val_file``.
        sub_synset_words_file: not used here.
        excluded_ids_all_file: file for ``imagenet.readSynsetsFile`` listing
            the excluded ids.
        removePath: not used here.

    Returns:
        Tuple ``(check_exclude, check_include, check_labeling)`` holding the
        return values of the three helper checks.

    Side effects:
        Prints the elapsed seconds of the index-to-id mapping step (which
        includes reading ``meta_synset_words_file``).
    """
    meta_entries = imagenet.readLabelsFile(meta_val_file)
    meta_files, meta_idx_raw = zip(*meta_entries)
    meta_idx_list = [int(raw) for raw in meta_idx_raw]
    meta_idx_arr = np.array(meta_idx_list)

    tic = time.time()
    word_entries = imagenet.readLabelsFile(meta_synset_words_file)
    ids_by_class, _ = zip(*word_entries)
    ids_by_class = np.array(ids_by_class)

    n_meta = len(meta_files)
    meta_file_ids = np.empty((n_meta,), dtype='object')
    for cls in np.unique(meta_idx_arr):
        # Every file carrying this class index receives the class's id.
        selector = meta_idx_arr == cls
        meta_file_ids[selector] = ids_by_class[cls]
    meta_file_ids = list(meta_file_ids)
    toc = time.time()
    # Parenthesised single value prints the same under Python 2.
    print(toc - tic)

    sub_entries = imagenet.readLabelsFile(sub_val_file)
    sub_files, sub_idx_raw = zip(*sub_entries)
    sub_idx_list = [int(raw) for raw in sub_idx_raw]

    excluded = imagenet.readSynsetsFile(excluded_ids_all_file)

    # all excluded files are excluded
    check_exclude = testFilesExcludedProperly(
        sub_files, meta_files, meta_file_ids, excluded)

    # all included files are included
    check_include = testFilesIncludedProperly(
        sub_files, meta_files, meta_file_ids, excluded)

    # all indices labeling is consistent
    check_labeling = testFilesLabeledProperly(
        sub_idx_list, sub_files, meta_idx_arr, meta_files)

    return check_exclude, check_include, check_labeling
def _check_subset_files(split="train", removePath=False):
    """Run ``tests.testSubsetFiles`` for one split and print its result.

    Args:
        split: base name of the list file compared in both directories
            (``<split>.txt``). The active run uses ``"train"``; an earlier
            run used ``"val"`` together with ``removePath=True``.
        removePath: forwarded unchanged to ``tests.testSubsetFiles``.
    """
    meta_dir = "../../data/ilsvrc12/"
    sub_dir = "/disk2/novemberExperiments/network_no_pascal"
    list_name = split + ".txt"

    checks = tests.testSubsetFiles(
        os.path.join(meta_dir, list_name),
        os.path.join(meta_dir, "synset_words.txt"),
        os.path.join(sub_dir, list_name),
        os.path.join(sub_dir, "synset_words.txt"),
        os.path.join(sub_dir, "to_exclude_all.txt"),
        removePath=removePath,
    )
    # Single-argument parenthesised print: same output under Python 2.
    print(checks)


def _write_combined_exclusion_file():
    """Concatenate the two ``to_exclude.txt`` lists into ``to_exclude_all.txt``.

    Reads the exclusion list of the current experiment directory and of its
    parent experiment directory, prints both lengths, writes the parent's
    entries followed by the current ones (one per line) and prints the path
    of the file written.
    """
    in_dir = "/disk2/octoberExperiments/nn_performance_without_pascal"
    out_dir = "/disk2/novemberExperiments/network_no_pascal"
    to_exclude_meta_file = os.path.join(out_dir, "to_exclude_all.txt")

    to_exclude_2 = imagenet.readSynsetsFile(os.path.join(out_dir, "to_exclude.txt"))
    to_exclude_1 = imagenet.readSynsetsFile(os.path.join(in_dir, "to_exclude.txt"))
    # %-formatting keeps the "a b" output of the original two-value print.
    print("%d %d" % (len(to_exclude_1), len(to_exclude_2)))

    # NOTE(review): assumes readSynsetsFile returns lists of str, so that
    # `+` concatenates and each entry can take a "\n" suffix - confirm.
    to_exclude_all = to_exclude_1 + to_exclude_2
    with open(to_exclude_meta_file, "wb") as f:
        f.writelines(id_curr + "\n" for id_curr in to_exclude_all)
    print(to_exclude_meta_file)


def _set_up_training_files():
    """Build the params for ``script_setUpFilesForTrainingOnSubset`` and run it.

    Input paths point into the parent experiment directory, output paths
    into the current experiment directory.
    """
    in_dir = "/disk2/octoberExperiments/nn_performance_without_pascal"
    out_dir = "/disk2/novemberExperiments/network_no_pascal"

    # NOTE(review): createParams presumably returns a record type that is
    # instantiated with keyword fields - confirm against its definition.
    params = createParams("setUpFilesForTrainingOnSubset")
    params = params(
        parent_synset_words_file=os.path.join(in_dir, "synset_words.txt"),
        parent_val_file=os.path.join(in_dir, "new_val.txt"),
        parent_train_file=os.path.join(in_dir, "train.txt"),
        parent_synset_file=os.path.join(in_dir, "synsets.txt"),
        new_val_file=os.path.join(out_dir, "val.txt"),
        new_train_file=os.path.join(out_dir, "train.txt"),
        new_synset_file=os.path.join(out_dir, "synsets.txt"),
        new_synset_words_file=os.path.join(out_dir, "synset_words.txt"),
        to_exclude_text_file=os.path.join(out_dir, "to_exclude.txt"),
    )
    script_setUpFilesForTrainingOnSubset(params)


def main():
    """Run the active stage: check the subset train list against the full one.

    The two other stages that used to sit after an early ``return`` (and
    therefore never ran) are kept as ``_write_combined_exclusion_file`` and
    ``_set_up_training_files``; call them here when they are needed.
    """
    _check_subset_files(split="train", removePath=False)