def script_setUpFilesForTrainingOnSubset(params):
    """Write val/train/synset files for a class subset, dropping excluded classes.

    The class ids listed in ``params.to_exclude_text_file`` (one per line)
    are handed to ``imagenet.removeImagesFromListByClass`` together with the
    parent validation and training lists. The surviving (image, class) pairs
    are written with ``imagenet.writeNewDataClassFile``, and the classes that
    helper returns for the training list are then used to write:

    * ``params.new_synset_file`` -- one class id per line;
    * ``params.new_synset_words_file`` -- ``"<class_id> <label>"`` per line,
      with the label taken from ``params.parent_synset_words_file``.

    Finally the new synset file is read back and ``FOUND ERROR <id>`` is
    printed for every excluded id whose text still occurs in it.

    Args:
        params: object exposing the attributes ``parent_synset_words_file``,
            ``parent_val_file``, ``parent_train_file``,
            ``parent_synset_file``, ``new_val_file``, ``new_train_file``,
            ``new_synset_file``, ``new_synset_words_file`` and
            ``to_exclude_text_file`` (all used as file paths).

    Raises:
        AssertionError: if the classes returned for the validation and the
            training list differ (compared through ``str()``).
        ValueError: if a surviving class id has no entry in the parent
            synset-words file.
    """
    # Read every attribute up front so a missing one fails before any file
    # is written.
    parent_synset_words_file = params.parent_synset_words_file
    parent_val_file = params.parent_val_file
    parent_train_file = params.parent_train_file
    parent_synset_file = params.parent_synset_file
    new_val_file = params.new_val_file
    new_train_file = params.new_train_file
    new_synset_file = params.new_synset_file
    new_synset_words_file = params.new_synset_words_file
    to_exclude_text_file = params.to_exclude_text_file

    # Entries are indexed as (class id, label, ...) -- presumably tuples or
    # lists; verify against imagenet.readLabelsFile. setdefault keeps the
    # first label seen for an id, which is what list.index() used to select.
    label_by_id = {}
    for entry in imagenet.readLabelsFile(parent_synset_words_file):
        label_by_id.setdefault(entry[0], entry[1])

    # Text mode behaves like the former binary mode on POSIX Python 2 and
    # also yields str on Python 3. Blank lines are skipped because an empty
    # id would match any content in the sanity check at the end.
    with open(to_exclude_text_file, "r") as f:
        to_exclude = [line.strip() for line in f if line.strip()]

    ims_to_keep, _class_ids, classes_to_keep = imagenet.removeImagesFromListByClass(
        parent_val_file, parent_synset_file, to_exclude
    )
    # list(...) so the helper receives a real list on Python 3 as well.
    classes_uni_val = imagenet.writeNewDataClassFile(
        new_val_file, list(zip(ims_to_keep, classes_to_keep))
    )

    ims_to_keep, _class_ids, classes_to_keep = imagenet.removeImagesFromListByClass(
        parent_train_file, parent_synset_file, to_exclude
    )
    classes_uni_train = imagenet.writeNewDataClassFile(
        new_train_file, list(zip(ims_to_keep, classes_to_keep))
    )

    # Compared through str(), exactly as before.
    assert str(classes_uni_val) == str(classes_uni_train), (
        "validation and training subsets produced different class lists"
    )

    with open(new_synset_file, "w") as f:
        for class_id in classes_uni_train:
            f.write(class_id + "\n")

    with open(new_synset_words_file, "w") as f:
        for class_id in classes_uni_train:
            if class_id not in label_by_id:
                # Same exception type list.index() raised for a missing id.
                raise ValueError(
                    "%r has no entry in %s" % (class_id, parent_synset_words_file)
                )
            f.write(class_id + " " + label_by_id[class_id] + "\n")

    # Sanity check: no excluded id may appear in the synset file just written.
    with open(new_synset_file, "r") as f:
        content = f.read()
    for id_to_exclude in to_exclude:
        if id_to_exclude in content:
            print("FOUND ERROR %s" % (id_to_exclude,))
def script_setUpPascalExcludedTextFiles(
        ilsvrc_dir='../../data/ilsvrc12',
        out_dir='/disk2/octoberExperiments/nn_performance_without_pascal'):
    """Write val/train/synsets files with the classes overlapping Pascal removed.

    Class ids are read from ``<ilsvrc_dir>/synset_words.txt`` and
    ``<out_dir>/pascal_classes.txt`` via ``imagenet.readLabelsFile``;
    ``imagenet.removeClassesWithOverlap`` turns the two id lists into the
    list of classes to exclude. ``<ilsvrc_dir>/val.txt`` and
    ``<ilsvrc_dir>/train.txt`` are then filtered through
    ``imagenet.removeImagesFromListByClass`` (with
    ``<ilsvrc_dir>/synsets.txt`` as the mapping file) and the surviving
    (image, class) pairs are written to ``<out_dir>/val.txt`` and
    ``<out_dir>/train.txt``. The classes returned for the training list are
    written one per line to ``<out_dir>/synsets.txt``.

    Finally the new training list is read back and ``FOUND ERROR <id>`` is
    printed for every excluded id whose text still occurs in it.

    Args:
        ilsvrc_dir: directory holding the parent ``synset_words.txt``,
            ``synsets.txt``, ``val.txt`` and ``train.txt``. The default is
            the path this script used before it became a parameter.
        out_dir: directory that holds ``pascal_classes.txt`` and receives
            the new ``val.txt``, ``train.txt`` and ``synsets.txt``. The
            default is the path this script used before.

    Raises:
        AssertionError: if the classes returned for the validation and the
            training list differ (compared through ``str()``).
    """
    import os

    # Only the first field of each labels-file entry (the class id) is
    # needed here; entries are presumably tuples or lists -- verify against
    # imagenet.readLabelsFile.
    val_ids = imagenet.readLabelsFile(os.path.join(ilsvrc_dir, 'synset_words.txt'))
    val_just_ids = [entry[0] for entry in val_ids]

    pascal_ids = imagenet.readLabelsFile(os.path.join(out_dir, 'pascal_classes.txt'))
    pascal_just_ids = [entry[0] for entry in pascal_ids]

    to_exclude = imagenet.removeClassesWithOverlap(val_just_ids, pascal_just_ids)

    mapping_file = os.path.join(ilsvrc_dir, 'synsets.txt')

    ims_to_keep, _class_ids, classes_to_keep = imagenet.removeImagesFromListByClass(
        os.path.join(ilsvrc_dir, 'val.txt'), mapping_file, to_exclude)
    new_file_val = os.path.join(out_dir, 'val.txt')
    # Qualified with the module name like every other call site; list(...)
    # so the helper receives a real list on Python 3 as well.
    classes_uni_val = imagenet.writeNewDataClassFile(
        new_file_val, list(zip(ims_to_keep, classes_to_keep)))

    ims_to_keep, _class_ids, classes_to_keep = imagenet.removeImagesFromListByClass(
        os.path.join(ilsvrc_dir, 'train.txt'), mapping_file, to_exclude)
    new_file_train = os.path.join(out_dir, 'train.txt')
    classes_uni_train = imagenet.writeNewDataClassFile(
        new_file_train, list(zip(ims_to_keep, classes_to_keep)))

    # Compared through str(), exactly as before.
    assert str(classes_uni_val) == str(classes_uni_train), (
        'validation and training subsets produced different class lists')

    class_file = os.path.join(out_dir, 'synsets.txt')
    # Text mode behaves like the former binary mode on POSIX Python 2 and
    # also accepts str on Python 3.
    with open(class_file, 'w') as f:
        for class_id in classes_uni_train:
            f.write(class_id + '\n')

    # Sanity check: scan the new training list for excluded ids.
    # NOTE(review): script_setUpFilesForTrainingOnSubset scans the new
    # synsets file instead -- confirm which file is intended here.
    with open(new_file_train, 'r') as f:
        content = f.read()
    for id_to_exclude in to_exclude:
        if id_to_exclude in content:
            print('FOUND ERROR %s' % (id_to_exclude,))