def val_score(self):
    """ Compute MAP@5 score for validation dataset """
    val_known = load_pickle_file(val_known_file)
    tmp = load_pickle_file(val_submit_file)
    val_submit = [img for (img, _) in tmp]
    y_true = [w for (_, w) in tmp]
    del tmp

    # Branch-model features for both image sets, scored pairwise by the head model
    fknown = self.branch_model.predict_generator(
        FeatureGen(val_known, self.img_gen.read_for_testing),
        max_queue_size=20, workers=8, verbose=0)
    fsubmit = self.branch_model.predict_generator(
        FeatureGen(val_submit, self.img_gen.read_for_testing),
        max_queue_size=20, workers=8, verbose=0)
    score = self.head_model.predict_generator(
        ScoreGen(fknown, fsubmit),
        max_queue_size=20, workers=8, verbose=0)
    score = self.score_reshape(score, fknown, fsubmit)

    img2whale = load_pickle_file(img2whale_file)
    best_5 = []
    for i, _ in enumerate(tqdm(val_submit)):
        t = []
        s = set()
        a = score[i, :]
        # Walk candidates from highest to lowest score, keeping the first
        # five distinct whale ids
        for j in list(reversed(np.argsort(a))):
            img = val_known[j]
            whale = img2whale[img]
            if whale not in s:
                s.add(whale)
                t.append(whale)
            if len(t) == 5:
                break
        assert len(t) == 5 and len(s) == 5
        best_5.append(t)
    map_5 = self.map5(best_5, y_true)
    return map_5
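# A minimal sketch of the map5 helper called above -- not the repository's
# implementation, just the standard MAP@5 metric it is assumed to compute:
# each query scores 1/(rank of the first correct whale id in its top 5),
# or 0 if the true id is absent, averaged over all queries.
def map5(best_5, y_true):
    total = 0.0
    for preds, truth in zip(best_5, y_true):
        for rank, whale in enumerate(preds):
            if whale == truth:
                total += 1.0 / (rank + 1)
                break
    return total / len(y_true)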
def __init__(self, lr, l2, model_name, histories=None, img_shape=(384, 384, 1),
             step=0, use_val=True, small_dataset=False):
    self.model, self.branch_model, self.head_model = build_model(lr, l2)
    # Avoid a mutable default argument: a shared list would leak state between instances
    self.histories = histories if histories is not None else []
    self.step = step
    self.img_shape = img_shape
    self.img_gen = ImageGenerator()
    self.best_map5 = 0
    self.model_name = model_name

    # Make callback list
    self.callback_list = self.make_callback_list()

    # Load training and validation examples (optionally the reduced dataset)
    if small_dataset:
        self.train = load_pickle_file(train_examples_small_file)
        print('SMALL DATASET')
        validation_data = load_pickle_file(validation_examples_small_file)
    else:
        self.train = load_pickle_file(train_examples_file)
        validation_data = load_pickle_file(validation_examples_file)

    if use_val:
        self.validation = ValData(validation_data, self.img_gen.read_for_testing, batch_size=16)
    else:
        self.validation = None

    # Make whale-to-training-images dict
    self.w2ts = self.make_w2ts()
def test_prepare_for_wikidata_function(self):
    the_so_called_correspondance = utils.load_pickle_file(
        "../tests/cellosaurus_informations_to_wikidata_ids.pickle")
    species = the_so_called_correspondance["species"]
    references = the_so_called_correspondance["references"]
    categories = utils.get_cell_line_category_to_wikidata("../project/category.txt")
    diseases = the_so_called_correspondance["diseases"]
    cellosaurus_dump_in_dictionary_format = utils.format_cellosaurus_dump_as_dictionary(
        "../project/test_cellosaurus.txt")
    wikidata = utils.query_wikidata_for_cell_lines()
    releaseID = "Q87574023"
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    cell_line = utils.CellossaurusCellLine(
        wdi_login_object=login,
        release_qid=releaseID,
        cellosaurus_dump=cellosaurus_dump_in_dictionary_format,
        wikidata_dictionary_with_existing_cell_lines=wikidata,
        references=references,
        species=species,
        cell_line_categories=categories,
        diseases=diseases,
        cell_line_id="CVCL_2260")
    data, data_to_delete = cell_line.prepare_for_wikidata()
    print(data)
    print(data_to_delete)
    self.assertEqual(cell_line.cell_line_id, "CVCL_2260")
def test_save__load_pickle(self):
    test_dictionary = {"a": 1, "b": 2}
    utils.save_pickle_file(test_dictionary, "/tmp/test.pickle")
    test_dictionary_after_processing = utils.load_pickle_file("/tmp/test.pickle")
    os.remove("/tmp/test.pickle")
    self.assertEqual(test_dictionary, test_dictionary_after_processing)
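# A minimal sketch of what utils.save_pickle_file / utils.load_pickle_file
# would have to do for this round-trip test to pass -- thin wrappers around
# the standard-library pickle module (the real helpers may differ).
import pickle

def save_pickle_file(obj, path):
    # Serialize obj to path in binary mode
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def load_pickle_file(path):
    # Deserialize and return the object stored at path
    with open(path, "rb") as f:
        return pickle.load(f)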
def load_model(KB_id):
    """Load all necessary data for the given knowledge base."""
    path_live = Path(ROOT) / "models" / f"KB_id_{KB_id}" / "live"

    # Get the configuration
    config = configparser.ConfigParser()
    config.read(path_live / "config.cfg")
    config_set = "DEFAULT"
    without_stopwords = config[config_set].getboolean("without_stopwords")
    num_of_sentences = config[config_set].getint("num_of_sentences")
    all_docs_kb_filepath = path_live / config[config_set]["all_docs_kb_filename"]

    # Load model and scaler
    model = load(path_live / "logreg_model.joblib")
    scaler = load_pickle_file(path_live / "std_scaler.pkl")

    # Load the kb that the model was built on
    logger.info("# load the kb that the model was built on")
    all_docs_kb = load_json_file(all_docs_kb_filepath, logger)

    # Load kb with vectors
    data_kb_with_vectors = load_pickle_file(path_live / DATA_KB_WITH_VECTORS_FILE)

    # Load the two label-mapping dictionaries
    qid_to_class, class_to_qid = (
        load_pickle_file(path_live / "qid_to_class.pkl"),
        load_pickle_file(path_live / "class_to_qid.pkl"),
    )

    return (
        model,
        scaler,
        all_docs_kb,
        data_kb_with_vectors,
        qid_to_class,
        class_to_qid,
        without_stopwords,
        num_of_sentences,
    )
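# Usage sketch: load_model returns an 8-tuple that callers unpack
# positionally; KB_id=1 is just an illustrative value.
(model, scaler, all_docs_kb, data_kb_with_vectors,
 qid_to_class, class_to_qid, without_stopwords, num_of_sentences) = load_model(KB_id=1)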
def test_make_statement(self):
    the_so_called_correspondance = utils.load_pickle_file(
        "../tests/cellosaurus_informations_to_wikidata_ids.pickle")
    references = the_so_called_correspondance["references"]
    statement = utils.make_statement(statement_property="P31",
                                     statement_value="Q5",
                                     references=references)
    statement_native_wdi = wdi_core.WDItemID(value="Q5", prop_nr="P31",
                                             references=references)
    self.assertEqual(statement, statement_native_wdi)
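# A plausible sketch of utils.make_statement, implied by the equality this
# test asserts -- a thin wrapper over WikidataIntegrator's WDItemID (the
# real helper may add defaults or qualifiers).
from wikidataintegrator import wdi_core

def make_statement(statement_property, statement_value, references):
    # Build an item-valued statement for the given property
    return wdi_core.WDItemID(value=statement_value,
                             prop_nr=statement_property,
                             references=references)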
def main():
    tic = time.time()
    known = load_pickle_file(train_known_file)
    submit = load_pickle_file(train_submit_file)
    print('inference HWI')

    args = parse_args()
    model_path = models_path + args.model_filename
    threshold = args.threshold

    # Load trained weights into a freshly built model
    weights = load_model(model_path).get_weights()
    # weights = keras.models.load_model(model_path, custom_objects={'contrastive_loss': contrastive_loss}).get_weights()
    model = Model(0, 0, 'submission', use_val=False)
    model.model.set_weights(weights)

    # Compute branch-model features for both image sets, then score every
    # (submit, known) pair with the head model
    fknown = model.branch_model.predict_generator(
        FeatureGen(known, model.img_gen.read_for_testing),
        max_queue_size=20, workers=8, verbose=0)
    fsubmit = model.branch_model.predict_generator(
        FeatureGen(submit, model.img_gen.read_for_testing),
        max_queue_size=20, workers=8, verbose=0)
    score = model.head_model.predict_generator(
        ScoreGen(fknown, fsubmit),
        max_queue_size=20, workers=8, verbose=0)
    score = model.score_reshape(score, fknown, fsubmit)

    # Generate submission file
    prepare_submission(threshold, args.output_filename, score, known, submit,
                       args.model_filename)

    toc = time.time()
    print("Inference time: ", (toc - tic) / 60, 'mins')
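# A hedged sketch of what score_reshape is assumed to do here (the actual
# method may differ): predict_generator returns the pair scores as a flat
# (num_pairs, 1) array, and this rearranges them into a matrix where
# score[i, j] is the similarity between submit image i and known image j,
# assuming ScoreGen emits pairs row-major over submit images.
import numpy as np

def score_reshape(score, fknown, fsubmit):
    return score.reshape(fsubmit.shape[0], fknown.shape[0])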
def prepare_submission(threshold, filename, score, known, submit, model_file):
    """
    Generate kaggle submission file.
    @param threshold: threshold given to 'new_whale'
    @param filename: submission file name
    """
    image2whale = load_pickle_file(img2whale_file)

    # Create scores dir if it doesn't exist
    scores_dir = callback_path + 'scores/'
    os.makedirs(scores_dir, exist_ok=True)

    # Create model dir if it doesn't exist
    model_dir = output_path + model_file.split('/')[0] + '/'
    os.makedirs(model_dir, exist_ok=True)

    new_whale = 'new_whale'

    # Prepare file paths
    score_file = scores_dir + filename.replace('.h5', '.score')
    output_file = model_dir + filename

    # Write the 5 best scoring whale ids for each test image
    with open(score_file, 'w+') as sf:
        with open(output_file, 'wt', newline='\n') as f:
            f.write('Image,Id\n')
            for i, p in enumerate(tqdm(submit)):
                t = []
                s = set()
                a = score[i, :]
                probs = []
                for j in list(reversed(np.argsort(a))):
                    img = known[j]
                    # Insert 'new_whale' once scores drop below the threshold
                    if a[j] < threshold and new_whale not in s:
                        s.add(new_whale)
                        t.append(new_whale)
                        probs.append(a[j])
                        if len(t) == 5:
                            break
                    # img2whale maps each known image to a single whale id
                    w = image2whale[img]
                    assert w != new_whale
                    if w not in s:
                        s.add(w)
                        t.append(w)
                        probs.append(a[j])
                    if len(t) == 5:
                        break
                assert len(t) == 5 and len(s) == 5
                f.write(p + ',' + ' '.join(t[:5]) + '\n')
                sf.write(p + ',' + ' '.join(map(str, probs)) + '\n')
def main():
    datapath = "../dataset/data_2/redial/"
    train_file = datapath + "train_data.jsonl"
    test_file = datapath + "test_data.jsonl"
    valid_file = datapath + "valid_data.jsonl"

    entity2entityId = load_pickle_file(datapath + "entity2entityId.pkl")
    text_dict = load_pickle_file(datapath + "text_dict.pkl")
    id2entity = load_pickle_file(datapath + "id2entity.pkl")

    train_dataset = read_data(train_file, text_dict, entity2entityId, id2entity)
    test_dataset = read_data(test_file, text_dict, entity2entityId, id2entity)
    valid_dataset = read_data(valid_file, text_dict, entity2entityId, id2entity)

    with open(datapath + "dataset_train.pkl", "wb") as f:
        pkl.dump(train_dataset, f)
    with open(datapath + "dataset_test.pkl", "wb") as f:
        pkl.dump(test_dataset, f)
    with open(datapath + "dataset_valid.pkl", "wb") as f:
        pkl.dump(valid_dataset, f)
def make_w2ts(self):
    # Map each whale to the numpy array of its images that are in the training set
    w2ts = {}
    whale2imgs = load_pickle_file(whale2imgs_file)
    for whale, imgs in tqdm(whale2imgs.items()):
        for img in imgs:
            if img in self.train:
                if whale not in w2ts:
                    w2ts[whale] = []
                if img not in w2ts[whale]:
                    w2ts[whale].append(img)
    for w, ts in w2ts.items():
        w2ts[w] = np.array(ts)
    return w2ts
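# Illustrative only (not from the source): w2ts is the kind of structure a
# siamese training loop would use to draw matching pairs, e.g. one random
# positive pair per whale with at least two training images.
import random
import numpy as np

def sample_positive_pairs(w2ts):
    pairs = []
    for whale, ts in w2ts.items():
        if len(ts) >= 2:
            a, b = np.random.choice(ts, size=2, replace=False)
            pairs.append((a, b, 1))  # label 1 = same whale
    random.shuffle(pairs)
    return pairs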
def __init__(self, path):
    # Use a context manager so the file handle is closed promptly
    with open("model_files/output.data", 'rb') as f:
        self.all_articles = pickle.load(f)['articles']
    self.all_results = load_pickle_file(path)
    self.inner_results = {}
    self.directory_results = {}
def make_dicts(reset_all, make_val):
    # Create the meta directory for all dictionaries generated below
    os.makedirs(meta_dir, exist_ok=True)

    train_data = dict([(img, whale) for (_, img, whale) in read_csv(train_csv).to_records()])
    test_data = [img for (_, img, _) in read_csv(sample_csv).to_records()]

    # Load the whale2imgs dictionary if it exists, or create it otherwise
    if isfile(whale2imgs_file) and not reset_all:
        whale2imgs = load_pickle_file(whale2imgs_file)
    else:
        whale2imgs = {}
        for img, whale in tqdm(train_data.items()):
            if whale not in whale2imgs:
                whale2imgs[whale] = []
            if img not in whale2imgs[whale]:
                whale2imgs[whale].append(img)
        save_to_pickle(whale2imgs_file, whale2imgs)

    if not isfile(img2whale_file) or reset_all:
        # Map training images to whales, excluding 'new_whale'
        img2whale = {}
        for img, whale in tqdm(train_data.items()):
            if whale != 'new_whale':
                if img not in img2whale:
                    img2whale[img] = whale
        train_known = sorted(list(img2whale.keys()))
        save_to_pickle(img2whale_file, img2whale)
        save_to_pickle(train_known_file, train_known)
        save_to_pickle(train_submit_file, test_data)

    if reset_all or not (isfile(train_examples_file) and isfile(validation_examples_file)):
        train_examples = []
        validation_examples = []
        lonely = []
        new_whale = []
        val_match = []
        lonely_count = len([x for x in whale2imgs.values() if len(x) == 1])    # 2073
        couple_count = len([x for x in whale2imgs.values() if len(x) == 2])    # 1285
        new_count = len([x for x in train_data.values() if x == 'new_whale'])  # 9664
        # Additional matching-whale count needed to build a balanced validation
        # dataset (equal numbers of matching and unmatching examples)
        extra_count = lonely_count - couple_count  # 2073 - 1285 = 788
        val_known = []
        val_submit = []
        matching_count = 0
        small_train_examples = []
        small_count = 0

        if make_val:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs[2:]
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs[2:]
                    small_count += 2
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                    small_count += len(imgs)
        else:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                    train_examples += imgs
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs
                    small_count += 2
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                    small_count += len(imgs)
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                    small_count += len(imgs)

        print('lonely whales count: ', lonely_count)
        print('new whales count: ', new_count)
        print('couple whales count: ', couple_count)
        print('extra whales count: ', extra_count)

        random.shuffle(lonely)
        # Unmatching pairs: pair each lonely-whale image with a random 'new_whale' image
        val_unmatch = list(zip(lonely,
                               np.random.choice(new_whale, size=lonely_count, replace=False),
                               np.zeros(lonely_count, dtype=np.int8)))
        validation_examples = val_match + val_unmatch
        random.shuffle(validation_examples)
        random.shuffle(train_examples)

        small_validation_size = len(validation_examples) // 10
        small_validation_examples = validation_examples[:small_validation_size]

        print('Train size: ', len(train_examples))
        print('Validation size: ', len(validation_examples))
        print('val_known size: ', len(val_known))
        print('val_submit size: ', len(val_submit))

        save_to_pickle(train_examples_file, train_examples)
        save_to_pickle(validation_examples_file, validation_examples)
        save_to_pickle(train_examples_small_file, small_train_examples)
        save_to_pickle(validation_examples_small_file, small_validation_examples)
        save_to_pickle(val_known_file, val_known)
        save_to_pickle(val_submit_file, val_submit)
            break
    return shift_keypoints


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pickle_file", default="pickles/image_data_normalized.p")
    parser.add_argument("--cluster", default="basic")
    parser.add_argument("--save_folder", default="save_results")
    args = parser.parse_args()

    pickle_file = args.pickle_file
    cluster = args.cluster

    images, keypoints = load_pickle_file(args.pickle_file)
    keypoints = np.array(keypoints)
    # One (x, y) coordinate pair per keypoint
    keypoints = keypoints.reshape(keypoints.shape[0], keypoints.shape[1], 2)

    if cluster == "basic":
        save_path = args.save_folder
        cluster_basic(keypoints, save_path)
    elif cluster == "kmeans":
        grps, centers = cluster_kmeans(keypoints, DEFAULT_K_VALUE, args.save_folder)
        # sse = compute_sse(k, keypoints, grps, centers)
    elif cluster == "mean_shift":
        save_path = args.save_folder
        new_points = mean_shift(keypoints)
#!/usr/bin/env python3
import os

import cv2

from utils import load_pickle_file

allowed_width = 256
allowed_height = 192

images, labels = load_pickle_file("pickles/image_data_normalized.p")

if not os.path.exists("test_images"):
    os.makedirs("test_images")

for img_path in images:
    img = cv2.imread(img_path)
    resized_img = cv2.resize(img, (allowed_width, allowed_height))
    # Flatten the last two path components into a unique filename
    parts = img_path.split("/")
    save_path = os.path.join("test_images", "_".join(parts[-2:]))
    cv2.imwrite(save_path, resized_img)
def __init__(self, opt):
    self.entity2entityId = load_pickle_file(opt["entity2entityId"])
    self.relation2relationId = load_pickle_file(opt["relation2relationId"])
def __init__(self, opt, transform):
    self.transform = transform
    self.entity2entityId = load_pickle_file(opt["entity2entityId"])
    self.relation2relationId = load_pickle_file(opt["relation2relationId"])
    self.dataset = load_pickle_file(opt["dataset"])
    self.movie_ids = load_pickle_file(opt["movie_ids"])
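# Illustrative usage: the constructor expects an opt dict whose values are
# pickle paths matching the keys read above. The redial paths mirror the
# preprocessing script; "relation2relationId.pkl" and "movie_ids.pkl" are
# hypothetical filenames.
datapath = "../dataset/data_2/redial/"
opt = {
    "entity2entityId": datapath + "entity2entityId.pkl",
    "relation2relationId": datapath + "relation2relationId.pkl",
    "dataset": datapath + "dataset_train.pkl",
    "movie_ids": datapath + "movie_ids.pkl",
}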