def comparing_with_ground_truth(tops, txt_infos, k):
    utils.dump_pickle("result.pkl", tops)
    gt = utils.get_pickle("datasets/qst1_w4/gt_corresps.pkl")
    hypo = utils.get_pickle("result.pkl")
    mapAtK = metrics.mapk(gt, hypo, k)
    print("\nMap@ " + str(k) + " is " + str(mapAtK))
    bbs_gt = np.asarray(
        utils.get_groundtruth("datasets/qst1_w4/text_boxes.pkl")).squeeze()
    bbs_predicted = [[painting.boundingxy for painting in txt_info]
                     for txt_info in txt_infos]
    mean_iou = utils.get_mean_IoU(bbs_gt, bbs_predicted)
    print("Mean Intersection over Union: ", mean_iou)
    texts_gt = utils.get_gt_text("datasets/qst1_w4")
    texts_predicted = [[painting.text for painting in txt_info]
                       for txt_info in txt_infos]
    with open('results.txt', 'w') as f:
        for item in texts_predicted:
            f.write("%s\n" % item)
    mean_lev = utils.compute_lev(texts_gt, texts_predicted)
    print(texts_predicted)
    print("\n")
    print(texts_gt)
    print("Mean Levenshtein distance: ", mean_lev)
def main(args):
    s2i = get_pickle('assets/s2i.pkl')
    covariance = get_pickle('assets/covariance.pkl')
    indices = covariance.stack().index.tolist()
    dataset = gloveDataset(indices, covariance, s2i)
    model = GloVeCov(len(s2i), 300)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    losses = []
    epoch_losses = [np.inf, np.inf, np.inf]
    total_n = len(dataset)
    tmplt = "E:{:2d} - i:{:5d}({:4.2f}%) - L:{:5.5f}"
    for epoch in range(args.epoch):
        dataloader = DataLoader(dataset,
                                batch_size=args.bs,
                                collate_fn=collate_fn,
                                shuffle=True)
        model.train()
        losses = []
        for i, batch in enumerate(dataloader):
            left, right, covariances = batch
            left = torch.LongTensor(left)
            right = torch.LongTensor(right)
            covariances = torch.FloatTensor(covariances)
            loss = model(left, right, covariances)
            loss.backward()
            optimizer.step()
            model.zero_grad()
            losses.append(np.sqrt(loss.data))
            if i % 100 == 0:
                ml = np.mean(losses)
                t = tmplt.format(epoch, i, i * args.bs / total_n * 100, ml)
                print(t)
                losses = []
        model.eval()
        dataloader = DataLoader(dataset,
                                batch_size=args.bs,
                                collate_fn=collate_fn,
                                shuffle=True)
        losses = []
        for i, batch in enumerate(dataloader):
            left, right, covariances = batch
            left = torch.LongTensor(left)
            right = torch.LongTensor(right)
            covariances = torch.FloatTensor(covariances)
            loss = model(left, right, covariances)
            losses.append(np.sqrt(loss.data))
        epoch_losses.append(np.mean(losses))
        print('Epoch loss {}'.format(epoch_losses[-1]))
        if epoch_losses[-1] > epoch_losses[-4]:
            break
        else:
            filename = 'assets/model/model_glove_cov.torch'
            state = dict(state_dict=model.state_dict(),
                         loss=epoch_losses,
                         args=args)
            torch.save(state, filename)
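# Hedged sketch (not from the source): the training loop above unpacks each batch
# into (left, right, covariances), so a compatible collate_fn presumably just
# transposes a list of (left_index, right_index, covariance) items into three
# parallel lists. The exact item layout returned by gloveDataset is an assumption.
def collate_fn(batch):
    # batch: list of (left_index, right_index, covariance) tuples
    left = [item[0] for item in batch]
    right = [item[1] for item in batch]
    covariances = [item[2] for item in batch]
    return left, right, covariances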
def main():
    # K parameter for map@k
    k = 1

    # Get images and denoise query set.
    print("Reading images...")
    qs = get_imgs("datasets/qsd1_w4")
    db = get_imgs("datasets/DDBB")
    """
    Denoising methods
    "Gaussian"
    "Median"
    "bilateral"
    "FastNl"
    """
    print("Denoising images...")
    # qs_denoised = [utils.denoise_image(img, method="FastNl") for img in tqdm(qs)]

    # Separating paintings inside images into separate images
    qs_split = [background_remover.remove_background(img) for img in qs]

    print("\nComputing histograms...")
    hogs_qs = [[utils.get_hog_histogram(painting) for painting in img]
               for img in qs_split]
    hogs_ddbb = utils.get_hog_histograms(db)

    print("\nComputing distances")
    distances = []
    # Generating distances between qs images and db images
    for im in tqdm(hogs_qs):
        current_im = []
        for painting_hog in im:
            current_pt = []
            for db_hog in hogs_ddbb:
                current_pt.append(sum(np.abs(painting_hog - db_hog)))
            current_im.append(current_pt)
        distances.append(current_im)
    print("Done calculating hogs")

    # Generating predictions
    predictions = []
    for im in distances:
        current_im = []
        for painting_dst in im:
            current_im.append(utils.list_argsort(painting_dst)[:k])
        predictions.append(current_im)

    # Remove nesting of lists
    hypo = []
    for im in predictions:
        current_im = []
        for painting in im:
            for pred in painting:
                current_im.append(pred)
        hypo.append(current_im)

    # Generate map@k
    gt = utils.get_pickle("datasets/qsd1_w4/gt_corresps.pkl")
    mapAtK = metrics.mapk(gt, hypo, k)
    print("\nMap@ " + str(k) + " is " + str(mapAtK))
def get_latest_tables(self):
    if self.table_ids is not None:
        log("Getting results from table(s) {} for group {}".format(
            self.table_ids, self.group_id), 0)
        return [table for table in self.get_tables() if table[0] in self.table_ids]
    if self.num_recent_tables is not None:
        log("Getting results from the most recent {} table(s) for group {}".format(
            self.num_recent_tables, self.group_id), 0)
        all_tables = self.get_tables()
        if self.num_recent_tables > len(all_tables):
            log("Running on all tables: {}".format(all_tables))
            return all_tables
        else:
            return all_tables[-1 * self.num_recent_tables:]
    log("Getting results from all tables since last table backup")
    new_table_list = self.get_tables()
    old_table_list_filename = "old_tables_{}.pkl".format(self.group_id)  # Group-specific table list
    try:
        old_tables = get_pickle(self.output_dir, old_table_list_filename)
        new_tables = list(set(new_table_list) - set(old_tables))
        set_pickle(new_table_list, self.output_dir, old_table_list_filename)
        log("Found {} new tables since the last table backup. Tables: {}".format(
            len(new_tables), new_tables))
        return new_tables
    except Exception as e:
        log("Error: {}. Likely no table backup. Backing up current table list for future run.".format(e))
        set_pickle(new_table_list, self.output_dir, old_table_list_filename)
        return []
def comparing_with_ground_truth(tops, txt_infos, k):
    texts_predicted = [[painting.text for painting in txt_info]
                       for txt_info in txt_infos]
    for i, item in enumerate(texts_predicted):
        with open('outputs/' + f'{i:05}' + '.txt', 'w') as f:
            for text in item:
                f.write("%s\n" % text)
    gt = utils.get_pickle("datasets/qsd1_w5/gt_corresps.pkl")
    mapAtK = utils.compute_mapk(gt, tops, k)
    print("\nMap@ " + str(k) + " is " + str(mapAtK))
    bbs_gt = np.asarray(
        utils.get_groundtruth("datasets/qsd1_w5/text_boxes.pkl")).squeeze()
    bbs_predicted = [[painting.boundingxy for painting in txt_info]
                     for txt_info in txt_infos]
    mean_iou = utils.get_mean_IoU(bbs_gt, bbs_predicted)
    print("Mean Intersection over Union: ", mean_iou)
    texts_gt = utils.get_gt_text("datasets/qsd1_w5")
    texts_predicted = [[painting.text for painting in txt_info]
                       for txt_info in txt_infos]
    mean_lev = utils.compute_lev(texts_gt, texts_predicted)
    print(texts_predicted)
    print("\n")
    print(texts_gt)
    print("Mean Levenshtein distance: ", mean_lev)
def update_chat_for_table(self, table_id, ignore_last):
    old_chat_filename = "{}_{}_chat.pkl".format(self.group_id, table_id)
    old_chat = get_pickle(self.download_dir, old_chat_filename) if os.path.isfile(
        os.path.join(self.download_dir, old_chat_filename)) else None
    new_chat_before_ignore_last = self.get_chat_history(table_id)
    if len(new_chat_before_ignore_last) > ignore_last:
        new_chat = new_chat_before_ignore_last[:len(new_chat_before_ignore_last) - ignore_last]
    else:
        new_chat = new_chat_before_ignore_last
    len_new_chat = len(new_chat)
    if table_id in self.last_new_chat_lengths:
        last_new_chat_length = self.last_new_chat_lengths[table_id]
    else:
        last_new_chat_length = None
    if len_new_chat > 400 or \
            (last_new_chat_length is not None and last_new_chat_length > 140 and
             len_new_chat >= last_new_chat_length * 1.7) or \
            duplicate_at_start(new_chat_before_ignore_last):
        new_chat = []
        log("New chat length {} exceeds 1.7x the last new chat length {} or max 400 or duplicate. Ignoring."
            .format(len_new_chat, last_new_chat_length))
    else:
        log("New chat length {} does not exceed 1.7x last new chat length {} or max 400 or duplicate. Using new chat."
            .format(len_new_chat, last_new_chat_length))
    self.last_new_chat_lengths[table_id] = len(new_chat)
    log("Old chat length for table {}: {}".format(table_id, self.log_chat(old_chat)))
    log("New chat length for table {}: {}".format(table_id, self.log_chat(new_chat)))
    if old_chat != new_chat:
        consolodated_chat = self.consolodate_chats(old_chat, new_chat)
        if consolodated_chat is not None and len(consolodated_chat) > 0:
            log("Saving consolodated chat for table {}, length {}, first message {}, last message {}"
                .format(table_id, self.log_chat(consolodated_chat),
                        consolodated_chat[0],
                        consolodated_chat[len(consolodated_chat) - 1]))
            set_pickle(consolodated_chat, self.download_dir, old_chat_filename)
        else:
            log("Consolodated chat for table {} is None or len 0. Not saving."
                .format(table_id))
            # log("Old:{}\n".format(old_chat))
            # log("New:{}\n".format(new_chat))
            if len(old_chat) > 20:
                log("End of old:{}\n".format(old_chat[-20:]))
            if len(new_chat) > 20:
                log("Beginning of new:{}\n".format(new_chat[:20]))
            log("Consolodated:\n{}".format(consolodated_chat))
        if consolodated_chat is None or len(consolodated_chat) == 0 or not new_chat:
            # Situations where we fail, but want to run again, just run again rather than waiting
            log("Re running self.update_chat_for_table for table {} in 2 seconds"
                .format(table_id))
            time.sleep(2)
            self.update_chat_for_table(table_id, ignore_last)
    else:
        log("Chat has not changed for table {}".format(table_id))
def from_file(cls, faces, data_file_name, n_eigs):
    pickle = get_pickle(data_file_name)
    if pickle is not None:
        logging.info('using previously calculated facespace')
        return cls(faces, n_eigs=n_eigs, face_space=pickle)
    else:
        logging.info('No previous facespace was found')
        eig_face = cls(faces, n_eigs=n_eigs)
        save_pickle(eig_face.entire_face_space, data_file_name)
        return eig_face
def __init__(self, group_id, download_dir, table_ids, num_recent_tables):
    log("Running SiteReader with group id {}, download dir {}, table ids {}, num tables {}".format(
        group_id, download_dir, table_ids, num_recent_tables), 1)
    self.group_id = group_id
    self.download_dir = download_dir
    self.table_ids = table_ids
    self.num_recent_tables = num_recent_tables
    self.output_dir = os.path.abspath(os.path.join(download_dir, "../"))
    self.cookies = get_pickle(self.output_dir, "cookies.pkl")
    self.driver = None
    self.latest_tables = self.get_latest_tables()
    self.num_files_retrieved = 0
def get_new_messages(self):
    log("Checking for new messages", 3)
    message_id_list = [msg.id for msg in self.groupme_group.messages.list()]
    messages_pickle = "groupme_messages_{}.pkl".format(self.groupme_group_id)
    messages_pickle_path = os.path.join(self.output_dir, messages_pickle)
    if os.path.isfile(messages_pickle_path):
        old_messages = get_pickle(self.output_dir, messages_pickle)
        messages_to_check = list(set(message_id_list) - set(old_messages))
    else:
        messages_to_check = message_id_list
    set_pickle(message_id_list, self.output_dir, messages_pickle)
    self.new_messages = set(messages_to_check)
def lambda_handler(event, context):
    try:
        body = json.loads(event.get('body', ''))
        pkl = get_pickle('pickles/factory_linear_regression.pkl')
        df = pd.DataFrame([body])
        X = df[['temp', 'vibration', 'current', 'noise']]
        X = normalize_features(X)
        prediction = pkl['model'].predict(X)
        encoding_prediction = pkl['encoding'][prediction[0]]
    except Exception as e:
        return response({"Error": str(e)}, 500)
    else:
        return response({"prediction": encoding_prediction}, 200)
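# Hedged usage sketch (not from the source): invoking lambda_handler locally with an
# API Gateway-style event. The feature names match the handler above; the numeric
# values are made up, and the response()/normalize_features() helpers and the pickle
# file are assumed to be available in the same module.
if __name__ == "__main__":
    import json

    sample_event = {
        "body": json.dumps({
            "temp": 72.0,
            "vibration": 0.3,
            "current": 1.8,
            "noise": 55.0,
        })
    }
    print(lambda_handler(sample_event, context=None))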
def __init__(self, group_id, output_dir):
    log("Running HistoryLogger with group id {}, output dir {}".format(
        group_id, output_dir), 1)
    self.group_id = group_id
    self.output_dir = os.path.join(script_path, output_dir)
    self.download_dir = os.path.join(script_path, self.output_dir, "ChatHistories")
    if not os.path.exists(self.download_dir):
        os.mkdir(self.download_dir)
    self.cookies = get_pickle(self.output_dir, "cookies.pkl")
    self.driver = None
    self.last_new_chat_lengths = {}
    self.last_active_tables = None
    self.active_tables = None
    self.all_tables = None
    self.last_all_tables = None
def task6(self, k=10):
    QS = [  # noqa
        hists for hists in tqdm(
            MultiHistDataset(self.QSD2_W3,
                             masking=True,
                             bbox=True,
                             multires=4,
                             method="color",
                             texture="LBP",
                             denoise=True))
    ]
    GT = get_pickle("datasets/qsd2_w3/gt_corresps.pkl")
    DB = list(
        tqdm(
            HistDataset(self.DDBB,
                        masking=False,
                        multires=4,
                        method="color",
                        texture="LBP")))  # noqa
    tops = find_multi_img_corresp_keep(QS, DB, k)
    dump_pickle("result_qst2.pkl", tops)
    mapAtK = metrics.mapk(GT, tops, k)
    print("Map@k is " + str(mapAtK))
    exit()
    with open("outputs/resutls.pkl", "wb") as f:
        pickle.dump(tops, f)
    print(tops)

    # Generate pngs
    QS1 = Dataset(self.QSD2_W2, masking=True, bbox=True)
    for i in range(len(QS1)):
        im = QS1.get_mask(i)
        cv2.imwrite("outputs/" + str(i) + ".png", im)
    text_boxes = [BBox().get_bbox_cords(QS1[i]) for i in range(len(QS1))]
    with open("outputs/text_boxes.pkl", "wb") as f:
        pickle.dump(text_boxes, f)
img = Image.merge("RGB", (b, g, r)) img = img.rotate(angle=rotation, expand=False) return np.array(img) def read_horizontal_image(path): img = cv2.imread(path) lines = get_all_lines(img) angle = get_horiz_angle(lines) return get_rotated(path, angle) if __name__ == "__main__": from utils import get_pickle from tqdm.auto import tqdm GT = get_pickle("datasets/angles_qsd1w5_v2.pkl") sum_err, elems = 0, 0 for i, path in enumerate(tqdm(image_paths)): img = cv2.imread(path) lines = get_all_lines(img) angle = get_horiz_angle(lines) gt_like_angle = get_GTFORMAT_angle(angle) for gt in GT[i]: sum_err += angle_diff(gt, gt_like_angle, mod=180) elems += 1 show_img(draw_horizontal_lines(img, lines, angle)) show_img(get_rotated(path, angle)) print(f"Average precision {sum_err / elems}")
import torch
import numpy as np
import pandas as pd

from model import GloVeCor, GloVeCov, SkipGram
from utils import get_pickle

s2i = get_pickle('assets/s2i.pkl')
i2s = get_pickle('assets/i2s.pkl')
holdings = pd.read_csv('assets/holdings.csv', index_col=6)

glove_cor_checkpoint = torch.load('assets/model/model_glove_cor.torch')
model_glove = GloVeCor(len(s2i), 300)
model_glove.load_state_dict(glove_cor_checkpoint['state_dict'])
weights = model_glove.embeddings.weight.detach()
np.savetxt('embeddings/glove_cor_tensors.tsv', weights, delimiter='\t')

glove_cov_checkpoint = torch.load('assets/model/model_glove_cov.torch')
model_glove = GloVeCov(len(s2i), 300)
model_glove.load_state_dict(glove_cov_checkpoint['state_dict'])
weights = model_glove.embeddings.weight.detach()
np.savetxt('embeddings/glove_cov_tensors.tsv', weights, delimiter='\t')

skip_checkpoint = torch.load('assets/model/model_skip.torch')
model_skip = SkipGram(len(s2i), 300)
model_skip.load_state_dict(skip_checkpoint['state_dict'])
weights = model_skip.embeddings.weight.detach()
np.savetxt('embeddings/skip_tensors.tsv', weights, delimiter='\t')

selector = [i2s[e] for e in range(len(weights))]
cols = ['Name', 'Sector', 'Industry Group', 'Country', 'Currency']
metadata = holdings.loc[selector, cols]
import pandas as pd

from utils import write_pickle, get_pickle

CONTEXT_WINDOW = 3

s2i = get_pickle('assets/s2i.pkl')
returns = get_pickle('assets/returns.pkl')
returns.columns = [s2i[e] for e in returns.columns]

dataset = []
for date, row in returns.iterrows():
    print(date)
    for i, symbol in enumerate(row.index):
        sym_ret = row[symbol]
        if pd.isnull(sym_ret):
            continue
        # Rank all symbols by how close their return is to this symbol's return on this date
        abs = row[(row - sym_ret).dropna().abs().argsort()]
        # Skip position 0 (the symbol itself) and keep the next CONTEXT_WINDOW closest symbols
        similars = abs.iloc[1:(1 + CONTEXT_WINDOW)].index.tolist()
        for similar in similars:
            dataset.append(dict(input=symbol, target=similar))

dataset = pd.DataFrame(dataset)
write_pickle(dataset, 'assets/dataset.pkl')
import pandas as pd
import numpy as np

from utils import get_pickle, to_corr

# Generals
returns = get_pickle('assets/returns.pkl')
corr = get_pickle('assets/correlations.pkl').values
cov = get_pickle('assets/covariance.pkl').values
days = returns.index
metadata = pd.read_csv('assets/metadata.tsv', sep='\t', index_col=False)
glove_cor = np.loadtxt('embeddings/glove_cor_tensors.tsv', delimiter='\t')
glove_cov = np.loadtxt('embeddings/glove_cov_tensors.tsv', delimiter='\t')
skip = np.loadtxt('embeddings/skip_tensors.tsv', delimiter='\t')
holdings = pd.read_csv('assets/holdings.csv', index_col=6)
holdings = holdings.reindex(returns.columns)
holdings = holdings.loc[holdings.index.notnull(), :]
aum = holdings['Mkt Val'].sum()
holdings.loc[:, 'Weight'] = holdings.loc[:, 'Mkt Val'] / aum
sectors = holdings['Sector'].dropna().unique().tolist()
weights = holdings['Weight']


def error_num(estimated, correct):
    mean = np.nanmean(np.abs(correct - estimated), axis=(0, 1))
    std = np.nanstd(np.abs(correct - estimated), axis=(0, 1))
    return mean, std


# Covariance and correlations
glove_cov_cov = np.matmul(glove_cov, np.transpose(glove_cov))
glove_cov_cor = to_corr(glove_cov)
def sd_to_new_format_file(in_path, out_path):
    old_sd = get_pickle(in_path)
    new_sd = sd_to_new_format(old_sd)
    write_pickle(new_sd, out_path)
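# Hedged sketch (not from the source): the get_pickle/write_pickle helpers that these
# snippets import from utils. Some snippets call get_pickle(path) while others call
# get_pickle(directory, filename) / set_pickle(obj, directory, filename), so the real
# project helpers likely differ; this is only the single-path variant.
import pickle


def get_pickle(path):
    # Return the unpickled object, or None if the file does not exist.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        return None


def write_pickle(obj, path):
    with open(path, "wb") as f:
        pickle.dump(obj, f)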
help="path of directory to store converted score dicts OR path " "of file at which to store merged file", required=True) args = parser.parse_args() assert not os.path.exists(args.out_path) assert args.out_path.endswith('.pkl') print(args.action) new_sds = [] merged_sd = None for action in args.action: print(f'action: {action}') if action == "convert_to_new": assert os.path.isdir(args.in_path) for sd_fname in glob.glob(os.path.join(args.in_path, '*')): new_sds.append(sd_to_new_format(get_pickle(sd_fname))) elif action == "merge": if len(new_sds) == 0: assert os.path.isdir(args.in_path) for sd_fname in glob.glob(os.path.join(args.in_path, '*')): new_sds.append(get_pickle(sd_fname)) for sd in new_sds: if merged_sd is None: merged_sd = copy.deepcopy(sd) else: merge_score_dicts(merged_sd, sd) else: if merged_sd is None: