Example #1
def comparing_with_ground_truth(tops, txt_infos, k):
    utils.dump_pickle("result.pkl", tops)
    gt = utils.get_pickle("datasets/qst1_w4/gt_corresps.pkl")
    hypo = utils.get_pickle("result.pkl")
    mapAtK = metrics.mapk(gt, hypo, k)
    print("\nMap@ " + str(k) + " is " + str(mapAtK))

    bbs_gt = np.asarray(
        utils.get_groundtruth("datasets/qst1_w4/text_boxes.pkl")).squeeze()
    bbs_predicted = [[painting.boundingxy for painting in txt_info]
                     for txt_info in txt_infos]
    mean_iou = utils.get_mean_IoU(bbs_gt, bbs_predicted)
    print("Mean Intersection over Union: ", mean_iou)

    texts_gt = utils.get_gt_text("datasets/qst1_w4")
    texts_predicted = [[painting.text for painting in txt_info]
                       for txt_info in txt_infos]
    with open('results.txt', 'w') as f:
        for item in texts_predicted:
            f.write("%s\n" % item)
    mean_lev = utils.compute_lev(texts_gt, texts_predicted)
    print(texts_predicted)
    print("\n")
    print(texts_gt)
    print("Mean Levenshtein distance: ", mean_lev)
Example #2
def main(args):
    s2i = get_pickle('assets/s2i.pkl')
    covariance = get_pickle('assets/covariance.pkl')
    indices = covariance.stack().index.tolist()
    dataset = gloveDataset(indices, covariance, s2i)
    model = GloVeCov(len(s2i), 300)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    losses = []
    epoch_losses = [np.inf, np.inf, np.inf]
    total_n = len(dataset)
    tmplt = "E:{:2d} - i:{:5d}({:4.2f}%) - L:{:5.5f}"
    for epoch in range(args.epoch):
        dataloader = DataLoader(dataset,
                                batch_size=args.bs,
                                collate_fn=collate_fn,
                                shuffle=True)
        model.train()
        losses = []
        for i, batch in enumerate(dataloader):
            left, right, covariances = batch
            left = torch.LongTensor(left)
            right = torch.LongTensor(right)
            covariances = torch.FloatTensor(covariances)
            loss = model(left, right, covariances)
            loss.backward()
            optimizer.step()
            model.zero_grad()
            losses.append(np.sqrt(loss.data))
            if i % 100 == 0:
                ml = np.mean(losses)
                t = tmplt.format(epoch, i, i * args.bs / total_n * 100, ml)
                print(t)
                losses = []
        model.eval()
        dataloader = DataLoader(dataset,
                                batch_size=args.bs,
                                collate_fn=collate_fn,
                                shuffle=True)
        losses = []
        for i, batch in enumerate(dataloader):
            left, right, covariances = batch
            left = torch.LongTensor(left)
            right = torch.LongTensor(right)
            covariances = torch.FloatTensor(covariances)
            loss = model(left, right, covariances)
            losses.append(np.sqrt(loss.data))
        epoch_losses.append(np.mean(losses))
        print('Epoch loss {}'.format(epoch_losses[-1]))
        if epoch_losses[-1] > epoch_losses[-4]:
            break
        else:
            filename = 'assets/model/model_glove_cov.torch'
            state = dict(state_dict=model.state_dict(),
                         loss=epoch_losses,
                         args=args)
            torch.save(state, filename)
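
main above expects an args namespace carrying lr, bs, and epoch. The parser sits outside the snippet; a minimal sketch of how it could be constructed (the flag names mirror the attributes used above, the defaults are illustrative assumptions):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=1e-3)   # Adam learning rate
    parser.add_argument("--bs", type=int, default=512)      # batch size for the DataLoader
    parser.add_argument("--epoch", type=int, default=10)    # maximum number of epochs
    main(parser.parse_args())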
Example #3
def main():
    #K parameter for map@k
    k = 1
    # Get images and denoise query set.
    print("Reading images...")
    qs = get_imgs("datasets/qsd1_w4")
    db = get_imgs("datasets/DDBB")
    """ Denoising methods
    "Gaussian"
    "Median"
    "bilateral"
    "FastNl"
    """
    print("Denoising images...")
    #qs_denoised = [utils.denoise_image(img, method="FastNl") for img in tqdm(qs)]

    # Separating paintings inside an image into separate images
    qs_split = [background_remover.remove_background(img) for img in qs]

    print("\nComputing histograms...")
    hogs_qs = [[utils.get_hog_histogram(painting) for painting in img] for img in qs_split]
    hogs_ddbb = utils.get_hog_histograms(db)

    print("\nComputing distances")
    distances = []

    #Generating distances between qs images and db images
    for im in tqdm(hogs_qs):
        current_im = []
        for painting_hog in im:
            current_pt = []
            for db_hog in hogs_ddbb:
                current_pt.append(sum(np.abs(painting_hog - db_hog)))
            current_im.append(current_pt)
        distances.append(current_im)

    print("Done calculating hogs")

    #Generating predictions
    predictions = []

    for im in distances:
        current_im = []
        for painting_dst in im:
            current_im.append(utils.list_argsort(painting_dst)[:k])
        predictions.append(current_im)

    #Remove nesting of lists
    hypo = []
    for im in predictions:
        current_im = []
        for painting in im:
            for pred in painting:
                current_im.append(pred)
        hypo.append(current_im)

    #Generate map@k
    gt = utils.get_pickle("datasets/qsd1_w4/gt_corresps.pkl")
    mapAtK = metrics.mapk(gt, hypo, k)
    print("\nMap@ " + str(k) + " is " + str(mapAtK))
Example #4
    def get_latest_tables(self):
        if self.table_ids is not None:
            log("Getting results from table(s) {} for group {}".format(self.table_ids, self.group_id), 0)
            return [table for table in self.get_tables() if table[0] in self.table_ids]
        if self.num_recent_tables is not None:
            log("Getting results from the most recent {} table(s) for group {}".format(self.num_recent_tables, self.group_id), 0)
            all_tables = self.get_tables()
            if self.num_recent_tables > len(all_tables):
                log("Running on all tables: {}".format(all_tables))
                return all_tables
            else:
                return all_tables[-1 * self.num_recent_tables:]

        log("Getting results from all tables since last table backup")
        new_table_list = self.get_tables()
        old_table_list_filename = "old_tables_{}.pkl".format(self.group_id)  # Group specific table list
        try:
            old_tables = get_pickle(self.output_dir, old_table_list_filename)
            new_tables = list(set(new_table_list) - set(old_tables))
            set_pickle(new_table_list, self.output_dir, old_table_list_filename)
            log("Found {} new tables since the last table backup. Tables: {}".format(len(new_tables), new_tables))
            return new_tables
        except Exception as e:
            log("Error: {}. Likely no table backup. Backing up current table list for future run.".format(e))
            set_pickle(new_table_list, self.output_dir, old_table_list_filename)
            return []
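
Unlike the earlier examples, this class calls get_pickle and set_pickle with a directory plus a file name, and get_pickle is expected to raise when no previous backup exists (hence the try/except). A minimal sketch of helpers with that shape, assuming they simply join the two path components; the real implementations are not shown on this page:

import os
import pickle

def get_pickle(directory, filename):
    # Load a pickled object from directory/filename; raises if the file is missing.
    with open(os.path.join(directory, filename), "rb") as f:
        return pickle.load(f)

def set_pickle(obj, directory, filename):
    # Pickle obj to directory/filename, overwriting any previous backup.
    with open(os.path.join(directory, filename), "wb") as f:
        pickle.dump(obj, f)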
Example #5
def comparing_with_ground_truth(tops, txt_infos, k):
    texts_predicted = [[painting.text for painting in txt_info]
                       for txt_info in txt_infos]
    for i, item in enumerate(texts_predicted):
        with open('outputs/' + f'{i:05}' + '.txt', 'w') as f:
            for text in item:
                f.write("%s\n" % text)

    gt = utils.get_pickle("datasets/qsd1_w5/gt_corresps.pkl")
    mapAtK = utils.compute_mapk(gt, tops, k)
    print("\nMap@ " + str(k) + " is " + str(mapAtK))

    bbs_gt = np.asarray(
        utils.get_groundtruth("datasets/qsd1_w5/text_boxes.pkl")).squeeze()
    bbs_predicted = [[painting.boundingxy for painting in txt_info]
                     for txt_info in txt_infos]
    mean_iou = utils.get_mean_IoU(bbs_gt, bbs_predicted)
    print("Mean Intersection over Union: ", mean_iou)

    texts_gt = utils.get_gt_text("datasets/qsd1_w5")
    texts_predicted = [[painting.text for painting in txt_info]
                       for txt_info in txt_infos]
    mean_lev = utils.compute_lev(texts_gt, texts_predicted)
    print(texts_predicted)
    print("\n")
    print(texts_gt)
    print("Mean Levenshtein distance: ", mean_lev)
Example #6
 def update_chat_for_table(self, table_id, ignore_last):
     old_chat_filename = "{}_{}_chat.pkl".format(self.group_id, table_id)
     old_chat = get_pickle(
         self.download_dir, old_chat_filename) if os.path.isfile(
             os.path.join(self.download_dir, old_chat_filename)) else None
     new_chat_before_ignore_last = self.get_chat_history(table_id)
     if len(new_chat_before_ignore_last) > ignore_last:
         new_chat = new_chat_before_ignore_last[:len(
             new_chat_before_ignore_last) - ignore_last]
     else:
         new_chat = new_chat_before_ignore_last
     len_new_chat = len(new_chat)
     if table_id in self.last_new_chat_lengths:
         last_new_chat_length = self.last_new_chat_lengths[table_id]
     else:
         last_new_chat_length = None
     if len_new_chat > 400 or \
             (last_new_chat_length is not None and last_new_chat_length > 140 and len_new_chat >= last_new_chat_length * 1.7) or \
             duplicate_at_start(new_chat_before_ignore_last):
         new_chat = []
         log("New chat length {} exceeds 1.7x the last new chat length {} or max 400 or duplicate. Ignoring."
             .format(len_new_chat, last_new_chat_length))
     else:
         log("New chat length {} does not exceed 1.7x last new chat length {} or max 400 or duplicate. Using new chat."
             .format(len_new_chat, last_new_chat_length))
         self.last_new_chat_lengths[table_id] = len(new_chat)
     log("Old chat length for table {}: {}".format(table_id,
                                                   self.log_chat(old_chat)))
     log("New chat length for table {}: {}".format(table_id,
                                                   self.log_chat(new_chat)))
     if old_chat != new_chat:
         consolodated_chat = self.consolodate_chats(old_chat, new_chat)
         if consolodated_chat is not None and len(consolodated_chat) > 0:
             log("Saving consolodated chat for table {}, length {}, first message {}, last message {}"
                 .format(table_id, self.log_chat(consolodated_chat),
                         consolodated_chat[0],
                         consolodated_chat[len(consolodated_chat) - 1]))
             set_pickle(consolodated_chat, self.download_dir,
                        old_chat_filename)
         else:
             log("Consolodated chat for table {} is None or len 0. Not saving."
                 .format(table_id))
             #log("Old:{}\n".format(old_chat))
             #log("New:{}\n".format(new_chat))
             if len(old_chat) > 20:
                 log("End of old:{}\n".format(old_chat[-20:]))
             if len(new_chat) > 20:
                 log("Beginning of new:{}\n".format(new_chat[:20]))
             log("Consolodated:\n{}".format(consolodated_chat))
         if consolodated_chat is None or len(
                 consolodated_chat) == 0 or not new_chat:
             # Situations where we fail, but want to run again, just run again rather than waiting
             log("Re running self.update_chat_for_table for table {} in 2 seconds"
                 .format(table_id))
             time.sleep(2)
             self.update_chat_for_table(table_id, ignore_last)
     else:
         log("Chat has not changed for table {}".format(table_id))
Example #7
 def from_file(cls, faces, data_file_name, n_eigs):
     pickle = get_pickle(data_file_name)
     if pickle is not None:
         logging.info('using previously calculated facespace')
         return cls(faces, n_eigs=n_eigs, face_space=pickle)
     else:
         logging.info('No previous facespace was found')
         eig_face = cls(faces, n_eigs=n_eigs)
         save_pickle(eig_face.entire_face_space, data_file_name)
         return eig_face
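
Here get_pickle takes a single path and is expected to return None, rather than raise, when no cached face space exists yet, with save_pickle as its counterpart. A sketch of that variant, again assuming plain pickle wrappers:

import os
import pickle

def get_pickle(path):
    # Return the cached object at path, or None if nothing has been saved yet.
    if not os.path.isfile(path):
        return None
    with open(path, "rb") as f:
        return pickle.load(f)

def save_pickle(obj, path):
    # Cache obj at path so the face space can be reused on the next run.
    with open(path, "wb") as f:
        pickle.dump(obj, f)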
Example #8
 def __init__(self, group_id, download_dir, table_ids, num_recent_tables):
     log("Running SiteReader with group id {}, download dir {}, table ids {}, num tables {}".format(group_id, download_dir, table_ids, num_recent_tables), 1)
     self.group_id = group_id
     self.download_dir = download_dir
     self.table_ids = table_ids
     self.num_recent_tables = num_recent_tables
     self.output_dir = os.path.abspath(os.path.join(download_dir, "../"))
     self.cookies = get_pickle(self.output_dir, "cookies.pkl")
     self.driver = None
     self.latest_tables = self.get_latest_tables()
     self.num_files_retrieved = 0
Example #9
    def get_new_messages(self):
        log("Checking for new messages", 3)
        message_id_list = [msg.id for msg in self.groupme_group.messages.list()]
        messages_pickle = "groupme_messages_{}.pkl".format(self.groupme_group_id)
        messages_pickle_path = os.path.join(self.output_dir, messages_pickle)

        if os.path.isfile(messages_pickle_path):
            old_messages = get_pickle(self.output_dir, messages_pickle)
            messages_to_check = list(set(message_id_list) - set(old_messages))
        else:
            messages_to_check = message_id_list
        set_pickle(message_id_list, self.output_dir, messages_pickle)
        self.new_messages = set(messages_to_check)
Example #10
def lambda_handler(event, context):
    try:
        body = json.loads(event.get('body', ''))
        pkl = get_pickle('pickles/factory_linear_regression.pkl')

        df = pd.DataFrame([body])
        X = df[['temp', 'vibration', 'current', 'noise']]
        X = normalize_features(X)

        prediction = pkl['model'].predict(X)
        encoding_prediction = pkl['encoding'][prediction[0]]
    except Exception as e:
        return response({"Error": str(e)}, 500)
    else:
        return response({"prediction": encoding_prediction}, 200)
Example #11
 def __init__(self, group_id, output_dir):
     log(
         "Running HistoryLogger with group id {}, output dir {}".format(
             group_id, output_dir), 1)
     self.group_id = group_id
     self.output_dir = os.path.join(script_path, output_dir)
     self.download_dir = os.path.join(script_path, self.output_dir,
                                      "ChatHistories")
     if not os.path.exists(self.download_dir):
         os.mkdir(self.download_dir)
     self.cookies = get_pickle(self.output_dir, "cookies.pkl")
     self.driver = None
     self.last_new_chat_lengths = {}
     self.last_active_tables = None
     self.active_tables = None
     self.all_tables = None
     self.last_all_tables = None
Example #12
    def task6(self, k=10):
        QS = [  # noqa
            hists for hists in tqdm(
                MultiHistDataset(self.QSD2_W3,
                                 masking=True,
                                 bbox=True,
                                 multires=4,
                                 method="color",
                                 texture="LBP",
                                 denoise=True))
        ]
        GT = get_pickle("datasets/qsd2_w3/gt_corresps.pkl")
        DB = list(
            tqdm(
                HistDataset(self.DDBB,
                            masking=False,
                            multires=4,
                            method="color",
                            texture="LBP")))  # noqa
        tops = find_multi_img_corresp_keep(QS, DB, k)
        dump_pickle("result_qst2.pkl", tops)
        mapAtK = metrics.mapk(GT, tops, k)
        print("Map@k is " + str(mapAtK))
        exit()
        with open("outputs/resutls.pkl", "wb") as f:
            pickle.dump(tops, f)
        print(tops)

        # Generate pngs
        QS1 = Dataset(self.QSD2_W2, masking=True, bbox=True)
        for i in range(len(QS1)):
            im = QS1.get_mask(i)
            cv2.imwrite("outputs/" + str(i) + ".png", im)
        text_boxes = [BBox().get_bbox_cords(QS1[i]) for i in range(len(QS1))]
        with open("outputs/text_boxes.pkl", "wb") as f:
            pickle.dump(text_boxes, f)
Example #13
    img = Image.merge("RGB", (b, g, r))
    img = img.rotate(angle=rotation, expand=False)
    return np.array(img)


def read_horizontal_image(path):
    img = cv2.imread(path)
    lines = get_all_lines(img)
    angle = get_horiz_angle(lines)
    return get_rotated(path, angle)


if __name__ == "__main__":
    from utils import get_pickle
    from tqdm.auto import tqdm

    GT = get_pickle("datasets/angles_qsd1w5_v2.pkl")
    sum_err, elems = 0, 0
    for i, path in enumerate(tqdm(image_paths)):
        img = cv2.imread(path)
        lines = get_all_lines(img)
        angle = get_horiz_angle(lines)
        gt_like_angle = get_GTFORMAT_angle(angle)
        for gt in GT[i]:
            sum_err += angle_diff(gt, gt_like_angle, mod=180)
            elems += 1
        show_img(draw_horizontal_lines(img, lines, angle))
        show_img(get_rotated(path, angle))

    print(f"Average precision {sum_err / elems}")
Example #14
import torch
import numpy as np
import pandas as pd
from model import GloVeCor, GloVeCov, SkipGram
from utils import get_pickle

s2i = get_pickle('assets/s2i.pkl')
i2s = get_pickle('assets/i2s.pkl')
holdings = pd.read_csv('assets/holdings.csv', index_col=6)

glove_cor_checkpoint = torch.load('assets/model/model_glove_cor.torch')
model_glove = GloVeCor(len(s2i), 300)
model_glove.load_state_dict(glove_cor_checkpoint['state_dict'])
weights = model_glove.embeddings.weight.detach()
np.savetxt('embeddings/glove_cor_tensors.tsv', weights, delimiter='\t')

glove_cov_checkpoint = torch.load('assets/model/model_glove_cov.torch')
model_glove = GloVeCov(len(s2i), 300)
model_glove.load_state_dict(glove_cov_checkpoint['state_dict'])
weights = model_glove.embeddings.weight.detach()
np.savetxt('embeddings/glove_cov_tensors.tsv', weights, delimiter='\t')

skip_checkpoint = torch.load('assets/model/model_skip.torch')
model_skip = SkipGram(len(s2i), 300)
model_skip.load_state_dict(skip_checkpoint['state_dict'])
weights = model_skip.embeddings.weight.detach()
np.savetxt('embeddings/skip_tensors.tsv', weights, delimiter='\t')

selector = [i2s[e] for e in range(len(weights))]
cols = ['Name', 'Sector', 'Industry Group', 'Country', 'Currency']
metadata = holdings.loc[selector, cols]
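
The script builds a metadata frame aligned with the exported embedding rows but, as shown, never writes it to disk, while Example #16 reads assets/metadata.tsv. A plausible final line, assuming the same tab-separated layout as the embedding files:

metadata.to_csv('assets/metadata.tsv', sep='\t', index=False)  # assumed export step, not part of the original snippet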
Example #15
import pandas as pd
from utils import write_pickle, get_pickle

CONTEXT_WINDOW = 3

s2i = get_pickle('assets/s2i.pkl')
returns = get_pickle('assets/returns.pkl')

returns.columns = [s2i[e] for e in returns.columns]
dataset = []
for date, row in returns.iterrows():
    print(date)
    for i, symbol in enumerate(row.index):
        sym_ret = row[symbol]
        if pd.isnull(sym_ret):
            continue
        closest = row[(row - sym_ret).dropna().abs().argsort()]
        similars = closest.iloc[1:(1 + CONTEXT_WINDOW)].index.tolist()
        for similar in similars:
            dataset.append(dict(input=symbol, target=similar))

dataset = pd.DataFrame(dataset)
write_pickle(dataset, 'assets/dataset.pkl')
Example #16
import pandas as pd
import numpy as np
from utils import get_pickle, to_corr

# Generals
returns = get_pickle('assets/returns.pkl')
corr = get_pickle('assets/correlations.pkl').values
cov = get_pickle('assets/covariance.pkl').values
days = returns.index
metadata = pd.read_csv('assets/metadata.tsv', sep='\t', index_col=False)
glove_cor = np.loadtxt('embeddings/glove_cor_tensors.tsv', delimiter='\t')
glove_cov = np.loadtxt('embeddings/glove_cov_tensors.tsv', delimiter='\t')
skip = np.loadtxt('embeddings/skip_tensors.tsv', delimiter='\t')
holdings = pd.read_csv('assets/holdings.csv', index_col=6)
holdings = holdings.reindex(returns.columns)
holdings = holdings.loc[holdings.index.notnull(), :]
aum = holdings['Mkt Val'].sum()
holdings.loc[:, 'Weight'] = holdings.loc[:, 'Mkt Val'] / aum
sectors = holdings['Sector'].dropna().unique().tolist()
weights = holdings['Weight']


def error_num(estimated, correct):
    mean = np.nanmean(np.abs(correct - estimated), axis=(0, 1))
    std = np.nanstd(np.abs(correct - estimated), axis=(0, 1))
    return mean, std


# Covariance and correlations
glove_cov_cov = np.matmul(glove_cov, np.transpose(glove_cov))
glove_cov_cor = to_corr(glove_cov)
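
With error_num defined and both the true matrices (cov, corr) and their embedding-based reconstructions in scope, the natural next step is to compare them. A sketch of that comparison, assuming the embedding rows line up with the rows of the covariance and correlation matrices; the correlation and skip-gram embeddings could be evaluated the same way:

cov_mean_err, cov_std_err = error_num(glove_cov_cov, cov)
print('GloVe-cov covariance error: mean {:.6f}, std {:.6f}'.format(cov_mean_err, cov_std_err))

cor_mean_err, cor_std_err = error_num(glove_cov_cor, corr)
print('GloVe-cov correlation error: mean {:.6f}, std {:.6f}'.format(cor_mean_err, cor_std_err))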
Example #17
def sd_to_new_format_file(in_path, out_path):
    old_sd = get_pickle(in_path)
    new_sd = sd_to_new_format(old_sd)
    write_pickle(new_sd, out_path)
Example #18
                        help="path of directory to store converted score dicts OR path "
                             "of file at which to store merged file",
                        required=True)
    args = parser.parse_args()
    assert not os.path.exists(args.out_path)
    assert args.out_path.endswith('.pkl')
    print(args.action)

    new_sds = []
    merged_sd = None
    for action in args.action:
        print(f'action: {action}')
        if action == "convert_to_new":
            assert os.path.isdir(args.in_path)
            for sd_fname in glob.glob(os.path.join(args.in_path, '*')):
                new_sds.append(sd_to_new_format(get_pickle(sd_fname)))

        elif action == "merge":
            if len(new_sds) == 0:
                assert os.path.isdir(args.in_path)
                for sd_fname in glob.glob(os.path.join(args.in_path, '*')):
                    new_sds.append(get_pickle(sd_fname))

            for sd in new_sds:
                if merged_sd is None:
                    merged_sd = copy.deepcopy(sd)
                else:
                    merge_score_dicts(merged_sd, sd)

        else:
            if merged_sd is None: