def verify_num_unique_urls(
    split: str,
    csv_save_fpath: str,
    hit_info: SentinelHIT,
    split_fpaths: List[str]
) -> None:
    """Verify that every image URL we expected to publish appears in the written csv.

    Args:
    -   split: dataset split name, e.g. 'train' or 'val'
    -   csv_save_fpath: path to the published HIT csv file
    -   hit_info: SentinelHIT object holding sentinel (gold-standard) image info
    -   split_fpaths: file paths of all legitimate (non-sentinel) images in the split

    Returns:
    -   None
    """
    split_fnames = [Path(fpath).name for fpath in split_fpaths]
    rows = read_csv(csv_save_fpath)
    unique_found_fnames = set()
    for row in rows:
        for k in row.keys():
            url = row[k]
            fname = Path(url).name
            unique_found_fnames.add(fname)

    sent_fnames = list(zip(*hit_info.sentinels))[0]  # sentinel fnames
    found_plus_sent = unique_found_fnames.union(set(sent_fnames))
    split_plus_sent = set(split_fnames).union(set(sent_fnames))
    assert found_plus_sent == split_plus_sent
    print('All expected URLs found in published csv. Success...')
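
# A minimal sketch (hypothetical filenames) of the set algebra used in
# verify_num_unique_urls() above: sentinel images appear in the published csv
# but not in split_fpaths, so both sides must be unioned with the sentinel
# filenames before comparison.
def _demo_union_check() -> None:
    found = {'a.png', 'b.png', 'gold1.png'}  # filenames read back from the csv
    split = {'a.png', 'b.png'}               # legitimate split images
    sents = {'gold1.png', 'gold2.png'}       # gold-standard sentinel images
    assert found.union(sents) == split.union(sents)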

def test_write_csv():
    """Round-trip test: write two rows with write_csv(), read them back with
    read_csv(), and verify the contents match."""
    dict_list = [
        {
            'WorkerId': '1',
            'image_url_1': 'cat.png',
            'image_url_2': 'dog.png'
        },
        {
            'WorkerId': '2',
            'image_url_1': 'elephant.png',
            'image_url_2': 'house.png'
        }
    ]
    csv_fpath = f'{_TEST_DIR}/test_data/temp_written_data.csv'
    write_csv(csv_fpath, dict_list)
    rows = read_csv(csv_fpath)

    d1 = collections.OrderedDict()
    d1['WorkerId'] = '1'
    d1['image_url_1'] = 'cat.png'
    d1['image_url_2'] = 'dog.png'

    d2 = collections.OrderedDict()
    d2['WorkerId'] = '2'
    d2['image_url_1'] = 'elephant.png'
    d2['image_url_2'] = 'house.png'

    gt_dict_list = [d1, d2]
    assert gt_dict_list == rows
    os.remove(csv_fpath)

def _pct(v: str) -> float:
    """Convert a fractional csv value to a percentage."""
    return 100 * float(v)


# (column name, converter) pairs, in the order the columns are accumulated.
# Other csv columns ('Submission ID', 'Submitted at', 'AVG-RANK', ...) are ignored.
_TRACKING_COLS = [
    ('C:MOTA', float), ('P:MOTA', float),
    ('C:MOTPD', float), ('P:MOTPD', float),
    ('C:MOTPO', float), ('P:MOTPO', float),
    ('C:MOTPI', float), ('P:MOTPI', float),
    ('C:IDF1', _pct), ('P:IDF1', _pct),
    ('C:MT', _pct), ('P:MT', _pct),
    ('C:ML', _pct), ('P:ML', _pct),
    ('C:FP', int), ('P:FP', int),
    ('C:FN', int), ('P:FN', int),
    ('C:SW', int), ('P:SW', int),
    ('C:FRG', int), ('P:FRG', int),
    ('C:MT-OCC', _pct), ('C:MT-FAR', _pct),
    ('C:ML-OCC', _pct), ('C:ML-FAR', _pct),
    ('C:FRG-OCC', int), ('C:FRG-FAR', int),
    ('C:SW-OCC', int), ('C:SW-FAR', int),
    ('C:MT-FST', _pct), ('C:ML-FST', _pct),
    ('C:FRG-FST', int), ('C:SW-FST', int),
    ('P:MT-OCC', _pct), ('P:MT-FAR', _pct),
    ('P:ML-OCC', _pct), ('P:ML-FAR', _pct),
    ('P:FRG-OCC', int), ('P:FRG-FAR', int),
    ('P:SW-OCC', int), ('P:SW-FAR', int),
]


def get_tracking_results():
    """Read the CVPR Argoverse tracking-competition csv into a dict of
    per-column value lists, one entry per team."""
    fpath = '/Users/johnlamb/Downloads/cvpr-argoverse-tracking-winners.csv'
    rows = read_csv(fpath, delimiter=',')
    result_dict = defaultdict(list)
    for row in rows:
        print(row['Team name'])
        result_dict['Team name'] += [row['Team name']]
        for col, convert in _TRACKING_COLS:
            result_dict[col] += [convert(row[col])]
    return result_dict
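
# A minimal usage sketch: rank teams by C:MOTA (higher is better) from the
# dict of column lists returned above. Assumes the csv is readable and the
# columns named in _TRACKING_COLS are present.
def _print_tracking_leaderboard() -> None:
    results = get_tracking_results()
    order = np.argsort(-np.array(results['C:MOTA']))
    for rank, idx in enumerate(order, start=1):
        print(f"{rank}. {results['Team name'][idx]}: C:MOTA={results['C:MOTA'][idx]:.2f}")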

# Forecasting leaderboard columns, in accumulation order; all are parsed as floats.
_FORECASTING_COLS = [
    'minADE (K=1)', 'minFDE (K=1)', 'DAC (K=1)', 'MR (K=1)',
    'minADE (K=6)', 'minFDE (K=6)', 'DAC (K=6)', 'MR (K=6)',
    'p-minADE (K=6)', 'p-minFDE (K=6)',
]


def get_forecasting_results():
    """Read the CVPR Argoverse forecasting-competition csv into a dict of
    per-column value lists, one entry per team."""
    fpath = '/Users/johnlamb/Downloads/cvpr-argoverse-forecasting-winners.csv'
    rows = read_csv(fpath, delimiter=',')
    result_dict = defaultdict(list)
    for row in rows:
        print(row['Team name'])
        result_dict['Team name'] += [row['Team name']]
        for col in _FORECASTING_COLS:
            result_dict[col] += [float(row[col])]
    return result_dict

def count_hours():
    """Sum annotator work time (WorkTimeInSeconds) over all MTurk batch csvs
    and report the total, plus per-bucket totals in decreasing order."""
    bucket_names = []
    bucket_counts = []
    mseg_hours = 0
    for dirpath in [
        '/Users/johnlamb/Downloads/part1_csvs',
        '/Users/johnlamb/Downloads/part2_csvs'
    ]:
        csv_fpaths = glob.glob(f'{dirpath}/*.csv')
        print(f'Found {len(csv_fpaths)} csv files.')
        for csv_fpath in csv_fpaths:
            rows = read_csv(csv_fpath, delimiter=',')
            bucket_name = rows[0]['Input.image_url_1']
            batch_sec = 0
            for row in rows:
                sec = int(row['WorkTimeInSeconds'])
                batch_sec += sec
            batch_hours = batch_sec / (60 * 60)
            mseg_hours += batch_hours
            batch_days = batch_hours / 24
            # print(f'Batch took {batch_days:.2f} days -> {bucket_name}')
            bucket_names += [bucket_name]
            bucket_counts += [batch_days]

    mseg_days = mseg_hours / 24
    print(f'MSeg took {mseg_days:.2f} days')
    bucket_names = np.array(bucket_names)
    bucket_counts = np.array(bucket_counts)
    sort_idxs = np.argsort(-bucket_counts)
    sorted_bucket_names = bucket_names[sort_idxs]
    sorted_bucket_counts = bucket_counts[sort_idxs]
    for b_name, b_days in zip(sorted_bucket_names, sorted_bucket_counts):
        print(f'{b_days:.2f} for {b_name}')
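
# A worked example of the time conversion above, with hypothetical numbers:
# a batch whose rows sum to 90,000 worker-seconds amounts to 90000 / 3600 =
# 25.0 hours, i.e. 25 / 24 ~= 1.04 days of annotation time.
def _demo_worktime_conversion() -> None:
    batch_sec = 90000  # hypothetical summed WorkTimeInSeconds
    batch_hours = batch_sec / (60 * 60)
    batch_days = batch_hours / 24
    assert batch_hours == 25.0
    assert abs(batch_days - 25 / 24) < 1e-9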

def test_read_csv():
    """
    Given csv data with the following form
    -------------------------
    WorkerId,image_url_1
    1,cat.png
    2,dog.png
    -------------------------
    Ensure we can read it out correctly as OrderedDicts using DictReader.
    """
    d1 = collections.OrderedDict()
    d1['WorkerId'] = '1'
    d1['image_url_1'] = 'cat.png'

    d2 = collections.OrderedDict()
    d2['WorkerId'] = '2'
    d2['image_url_1'] = 'dog.png'

    dict_list = [d1, d2]
    csv_fpath = f'{_TEST_DIR}/test_data/dummy_csv_data_to_read.csv'
    rows = read_csv(csv_fpath)
    assert rows == dict_list

def create_write_hit_specs(
    split: str,
    csv_save_fpath: str,
    hit_info: SentinelHIT,
    fpaths: List[str],
    sentinel_percentage: int = 10
) -> None:
    """Batch image URLs into HIT rows and write them to a csv.

    We generally set NUM_IMGS_PER_HIT to 100, and a 10% sentinel pctg.,
    such that 90% of the HIT is legitimate work.

    Args:
    -   split: dataset split name, either 'train' or 'val'
    -   csv_save_fpath: path where the HIT csv will be written
    -   hit_info: SentinelHIT object holding bucket names and sentinel image info
    -   fpaths: file paths of all legitimate (non-sentinel) images to annotate
    -   sentinel_percentage: percentage of each HIT devoted to sentinel
            (gold-standard) images, defaults to 10

    Returns:
    -   None
    """
    if split == 'val':
        split_bucket_name = hit_info.val_bucket_name
    elif split == 'train':
        split_bucket_name = hit_info.train_bucket_name

    row_keys = [f'image_url_{i}' for i in range(NUM_IMGS_PER_HIT)]
    num_nonsentinels = NUM_IMGS_PER_HIT - int(sentinel_percentage * NUM_IMGS_PER_HIT / 100)
    csv_row_dicts = []
    # count off 90 non-sentinel images at a time
    hit_start_idxs = range(0, len(fpaths), num_nonsentinels)
    for batch_idx, start_idx in enumerate(hit_start_idxs):
        # choose up to 90 imgs; ending index is one greater than last valid idx
        end_idx = min(start_idx + num_nonsentinels, len(fpaths))
        print(f'Forming HIT {batch_idx} of {split}: from {start_idx}->{end_idx}')
        hit_urls = accumulate_hit_urls(
            hit_info,
            hit_fpaths=fpaths[start_idx:end_idx],
            split_bucket_name=split_bucket_name
        )
        # at least 10% are sentinels/gold standard: choose 10 random sentinels
        # (or more, to pad the final short batch up to 100)
        num_req_sents = NUM_IMGS_PER_HIT - len(hit_urls)  # number of requested sentinels
        print(f'\t Need to add sentinels. Current len: {len(hit_urls)}')
        hit_urls.extend(accumulate_hit_urls_from_sentinels(hit_info, num_req_sents))
        print(f'\t Added sentinels. Current len: {len(hit_urls)}')

        if batch_idx == len(hit_start_idxs) - 1:
            # we're on the last batch, which may have duplicates from sentinel
            # padding; we don't want duplicates adjacent to one another.
            random.shuffle(hit_urls)
        else:
            # won't have duplicates, so sorting is fine.
            hit_urls.sort()

        csv_row_dict = {k: url for k, url in zip(row_keys, hit_urls)}
        csv_row_dicts += [csv_row_dict]

    write_csv(csv_save_fpath, dict_list=csv_row_dicts)
    written_rows = read_csv(csv_save_fpath)
    num_hits = len(written_rows)
    assert num_hits == math.ceil(len(fpaths) / num_nonsentinels)
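
# A minimal sketch of the batching arithmetic used in create_write_hit_specs()
# above, with hypothetical values: at NUM_IMGS_PER_HIT = 100 and a 10% sentinel
# percentage, each HIT carries 90 legitimate images, so a 450-image split
# yields math.ceil(450 / 90) = 5 HITs.
def _demo_hit_batching() -> None:
    num_imgs_per_hit = 100  # mirrors NUM_IMGS_PER_HIT
    sentinel_percentage = 10
    num_fpaths = 450  # hypothetical split size
    num_nonsentinels = num_imgs_per_hit - int(sentinel_percentage * num_imgs_per_hit / 100)
    assert num_nonsentinels == 90
    assert math.ceil(num_fpaths / num_nonsentinels) == 5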