def verify_num_unique_urls(
    split: str,
    csv_save_fpath: str,
    hit_info: SentinelHIT,
    split_fpaths: List[str]
) -> None:
    """Verify that every image URL we expected to publish appears in the written csv.

    Args:
    -   split: dataset split name, e.g. 'train' or 'val'
    -   csv_save_fpath: path to the published HIT csv file
    -   hit_info: SentinelHIT object holding sentinel (gold-standard) image info
    -   split_fpaths: file paths of all legitimate (non-sentinel) images in the split

    Returns:
    -   None
    """
    split_fnames = [Path(fpath).name for fpath in split_fpaths]
    rows = read_csv(csv_save_fpath)
    unique_found_fnames = set()
    for row in rows:
        for k in row.keys():
            url = row[k]
            fname = Path(url).name
            unique_found_fnames.add(fname)

    sent_fnames = list(zip(*hit_info.sentinels))[0]  # sentinel fnames
    found_plus_sent = unique_found_fnames.union(set(sent_fnames))
    split_plus_sent = set(split_fnames).union(set(sent_fnames))
    assert found_plus_sent == split_plus_sent
    print('All expected URLs found in published csv. Success...')
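
# A minimal sketch (hypothetical filenames) of the set algebra used in
# verify_num_unique_urls() above: sentinel images appear in the published csv
# but not in split_fpaths, so both sides must be unioned with the sentinel
# filenames before comparison.
def _demo_union_check() -> None:
    found = {'a.png', 'b.png', 'gold1.png'}  # filenames read back from the csv
    split = {'a.png', 'b.png'}               # legitimate split images
    sents = {'gold1.png', 'gold2.png'}       # gold-standard sentinel images
    assert found.union(sents) == split.union(sents)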

def test_write_csv():
    """Round-trip test: write two rows with write_csv(), read them back with
    read_csv(), and verify the contents match."""
    dict_list = [
        {
            'WorkerId': '1',
            'image_url_1': 'cat.png',
            'image_url_2': 'dog.png'
        },
        {
            'WorkerId': '2',
            'image_url_1': 'elephant.png',
            'image_url_2': 'house.png'
        }
    ]
    csv_fpath = f'{_TEST_DIR}/test_data/temp_written_data.csv'
    write_csv(csv_fpath, dict_list)
    rows = read_csv(csv_fpath)

    d1 = collections.OrderedDict()
    d1['WorkerId'] = '1'
    d1['image_url_1'] = 'cat.png'
    d1['image_url_2'] = 'dog.png'

    d2 = collections.OrderedDict()
    d2['WorkerId'] = '2'
    d2['image_url_1'] = 'elephant.png'
    d2['image_url_2'] = 'house.png'

    gt_dict_list = [d1, d2]
    assert gt_dict_list == rows
    os.remove(csv_fpath)

def _pct(v: str) -> float:
    """Convert a fractional csv value to a percentage."""
    return 100 * float(v)


# (column name, converter) pairs, in the order the columns are accumulated.
# Other csv columns ('Submission ID', 'Submitted at', 'AVG-RANK', ...) are ignored.
_TRACKING_COLS = [
    ('C:MOTA', float), ('P:MOTA', float),
    ('C:MOTPD', float), ('P:MOTPD', float),
    ('C:MOTPO', float), ('P:MOTPO', float),
    ('C:MOTPI', float), ('P:MOTPI', float),
    ('C:IDF1', _pct), ('P:IDF1', _pct),
    ('C:MT', _pct), ('P:MT', _pct),
    ('C:ML', _pct), ('P:ML', _pct),
    ('C:FP', int), ('P:FP', int),
    ('C:FN', int), ('P:FN', int),
    ('C:SW', int), ('P:SW', int),
    ('C:FRG', int), ('P:FRG', int),
    ('C:MT-OCC', _pct), ('C:MT-FAR', _pct),
    ('C:ML-OCC', _pct), ('C:ML-FAR', _pct),
    ('C:FRG-OCC', int), ('C:FRG-FAR', int),
    ('C:SW-OCC', int), ('C:SW-FAR', int),
    ('C:MT-FST', _pct), ('C:ML-FST', _pct),
    ('C:FRG-FST', int), ('C:SW-FST', int),
    ('P:MT-OCC', _pct), ('P:MT-FAR', _pct),
    ('P:ML-OCC', _pct), ('P:ML-FAR', _pct),
    ('P:FRG-OCC', int), ('P:FRG-FAR', int),
    ('P:SW-OCC', int), ('P:SW-FAR', int),
]


def get_tracking_results():
    """Read the CVPR Argoverse tracking-competition csv into a dict of
    per-column value lists, one entry per team."""
    fpath = '/Users/johnlamb/Downloads/cvpr-argoverse-tracking-winners.csv'
    rows = read_csv(fpath, delimiter=',')
    result_dict = defaultdict(list)
    for row in rows:
        print(row['Team name'])
        result_dict['Team name'] += [row['Team name']]
        for col, convert in _TRACKING_COLS:
            result_dict[col] += [convert(row[col])]
    return result_dict
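
# A minimal usage sketch: rank teams by C:MOTA (higher is better) from the
# dict of column lists returned above. Assumes the csv is readable and the
# columns named in _TRACKING_COLS are present.
def _print_tracking_leaderboard() -> None:
    results = get_tracking_results()
    order = np.argsort(-np.array(results['C:MOTA']))
    for rank, idx in enumerate(order, start=1):
        print(f"{rank}. {results['Team name'][idx]}: C:MOTA={results['C:MOTA'][idx]:.2f}")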

# Forecasting leaderboard columns, in accumulation order; all are parsed as floats.
_FORECASTING_COLS = [
    'minADE (K=1)', 'minFDE (K=1)', 'DAC (K=1)', 'MR (K=1)',
    'minADE (K=6)', 'minFDE (K=6)', 'DAC (K=6)', 'MR (K=6)',
    'p-minADE (K=6)', 'p-minFDE (K=6)',
]


def get_forecasting_results():
    """Read the CVPR Argoverse forecasting-competition csv into a dict of
    per-column value lists, one entry per team."""
    fpath = '/Users/johnlamb/Downloads/cvpr-argoverse-forecasting-winners.csv'
    rows = read_csv(fpath, delimiter=',')
    result_dict = defaultdict(list)
    for row in rows:
        print(row['Team name'])
        result_dict['Team name'] += [row['Team name']]
        for col in _FORECASTING_COLS:
            result_dict[col] += [float(row[col])]
    return result_dict

def count_hours():
    """Sum annotator work time (WorkTimeInSeconds) over all MTurk batch csvs
    and report the total, plus per-bucket totals in decreasing order."""
    bucket_names = []
    bucket_counts = []
    mseg_hours = 0
    for dirpath in [
        '/Users/johnlamb/Downloads/part1_csvs',
        '/Users/johnlamb/Downloads/part2_csvs'
    ]:
        csv_fpaths = glob.glob(f'{dirpath}/*.csv')
        print(f'Found {len(csv_fpaths)} csv files.')
        for csv_fpath in csv_fpaths:
            rows = read_csv(csv_fpath, delimiter=',')
            bucket_name = rows[0]['Input.image_url_1']
            batch_sec = 0
            for row in rows:
                sec = int(row['WorkTimeInSeconds'])
                batch_sec += sec
            batch_hours = batch_sec / (60 * 60)
            mseg_hours += batch_hours
            batch_days = batch_hours / 24
            # print(f'Batch took {batch_days:.2f} days -> {bucket_name}')
            bucket_names += [bucket_name]
            bucket_counts += [batch_days]

    mseg_days = mseg_hours / 24
    print(f'MSeg took {mseg_days:.2f} days')
    bucket_names = np.array(bucket_names)
    bucket_counts = np.array(bucket_counts)
    sort_idxs = np.argsort(-bucket_counts)
    sorted_bucket_names = bucket_names[sort_idxs]
    sorted_bucket_counts = bucket_counts[sort_idxs]
    for b_name, b_days in zip(sorted_bucket_names, sorted_bucket_counts):
        print(f'{b_days:.2f} for {b_name}')
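
# A worked example of the time conversion above, with hypothetical numbers:
# a batch whose rows sum to 90,000 worker-seconds amounts to 90000 / 3600 =
# 25.0 hours, i.e. 25 / 24 ~= 1.04 days of annotation time.
def _demo_worktime_conversion() -> None:
    batch_sec = 90000  # hypothetical summed WorkTimeInSeconds
    batch_hours = batch_sec / (60 * 60)
    batch_days = batch_hours / 24
    assert batch_hours == 25.0
    assert abs(batch_days - 25 / 24) < 1e-9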

def test_read_csv():
    """
    Given csv data with the following form
    -------------------------
    WorkerId,image_url_1
    1,cat.png
    2,dog.png
    -------------------------
    Ensure we can read it out correctly as OrderedDicts using DictReader.
    """
    d1 = collections.OrderedDict()
    d1['WorkerId'] = '1'
    d1['image_url_1'] = 'cat.png'

    d2 = collections.OrderedDict()
    d2['WorkerId'] = '2'
    d2['image_url_1'] = 'dog.png'

    dict_list = [d1, d2]
    csv_fpath = f'{_TEST_DIR}/test_data/dummy_csv_data_to_read.csv'
    rows = read_csv(csv_fpath)
    assert rows == dict_list

def create_write_hit_specs(
    split: str,
    csv_save_fpath: str,
    hit_info: SentinelHIT,
    fpaths: List[str],
    sentinel_percentage: int = 10
) -> None:
    """Batch image URLs into HIT rows and write them to a csv.

    We generally set NUM_IMGS_PER_HIT to 100, and a 10% sentinel pctg.,
    such that 90% of the HIT is legitimate work.

    Args:
    -   split: dataset split name, either 'train' or 'val'
    -   csv_save_fpath: path where the HIT csv will be written
    -   hit_info: SentinelHIT object holding bucket names and sentinel image info
    -   fpaths: file paths of all legitimate (non-sentinel) images to annotate
    -   sentinel_percentage: percentage of each HIT devoted to sentinel
            (gold-standard) images, defaults to 10

    Returns:
    -   None
    """
    if split == 'val':
        split_bucket_name = hit_info.val_bucket_name
    elif split == 'train':
        split_bucket_name = hit_info.train_bucket_name

    row_keys = [f'image_url_{i}' for i in range(NUM_IMGS_PER_HIT)]
    num_nonsentinels = NUM_IMGS_PER_HIT - int(sentinel_percentage * NUM_IMGS_PER_HIT / 100)
    csv_row_dicts = []
    # count off 90 non-sentinel images at a time
    hit_start_idxs = range(0, len(fpaths), num_nonsentinels)
    for batch_idx, start_idx in enumerate(hit_start_idxs):
        # choose up to 90 imgs; ending index is one greater than last valid idx
        end_idx = min(start_idx + num_nonsentinels, len(fpaths))
        print(f'Forming HIT {batch_idx} of {split}: from {start_idx}->{end_idx}')
        hit_urls = accumulate_hit_urls(
            hit_info,
            hit_fpaths=fpaths[start_idx:end_idx],
            split_bucket_name=split_bucket_name
        )
        # at least 10% are sentinels/gold standard: choose 10 random sentinels
        # (or more, to pad the final short batch up to 100)
        num_req_sents = NUM_IMGS_PER_HIT - len(hit_urls)  # number of requested sentinels
        print(f'\t Need to add sentinels. Current len: {len(hit_urls)}')
        hit_urls.extend(accumulate_hit_urls_from_sentinels(hit_info, num_req_sents))
        print(f'\t Added sentinels. Current len: {len(hit_urls)}')

        if batch_idx == len(hit_start_idxs) - 1:
            # we're on the last batch, which may have duplicates from sentinel
            # padding; we don't want duplicates adjacent to one another.
            random.shuffle(hit_urls)
        else:
            # won't have duplicates, so sorting is fine.
            hit_urls.sort()

        csv_row_dict = {k: url for k, url in zip(row_keys, hit_urls)}
        csv_row_dicts += [csv_row_dict]

    write_csv(csv_save_fpath, dict_list=csv_row_dicts)
    written_rows = read_csv(csv_save_fpath)
    num_hits = len(written_rows)
    assert num_hits == math.ceil(len(fpaths) / num_nonsentinels)
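
# A minimal sketch of the batching arithmetic used in create_write_hit_specs()
# above, with hypothetical values: at NUM_IMGS_PER_HIT = 100 and a 10% sentinel
# percentage, each HIT carries 90 legitimate images, so a 450-image split
# yields math.ceil(450 / 90) = 5 HITs.
def _demo_hit_batching() -> None:
    num_imgs_per_hit = 100  # mirrors NUM_IMGS_PER_HIT
    sentinel_percentage = 10
    num_fpaths = 450  # hypothetical split size
    num_nonsentinels = num_imgs_per_hit - int(sentinel_percentage * num_imgs_per_hit / 100)
    assert num_nonsentinels == 90
    assert math.ceil(num_fpaths / num_nonsentinels) == 5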