def check_parse(fixie_basename, i):
    """Check one parsed table of a fixture against its expected CSV file.

    Parses ``fixtures/<fixie_basename>.txt``, takes the element at position
    ``i - 1`` of the result, renders it to CSV in memory and compares it
    line by line with ``fixtures/<fixie_basename>_t<i>.csv``.

    Args:
        fixie_basename: Fixture file name without the ``.txt`` extension.
        i: 1-based table number; selects both the parsed table
            (index ``i - 1``) and the expected ``_t<i>.csv`` file.

    Each pair of lines is checked with ``n.assert_equal`` on the line
    lengths only, not on the line contents.
    """
    fixie_path = os.path.join('fixtures', fixie_basename + '.txt')
    observed_dict = parse_file(fixie_path, 'r')[i - 1]

    # Render the parsed table into an in-memory buffer rather than a file.
    observed_csv = StringIO.StringIO()
    observed_dict.to_csv(observed_csv, index=False, header=True, encoding='utf-8', na_rep='')
    observed = observed_csv.getvalue()

    expected_path = os.path.join('fixtures', '%s_t%d.csv' % (fixie_basename, i))
    # Use a context manager so the fixture handle is closed deterministically
    # instead of being left open until garbage collection.
    with open(expected_path) as expected_file:
        expected = expected_file.read()

    # NOTE(review): zip() stops at the shorter sequence, so surplus lines on
    # either side are never checked, and only lengths are compared.
    for o, e in zip(observed.split('\n'), expected.split('\n')):
        n.assert_equal(len(o), len(e))
Ejemplo n.º 2
0
def check_parse(fixie_basename, i):
    """Check one parsed table of a fixture against its expected CSV file.

    Parses ``fixtures/<fixie_basename>.txt``, takes the element at position
    ``i - 1`` of the result, renders it to CSV in memory and compares it
    line by line with ``fixtures/<fixie_basename>_t<i>.csv``.

    Args:
        fixie_basename: Fixture file name without the ``.txt`` extension.
        i: 1-based table number; selects both the parsed table
            (index ``i - 1``) and the expected ``_t<i>.csv`` file.

    Each pair of lines is checked with ``n.assert_equal`` on the line
    lengths only, not on the line contents.
    """
    observed_dict = parse_file(
        os.path.join('fixtures', fixie_basename + '.txt'), 'r')[i - 1]

    # Render the parsed table into an in-memory buffer rather than a file.
    observed_csv = StringIO.StringIO()
    observed_dict.to_csv(observed_csv,
                         index=False,
                         header=True,
                         encoding='utf-8',
                         na_rep='')
    observed = observed_csv.getvalue()

    expected_path = os.path.join(
        'fixtures', '%s_t%d.csv' % (fixie_basename, i))
    # Use a context manager so the fixture handle is closed deterministically
    # instead of being left open until garbage collection.
    with open(expected_path) as expected_file:
        expected = expected_file.read()

    # NOTE(review): zip() stops at the shorter sequence, so surplus lines on
    # either side are never checked, and only lengths are compared.
    for o, e in zip(observed.split('\n'), expected.split('\n')):
        n.assert_equal(len(o), len(e))
Ejemplo n.º 3
0
# invoke the downloader with the start_date / end_date bounds
download_fms_fixies.download_fixies(start_date, end_date)

# basenames (text before the first '.') of every .txt fixie in FIXIE_DIR;
# these are later compared against the csvs that have already been parsed
downloaded_files = {
	fixie.split('.')[0]
	for fixie in os.listdir(FIXIE_DIR)
	if fixie.endswith('.txt')
}
def parsed_files():
	"""Return the set of name prefixes of the .csv files in DAILY_CSV_DIR.

	The prefix is the part of each file name before its first underscore.
	The directory is listed anew on every call.
	"""
	csv_names = (entry for entry in os.listdir(DAILY_CSV_DIR) if entry.endswith('.csv'))
	return {entry.split('_')[0] for entry in csv_names}


## PARSE! #####################################################################
# downloaded fixies that have no daily csv yet, in sorted order
new_files = sorted(downloaded_files - parsed_files())

# parse every new fixie
for f in new_files:
	fname = os.path.join(FIXIE_DIR, '%s.txt' % f)
	dfs = parse_fms_fixies.parse_file(fname, verbose=False)

	# every table of every date is written to its own csv file
	for df in dfs.values():
		try:
			# NOTE(review): DataFrame.ix was removed in pandas 1.0; left
			# unchanged here because the frame's index type is not visible.
			t_name = df.ix[0,'table']
			t_name_match = re.search(r'TABLE [\w-]+', t_name)
			# a failed match leaves t_name_match as None; the resulting
			# AttributeError is handled by the except clause below
			t_name_short = re.sub(r'-| ', '_', t_name_match.group().lower())
		except Exception as e:
			# best effort: report the failure and move on to the next table
			print('***ERROR: tables failed to parse!', e)
			continue
		else:
			# file name is <fixie basename>_<short table name>.csv
			daily_csv = os.path.join(DAILY_CSV_DIR, '%s_%s.csv' % (f.split('.')[0], t_name_short))
			df.to_csv(daily_csv, index=False, header=True, encoding='utf-8', na_rep='')