class Collection:
    """Operations that apply to the whole set of data releases."""

    # Every (year, month) pair that DateHelper reports as supported.
    all_dates = DateHelper.get_supported_dates()
    # Most recent release; `year` and `month` remain class attributes.
    year, month = DateHelper.get_latest_date()
    latest_vintage = Vintage(year, month)

    @classmethod
    def save_latest(cls):
        """Save the vintage for the latest available date."""
        vintage = cls.latest_vintage
        vintage.save()

    @classmethod
    def approve_latest(cls):
        """Validate the latest vintage as a quick check of the algorithm."""
        vintage = cls.latest_vintage
        vintage.validate()

    @classmethod
    def save_all(cls):
        """Build and save a vintage for every date in ``all_dates``."""
        for yr, mon in cls.all_dates:
            vintage = Vintage(yr, mon)
            vintage.save()

    @classmethod
    def approve_all(cls):
        """Validate the vintage for every date in ``all_dates``.

        Takes about 1-2 minutes on a fast computer. May fail if the dataset
        is not complete, e.g. word2csv wrote only part of a CSV file.
        """
        for yr, mon in cls.all_dates:
            print("Checking", yr, mon)
            Vintage(yr, mon).validate()
def test_get_supported_dates_excludes_2013_11(self):
    """November 2013 must not be listed among the supported dates."""
    supported = DateHelper.get_supported_dates()
    assert (2013, 11) not in supported
def test_get_supported_dates_ends_with_latest_date(self):
    """The last supported date must be the previous calendar month."""
    # Stepping one day back from the first day of the current month lands
    # on the last day of the previous month.
    first_of_this_month = dt.datetime.today().replace(day=1)
    last_of_prev_month = first_of_this_month - dt.timedelta(days=1)
    expected = (last_of_prev_month.year, last_of_prev_month.month)
    assert DateHelper.get_supported_dates()[-1] == expected
def test_get_supported_dates_starts_in_2009_4(self):
    """The first supported date must be April 2009."""
    supported = DateHelper.get_supported_dates()
    assert supported[0] == (2009, 4)
def test_get_latest_date(self):
    """The latest date must be a plausible (year, month) pair."""
    latest_year, latest_month = DateHelper.get_latest_date()
    assert latest_year >= 2017
    # Month must be a valid calendar month number.
    assert 1 <= latest_month <= 12
def test_validate_failes(self):
    """DateHelper.validate() must raise ValueError for a future date."""
    # Local import so the test does not depend on a module-level alias.
    import datetime

    # January of next year is always in the future. A hard-coded year would
    # stop being a failing input once the calendar reaches it.
    # NOTE(review): presumably validate() rejects dates outside the
    # supported list, which cannot include a future month - confirm.
    future_year = datetime.date.today().year + 1
    with pytest.raises(ValueError):
        DateHelper.validate(future_year, 1)
def test_validate_passes(self):
    """DateHelper.validate() must accept June 2015 without raising."""
    known_year, known_month = 2015, 6
    DateHelper.validate(known_year, known_month)
def __str__(self):
    """Return a multi-line dump: title, header lines, then data rows.

    The title shows ``self.label`` and ``self.coln``; each entry of
    ``self.lines`` is rendered as ``value <key>``; each item of
    ``self.datarows`` is rendered with ``str()`` on its own line.
    """
    title = "Table {} ({} columns)".format(self.label, self.coln)
    header_block = '\n'.join(
        "{} <{}>".format(text, key) for key, text in self.lines.items()
    )
    rows_block = '\n'.join(map(str, self.datarows))
    return "\n".join((title, header_block, rows_block))


def __repr__(self):
    """Return a ``Table(headers=..., datarows=...)`` representation."""
    headers_repr = repr(self.headers)
    datarows_repr = repr(self.datarows)
    return "Table(headers={},\ndatarows={})".format(headers_repr, datarows_repr)


if __name__ == "__main__":
    # Script-only imports, kept out of the module's import-time scope.
    from config import PathHelper, DateHelper
    import csv2df.reader as reader
    import csv2df.specification as spec

    # Parse the CSV file for the latest available date and print each table.
    year, month = DateHelper.get_latest_date()
    csv_path = PathHelper.locate_csv(year, month)
    with reader.open_csv(csv_path) as csvfile:
        parsed_tables = []
        segments = reader.Reader(csvfile, spec.SPEC).items()
        for csv_segment, pdef in segments:
            tables = extract_tables(csv_segment, pdef)
            parsed_tables.extend(tables)
            # NOTE(review): assumes the print loop runs once per segment;
            # confirm against the intended nesting.
            for table in tables:
                print()
                print(table)
def get_latest_date(dhelper=DateHelper):
    """Return the latest available date as reported by *dhelper*.

    Args:
        dhelper: object exposing a ``get_latest_date()`` callable. Defaults
            to ``DateHelper``; pass a substitute (e.g. a stub in tests) to
            control the result.

    Returns:
        Whatever ``dhelper.get_latest_date()`` returns.
    """
    # Call the injected helper: the parameter was previously ignored and
    # the global DateHelper was always used instead.
    return dhelper.get_latest_date()