예제 #1
0
class Collection:
    """Class-level operations over the latest data release or all releases.

    All state lives on the class itself and is computed once, when the class
    body is executed.
    """

    # Every (year, month) pair returned by DateHelper.get_supported_dates().
    all_dates = DateHelper.get_supported_dates()
    # Latest available date; `year` and `month` stay as class attributes.
    year, month = DateHelper.get_latest_date()
    # Vintage built for the latest date, shared by the *_latest methods.
    latest_vintage = Vintage(year, month)

    @classmethod
    def save_latest(cls):
        """Save the vintage for the latest date."""
        cls.latest_vintage.save()

    @classmethod
    def approve_latest(cls):
        """Validate the latest vintage only - a quick check of the algorithm."""
        cls.latest_vintage.validate()

    @classmethod
    def save_all(cls):
        """Build and save a vintage for every date in ``all_dates``."""
        for yr, mon in cls.all_dates:
            release = Vintage(yr, mon)
            release.save()

    @classmethod
    def approve_all(cls):
        """Validate a vintage for every date in ``all_dates``.

        Takes about 1-2 minutes on a fast computer. May fail if the dataset
        is not complete, e.g. word2csv wrote only part of a CSV file.
        """
        for yr, mon in cls.all_dates:
            # Progress line is printed before the vintage is constructed.
            print("Checking", yr, mon)
            Vintage(yr, mon).validate()
예제 #2
0
 def test_get_supported_dates_excludes_2013_11(self):
     """The pair (2013, 11) must be absent from the supported dates."""
     supported = DateHelper.get_supported_dates()
     assert (2013, 11) not in supported
예제 #3
0
 def test_get_supported_dates_ends_with_latest_date(self):
     """The last supported date is the calendar month preceding today."""
     # Step back one day from the first of the current month to land in
     # the previous month.
     month_start = dt.datetime.today().replace(day=1)
     last_month = month_start - dt.timedelta(days=1)
     expected = (last_month.year, last_month.month)
     assert DateHelper.get_supported_dates()[-1] == expected
예제 #4
0
 def test_get_supported_dates_starts_in_2009_4(self):
     """The first supported date is (2009, 4)."""
     earliest = DateHelper.get_supported_dates()[0]
     assert earliest == (2009, 4)
예제 #5
0
 def test_get_latest_date(self):
     """Latest date is a (year, month) pair: year >= 2017, month in 1..12."""
     latest = DateHelper.get_latest_date()
     year, month = latest
     assert year >= 2017
     assert 1 <= month <= 12
예제 #6
0
 def test_validate_failes(self):
     """DateHelper.validate must raise ValueError for (2030, 1)."""
     year, month = 2030, 1
     with pytest.raises(ValueError):
         DateHelper.validate(year, month)
예제 #7
0
 def test_validate_passes(self):
     """DateHelper.validate must not raise for (2015, 6)."""
     year, month = 2015, 6
     DateHelper.validate(year, month)
예제 #8
0
    def __str__(self):
        """Return a multi-line, human-readable dump of the table.

        The output has three newline-joined parts: a title line with the
        label and column count, one "value <key>" line per item of
        ``self.lines``, and ``str()`` of each entry in ``self.datarows``.
        """
        title = "Table {} ({} columns)".format(self.label, self.coln)
        header_part = '\n'.join(
            "{} <{}>".format(text, key) for key, text in self.lines.items()
        )
        data_part = '\n'.join(map(str, self.datarows))
        return "\n".join((title, header_part, data_part))

    def __repr__(self):
        """Return a constructor-like string showing headers and datarows."""
        headers_text = repr(self.headers)
        datarows_text = repr(self.datarows)
        template = "Table(headers={},\ndatarows={})"
        return template.format(headers_text, datarows_text)


if __name__ == "__main__":
    # Demo run: parse the CSV file for the latest date and print every
    # table extracted from it.
    from config import PathHelper, DateHelper  # this is in __main__ section
    import csv2df.reader as reader
    import csv2df.specification as spec

    year, month = DateHelper.get_latest_date()
    csv_path = PathHelper.locate_csv(year, month)
    with reader.open_csv(csv_path) as csvfile:
        # Accumulate tables from all segments yielded by the reader.
        parsed_tables = []
        for csv_segment, pdef in reader.Reader(csvfile, spec.SPEC).items():
            parsed_tables.extend(extract_tables(csv_segment, pdef))

        # Iterate the accumulated list, not the per-segment result: the
        # latter holds only the last segment's tables and is unbound when
        # the reader yields no segments at all.
        for t in parsed_tables:
            print()
            print(t)
예제 #9
0
def get_latest_date(dhelper=DateHelper):
    """Return the latest date as reported by *dhelper*.

    Args:
        dhelper: object exposing ``get_latest_date()``; defaults to
            ``DateHelper``. Pass a substitute (e.g. a stub) to override
            the date source.

    Returns:
        The value of ``dhelper.get_latest_date()``, passed through unchanged.
    """
    # Delegate to the injected helper; the parameter was previously ignored
    # and DateHelper was always called regardless of the argument.
    return dhelper.get_latest_date()