Beispiel #1
0
    def test_sync_ndays(self, repo_ledger, sink_fs: FileRepoFS, missing: Iterator[RepoObjectPath]) -> None:
        """
        Syncs a handful of missing daily objects and verifies the outcome.

        The source file system and ``FileRepoFS.iterate_missing`` are mocked,
        so the pipe only transfers the paths supplied by the ``missing``
        fixture. Afterwards the test checks that every path exists in the
        sink with the expected content, that the ledger received the expected
        sequence of calls, and that nothing is stored for the dates returned
        by ``repo_ledger.next_period()``.
        """
        # ``missing`` is typed as an iterator and may be one-shot: materialize
        # it up front so the verification loop below still sees every path
        # after ``sync`` has consumed the mocked return value.
        missing_paths = list(missing)

        src_fs = mock.MagicMock()
        src_fs.find.side_effect = self.mock_find
        pipe: RepoPipe = RepoPipe(repo_ledger, src_fs, sink_fs)

        with mock.patch("edgar.utils.repo.file_repo_fs.FileRepoFS.iterate_missing") as iterate_missing:
            iterate_missing.return_value = iter(missing_paths)
            pipe.sync()

        for obj_path in missing_paths:
            period_type = obj_path.date_period_type()
            the_date = obj_path.date()
            o: RepoObject = sink_fs.find(period_type, the_date)
            assert ' '.join([str(period_type), str(the_date)]) == next(o.inp(bufsize=1024))
            assert o.exists()

        tracker: CallTracker = CallTracker()
        tracker.add_expected('next_period', [])
        tracker.add_expected('start', [Date('2021-01-01')])
        tracker.add_expected('record', [Date('2021-07-12'), DatePeriodType.DAY])
        tracker.add_expected('record', [Date('2021-07-13'), DatePeriodType.DAY])
        tracker.add_expected('record', [Date('2021-07-14'), DatePeriodType.DAY])
        tracker.add_expected('end', [Date('2021-08-01')])
        tracker.assertCalls(repo_ledger.mock_calls)

        (beg_date, end_date) = repo_ledger.next_period()
        # identity comparison: ``find`` is expected to return ``None`` itself
        assert sink_fs.find(DatePeriodType.DAY, beg_date) is None
        assert sink_fs.find(DatePeriodType.DAY, end_date) is None
    def test_backfill_diff_quarters(self, from_date_str, to_date_str, elems):
        """
        Backfills from ``from_date_str`` to ``to_date_str`` and compares the
        period types of the produced periods, concatenated in iteration
        order into one string, with ``elems``.
        """
        end: Date = Date(to_date_str)
        start: Date = Date(from_date_str)

        produced: str = "".join(
            str(period.period_type) for period in end.backfill(start))
        assert produced == elems
    def test_backfill_same_date(self, from_date_str, to_date_str,
                                has_backfill):
        """
        Checks whether ``backfill`` yields anything at all.

        ``has_backfill`` states if at least one period is expected; only the
        first produced period (if any) is pulled from the iterator.
        """
        end: Date = Date(to_date_str)
        start: Date = Date(from_date_str)

        # any() stops at the first element, so at most one period is consumed
        yielded_something = any(True for _ in end.backfill(start))
        if yielded_something:
            assert has_backfill, "should return an empty iterator"
        else:
            assert not has_backfill
 def test_add_days(self, from_date_str: str, to_date_str: str,
                   days: int) -> None:
     """
     Steps from the start date to the end date one day at a time using
     ``+= 1`` and checks that the number of steps equals ``days``.
     """
     cursor: Date = Date(from_date_str)
     last: Date = Date(to_date_str)
     steps: int = 0
     while cursor <= last:
         steps += 1
         cursor += 1
     assert steps == days
    def format(self, period_type: DatePeriodType, the_date: Date, **kwargs) -> List[str]:
        """
        Renders the path of an object for the given period type and date.

        Parameters
        ----------
        period_type: DatePeriodType
            selects the name spec to render
        the_date: Date
            the date used to render every spec
        **kwargs
            extra values passed on to ``Date.format``; a registered macro
            with the same name takes precedence

        Returns
        -------
        List[str]
            the rendered path spec elements followed by the rendered name
        """
        name_spec = self.__format.name_spec[period_type]
        path_spec = self.__format.path_spec

        # caller-supplied values first, evaluated macros override them
        values = {
            **kwargs,
            **{key: macro(period_type, the_date)
               for key, macro in self.__macros.items()},
        }

        segments = [the_date.format(spec, period_type, **values)
                    for spec in path_spec]
        segments.append(the_date.format(name_spec, period_type, **values))
        return segments
    def test_backfill_same_quarter(self, from_date_str: str, to_date_str: str,
                                   grain_expected: str, num_expected: int):
        """
        Backfills within a single quarter: every produced period must have
        the expected grain and length and span exactly the requested dates,
        and at least one period must be produced.
        """
        end: Date = Date(to_date_str)
        start: Date = Date(from_date_str)

        seen = 0
        for seen, period in enumerate(end.backfill(start), start=1):
            assert period.period_type == grain_expected
            assert period.num_days == num_expected
            assert period.start_date == start
            assert period.end_date == end
        assert seen > 0
Beispiel #7
0
def edgar_fs() -> tempfile.TemporaryDirectory:
    """
    Builds a temporary directory tree with index files for tests.

    For each period type (DAY and QUARTER) a sub-directory named after the
    period type is created. Every entry of ``EDGAR_QUARTER`` is split on
    ``'-'`` into a year, a quarter label whose fourth character is the
    quarter number, and a number of daily files (presumably something like
    ``'2020-QTR1-30'`` -- confirm against the constant). The quarterly tree
    gets one ``master.idx`` per quarter; the daily tree gets one
    ``masterYYYYMMDD.idx`` per consecutive calendar day starting at the
    first day of the quarter. Every file contains its own path.

    Returns
    -------
    tempfile.TemporaryDirectory
        the temporary directory holding the tree; the caller owns its cleanup
    """
    temp: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory(
        suffix="_edgar_fs")
    root: Path = Path(temp.name)

    for period_type in (DatePeriodType.DAY, DatePeriodType.QUARTER):
        base: Path = root / str(period_type)
        base.mkdir()

        for spec in EDGAR_QUARTER:
            parts = spec.split('-')
            year, quarter = parts[0], parts[1]

            # Several quarters can share a year directory, so creating it
            # must not fail when it already exists.
            quarter_dir: Path = base / year / quarter
            quarter_dir.mkdir(parents=True, exist_ok=True)

            if period_type == DatePeriodType.QUARTER:
                index_file: Path = quarter_dir / 'master.idx'
                index_file.write_text(str(index_file))
            else:
                first_month: int = (int(quarter[3]) - 1) * 3 + 1
                day: Date = Date(date(int(year), first_month, 1))
                for _ in range(int(parts[2])):
                    index_file = quarter_dir / day.format(
                        'master{y}{m:02}{d:02}.idx')
                    index_file.write_text(str(index_file))
                    # add_days returns the advanced date; rebind so the loop
                    # moves forward whether or not it also mutates in place
                    day = day.add_days(1)
    return temp
Beispiel #8
0
    def test_sync_create_error(self, create, repo_ledger, sink_fs: FileRepoFS, missing: Iterator[RepoObjectPath]) -> None:
        """
        Syncs with a failing ``create`` and checks the ledger calls.

        ``create`` is routed to ``self.mock_create`` and
        ``FileRepoFS.iterate_missing`` is patched to return ``missing``. The
        ledger is expected to record the first day and then an error carrying
        ``repr(FileExistsError())`` for the second day.
        """
        create.side_effect = self.mock_create
        source = mock.MagicMock()
        source.find.side_effect = self.mock_find

        with mock.patch("edgar.utils.repo.file_repo_fs.FileRepoFS.iterate_missing") as patched:
            patched.return_value = missing
            RepoPipe(repo_ledger, source, sink_fs).sync()

        expected_calls = [
            ('next_period', []),
            ('start', [Date('2021-01-01')]),
            ('record', [Date('2021-07-12'), DatePeriodType.DAY]),
            ('error', [Date('2021-07-13'), repr(FileExistsError())]),
        ]
        tracker: CallTracker = CallTracker()
        for call_name, call_args in expected_calls:
            tracker.add_expected(call_name, call_args)
        tracker.assertCalls(repo_ledger.mock_calls)
    def iterate_missing(self, from_date: Date,
                        to_date: Date) -> Iterator[RepoURI]:
        """
            Identifies objects that are not in the repository
            for the given dates

            The method calls ``self.refresh()`` first and then walks
            ``to_date.diff_days(from_date)`` consecutive dates starting at
            ``from_date``. Weekends and dates contained in ``us_holidays``
            for the current year are skipped. For every remaining date whose
            daily object path is not in the index, the daily path is yielded;
            the quarterly path is yielded right before the first missing
            daily path of each quarter.

            NOTE(review): whether ``to_date`` itself is examined depends on
            what ``diff_days`` returns -- confirm.

            Parameters
            ----------
            from_date: Date
                the start date (not modified, a copy is advanced)
            to_date: Date
                the end date

            Returns
            -------
            Iterator
                an iterator over ``RepoObjectPath`` objects for missing
                quarterly and daily objects
        """
        self.refresh()

        # year/quarter seen last; 0 means "none yet"
        track_year, track_quarter = 0, 0
        cur_holidays: us_holidays = None
        # work on a copy because the date is advanced with ``+=`` below
        cur_date: Date = from_date.copy()

        for _ in range(to_date.diff_days(from_date)):
            (cur_year, cur_quarter, *_) = cur_date.tuple()

            if cur_year != track_year:
                # Moving to the first or to the next year:
                # load that year's holidays and reset the quarter tracking
                cur_holidays = us_holidays(cur_year)
                track_year, track_quarter = cur_year, 0

            if not (cur_date.is_weekend() or cur_date in cur_holidays):
                obj_path: RepoObjectPath = RepoObjectPath.from_date(
                    DatePeriodType.DAY, cur_date, self.__format)
                if str(obj_path) not in self.__index:
                    if cur_quarter != track_quarter:
                        # Add a quarterly file to the update list
                        # only if it has not been added before
                        yield RepoObjectPath.from_date(DatePeriodType.QUARTER,
                                                       cur_date, self.__format)
                        track_quarter = cur_quarter

                    # Add a daily file to the update list
                    yield obj_path
            # next date
            cur_date += 1
Beispiel #10
0
    def __init__(self, year: int) -> None:
        """
        Builds the holidays for the given year.

        ``self.list`` holds the dates on which the holidays are observed: a
        holiday falling on a Saturday is moved one day back and one falling
        on a Sunday one day forward. ``self.names`` maps the string form of
        a date to the holiday name; it contains both the calendar date and,
        when different, the observed date of every holiday.

        Parameters
        ----------
        year: int
            the year to build the holidays for
        """
        self.list: List[Date] = []
        self.names: Dict[str, str] = {}

        def register(holiday: Date, name: str) -> None:
            # The calendar date stays a key so existing lookups keep working.
            self.names[str(holiday)] = name

            # Shift weekend holidays to the observed weekday. Rebinding the
            # local name works whether ``+=`` mutates the date or returns a
            # new one.
            weekday: int = holiday.isoweekday()
            if weekday == self.SATURDAY:
                holiday += -1
            elif weekday == self.SUNDAY:
                holiday += 1

            # The observed date is what ends up in ``self.list``, so it has
            # to be resolvable in ``self.names`` as well.
            self.names[str(holiday)] = name
            self.list.append(holiday)

        # Holidays on a fixed day of a month: (month, day, name)
        fixed_holidays = [
            # New Year              Jan 1
            (self.JANUARY, 1, "New Year's Day"),
            # Independence Day      July 4
            (self.JULY, 4, 'Independence Day'),
            # Veterans Day          Nov 11
            (self.NOVEMBER, 11, 'Veterans Day'),
            # Christmas Day         Dec 25
            (self.DECEMBER, 25, 'Christmas Day'),
        ]
        for month, day, name in fixed_holidays:
            register(Date(date(year, month, day)), name)

        # Holidays on the n-th weekday of a month: (month, weekday, week, name)
        floating_holidays = [
            # Martin Luther King, Jr.       third Mon in Jan
            (self.JANUARY, self.MONDAY, self.THIRD_WEEK,
             'Birthday of Martin Luther King, Jr.'),
            # Washington's Birthday         third Mon in Feb
            (self.FEBRUARY, self.MONDAY, self.THIRD_WEEK,
             "Washington's Birthday"),
            # Memorial Day                  last Mon in May
            (self.MAY, self.MONDAY, self.LAST_WEEK, 'Memorial Day'),
            # Labor Day                     first Mon in Sept
            (self.SEPTEMBER, self.MONDAY, self.FIRST_WEEK, 'Labor Day'),
            # Columbus Day                  second Mon in Oct
            (self.OCTOBER, self.MONDAY, self.SECOND_WEEK, 'Columbus Day'),
            # Thanksgiving Day              fourth Thur in Nov
            (self.NOVEMBER, self.THURSDAY, self.FOURTH_WEEK,
             'Thanksgiving Day'),
        ]
        for month, weekday, week, name in floating_holidays:
            first_of_month: Date = Date(date(year, month, 1))
            register(first_of_month.nthday_of_nthweek(weekday, week), name)
Beispiel #11
0
 def test_macros(self, period_type: DatePeriodType, date_str: str,
                 path_spec: List[str], name_spec: str, expected: str):
     """
     Registers a macro named ``z`` that renders ``'DAY'`` or ``'QUARTER'``
     depending on the period type and checks the formatted path, joined
     with ``'/'``, against ``expected``.
     """
     def period_label(period_type, date):
         if period_type == DatePeriodType.DAY:
             return 'DAY'
         return 'QUARTER'

     repo_format = RepoFormat({period_type: name_spec}, path_spec)
     formatter: RepoFormatter = RepoFormatter(repo_format)
     formatter['z'] = period_label

     segments = formatter.format(period_type, Date(date_str))
     assert '/'.join(segments) == expected
Beispiel #12
0
 def test_getitem(self, path: List[str], date_period: str, quarter: str,
                  year: int, date_str: str) -> None:
     """
     Indexes an object path built from a list: positions 0-2 must hold the
     period, the year and the quarter, position 3 the daily index file name.
     """
     obj_path: RepoObjectPath = RepoObjectPath.from_list(
         path, self.REPO_FORMAT)

     for position, wanted in enumerate((date_period, year, quarter)):
         assert obj_path[position] == wanted

     file_name = obj_path[3]
     assert file_name == Date(date_str).format('master{y}{m:02}{d:02}.idx')
Beispiel #13
0
 def test_init_with_list(self, path: List[str], date_period: str,
                         quarter: int, year: int, date_str: str) -> None:
     """
     Builds an object path from a list and checks the period type, year,
     quarter and date it reports.
     """
     obj_path: RepoObjectPath = RepoObjectPath.from_list(
         path, self.REPO_FORMAT)

     actual_type = obj_path.date_period_type()
     expected_type = DatePeriodType.from_string(date_period)
     assert actual_type == expected_type

     assert obj_path.year() == year
     assert obj_path.quarter() == quarter

     actual_date = obj_path.date()
     assert actual_date == Date(date_str)
 def test_record(self, ledger: DbRepoLedger) -> None:
     """
     Records one daily entry and checks the single dumped row: event name,
     date, period code and a timestamp taken between the two clock reads
     around the call.
     """
     started: int = int(datetime.now().timestamp())
     ledger.record(Date('2021-11-11'), DatePeriodType.DAY)
     finished: int = int(datetime.now().timestamp())

     rows: list = ledger.dump()
     print(rows)
     assert len(rows) == 1

     entry = rows[0]
     assert entry[0] == 'record'
     assert entry[1] == '2021-11-11'
     assert entry[2] == 'D'
     assert entry[3] >= started
     assert entry[3] <= finished
Beispiel #15
0
    def test_sync_missing_error(self, iterate_missing, repo_ledger, sink_fs: FileRepoFS) -> None:
        """
        Makes ``iterate_missing`` raise ``FileNotFoundError`` during a sync
        and checks that the ledger gets an error call with ``None`` as the
        date and the ``repr`` of the raised exception.
        """
        failure: FileNotFoundError = FileNotFoundError()
        iterate_missing.side_effect = failure

        source = mock.MagicMock()
        source.find.side_effect = self.mock_find

        RepoPipe(repo_ledger, source, sink_fs).sync()

        expected_calls = [
            ('next_period', []),
            ('start', [Date('2021-01-01')]),
            ('error', [None, repr(failure)]),
        ]
        tracker: CallTracker = CallTracker()
        for call_name, call_args in expected_calls:
            tracker.add_expected(call_name, call_args)
        tracker.assertCalls(repo_ledger.mock_calls)
    def date(self) -> Date:
        """
        Returns the date for an object at the object path

        The date is parsed from the last path element with the DAY name
        spec on first use and kept for subsequent calls.

        Returns
        -------
        Date
            the date
        """
        if self.__date:
            return self.__date

        day_spec = self.__format.name_spec[DatePeriodType.DAY]
        fields = parse(day_spec, self.__list[-1])
        year, month, day = (int(fields[key]) for key in ('y', 'm', 'd'))
        self.__date = Date(date(year, month, day))
        return self.__date
    def test_iterator(self) -> None:
        """
        Iterates the 2020 holidays and checks that the expected dates
        (as strings) are among them.
        """
        year_start: Date = Date("2020-01-01")
        holidays: us_holidays = us_holidays(year_start.year())
        observed = {str(day) for day in holidays}

        expected_dates = (
            "2020-01-01",
            "2020-01-20",
            "2020-02-17",
            "2020-05-25",
            "2020-07-03",
            "2020-09-07",
            "2020-11-11",
            "2020-11-26",
            "2020-12-25",
        )
        for expected in expected_dates:
            assert expected in observed
 def test_contains(self, date_str: str, expected_result: bool) -> None:
     """
     Checks the ``in`` operator of the holidays built for the date's year.
     """
     candidate: Date = Date(date_str)
     year_holidays: us_holidays = us_holidays(candidate.year())
     is_holiday = candidate in year_holidays
     assert is_holiday == expected_result
Beispiel #19
0
 def test_find_object(self, period_type, date_str, expected) -> None:
     """
     Looks up an object in an HTTP repository rooted at the EDGAR archive
     URL and checks the URI it reports.
     """
     base_url = 'https://www.sec.gov/Archives/edgar/'
     repo: HttpRepoFS = HttpRepoFS(base_url, self.__formatter)
     found: HttpRepoObject = repo.find(period_type, Date(date_str))
     assert found.as_uri() == expected
Beispiel #20
0
 def test_kwargs(self, period_type: DatePeriodType, date_str: str,
                 path_spec: List[str], name_spec: str, expected: str):
     """
     Formats a path with an extra keyword value ``z='X'`` and checks the
     result, joined with ``'/'``, against ``expected``.
     """
     repo_format = RepoFormat({period_type: name_spec}, path_spec)
     formatter: RepoFormatter = RepoFormatter(repo_format)
     segments = formatter.format(period_type, Date(date_str), z='X')
     assert '/'.join(segments) == expected
 def test_quarter(self, date_str: str, expected_result: int):
     """Checks the quarter reported for the given date."""
     actual_quarter = Date(date_str).quarter()
     assert actual_quarter == expected_result
 def test_add_days(self, date_str: str, expected_result: str, days: int):
     """
     Adds ``days`` to the date with ``add_days`` and checks the string form
     of the returned date.
     """
     shifted: Date = Date(date_str).add_days(days)
     assert str(shifted) == expected_result
 def test_format_args(self, date_str: str, date_period_type: DatePeriodType,
                      args: Dict, format_spec: str, expected: str) -> None:
     """
     Formats the date with a period type and extra keyword values taken
     from ``args`` and checks the rendered string.
     """
     rendered = Date(date_str).format(format_spec, date_period_type, **args)
     assert rendered == expected
 def test_format(self, date_str: str, format_spec: str,
                 expected: str) -> None:
     """Formats the date with the given spec and checks the result."""
     rendered = Date(date_str).format(format_spec)
     assert rendered == expected
 def test_init_bad_format(self):
     """Constructing a Date from an unparsable string raises ValueError."""
     with pytest.raises(ValueError):
         Date("XXX")
 def test_init_success(self, date_str):
     """A Date built from a string converts back to the same string."""
     parsed = Date(date_str)
     assert date_str == str(parsed)
 def is_weekend(self, date_str: str, expected_result: bool) -> None:
     """
     Checks ``Date.is_weekend`` for the given date.

     NOTE(review): the name has no ``test_`` prefix, so pytest's default
     discovery rules will not collect it -- confirm whether that is intended.
     """
     weekend_flag = Date(date_str).is_weekend()
     assert weekend_flag == expected_result
 def test_diff_days(self, from_date_str: str, to_date_str: str,
                    expected_result: int):
     """Checks ``diff_days`` of the end date relative to the start date."""
     end: Date = Date(to_date_str)
     start: Date = Date(from_date_str)
     assert end.diff_days(start) == expected_result
 def test_nthday_of_nthweek(self, date_str: str, dayofweek: int,
                            whichweek: int, expected_result: str) -> None:
     """
     Resolves the given weekday of the given week relative to the date and
     checks the string form of the result.
     """
     resolved: Date = Date(date_str).nthday_of_nthweek(dayofweek=dayofweek,
                                                       whichweek=whichweek)
     assert str(resolved) == expected_result
 def test_quarter_dates(self, date_str: str, expected_quarter_start: str,
                        expected_quarter_end: str):
     """
     Checks that ``quarter_dates`` returns the expected (start, end) pair
     for the quarter containing the date.
     """
     actual_bounds = Date(date_str).quarter_dates()
     expected_bounds = (Date(expected_quarter_start),
                        Date(expected_quarter_end))
     assert actual_bounds == expected_bounds