def test_process_if_needed_missing_writes(self, xml_parser):
    """If output isn't already present, we should process. If it is
    present, we don't need to, unless a dependency has changed."""
    with self.cli.isolated_filesystem():
        build_tree = xml_parser.reg_text.build_tree
        build_tree.return_value = Node()
        last_versions = [annual_editions.LastVersionInYear('1111', 2000)]
        entry.Version('12', '1000', '1111').write(
            Version('1111', date(2000, 1, 1), date(2000, 1, 1)))
        entry.Entry('annual', '12', '1000', 2000).write(b'<ROOT></ROOT>')

        # No tree output yet -> a build is required
        annual_editions.process_if_needed('12', '1000', last_versions)
        self.assertTrue(build_tree.called)

        build_tree.reset_mock()
        entry.Entry('tree', '12', '1000', '1111').write(b'tree-here')
        # Output present and up to date -> no rebuild
        annual_editions.process_if_needed('12', '1000', last_versions)
        self.assertFalse(build_tree.called)

        # Simulate a change to an input file
        label_id = str(entry.Annual(12, 1000, 2000))
        new_time = timezone.now() + timedelta(hours=1)
        DBEntry.objects.filter(label_id=label_id).update(modified=new_time)
        annual_editions.process_if_needed('12', '1000', last_versions)
        self.assertTrue(build_tree.called)
def test_process_if_needed_missing_writes(monkeypatch):
    """If output isn't already present, we should process. If it is
    present, we don't need to, unless a dependency has changed."""
    monkeypatch.setattr(annual_editions, 'gpo_cfr', Mock())
    build_tree = annual_editions.gpo_cfr.builder.build_tree
    build_tree.return_value = Node()
    last_versions = [annual_editions.LastVersionInYear('1111', 2000)]
    entry.Version('12', '1000', '1111').write(
        Version('1111', date(2000, 1, 1), Citation(1, 1)))
    entry.Entry('annual', '12', '1000', 2000).write(b'<ROOT></ROOT>')

    # Output missing: processing must occur
    annual_editions.process_if_needed('12', '1000', last_versions)
    assert build_tree.called

    build_tree.reset_mock()
    entry.Entry('tree', '12', '1000', '1111').write(b'tree-here')
    # Output present and fresh: nothing to do
    annual_editions.process_if_needed('12', '1000', last_versions)
    assert not build_tree.called

    # Simulate a change to an input file
    label_id = str(entry.Annual(12, 1000, 2000))
    new_time = timezone.now() + timedelta(hours=1)
    DBEntry.objects.filter(label_id=label_id).update(modified=new_time)
    annual_editions.process_if_needed('12', '1000', last_versions)
    assert build_tree.called
def test_stale_layers(self):
    """We should have dependencies between all of the layers and their
    associated trees. We should also tie the meta layer to the version"""
    configured_layers = {'cfr': {'keyterms': None, 'other': None}}
    with self.cli.isolated_filesystem(), patch.dict(
            layers.LAYER_CLASSES, configured_layers):
        version_entry = entry.Version(111, 22, 'aaa')
        version_entry.write(Version('aaa', date.today(), date.today()))
        tree_entry = entry.Tree(111, 22, 'aaa')

        # stale_layers is lazy; list() forces it to run
        self.assertRaises(dependency.Missing, list,
                          layers.stale_layers(tree_entry, 'cfr'))

        entry.Entry('tree', 111, 22, 'bbb').write(b'')   # wrong version
        self.assertRaises(dependency.Missing, list,
                          layers.stale_layers(tree_entry, 'cfr'))

        entry.Entry('tree', 111, 22, 'aaa').write(b'')
        six.assertCountEqual(self, layers.stale_layers(tree_entry, 'cfr'),
                             ['keyterms', 'other'])
        # The meta layer must depend on the version entry
        self.assertIn(
            str(version_entry),
            dependency.Graph().dependencies(
                str(entry.Layer.cfr(111, 22, 'aaa', 'meta'))))
def test_write_if_needed_no_need_to_recompute(monkeypatch):
    """If all dependencies are up to date and the output is present,
    there's no need to write anything"""
    monkeypatch.setattr(versions, 'write_to_disk', Mock())
    entry.Entry('notice_xml', '111').write(b'content')
    entry.Entry('version', 'title', 'part', '111').write(b'out')

    versions.write_if_needed('title', 'part', ['111'], {'111': 'xml111'},
                             {})

    assert not versions.write_to_disk.called
def test_deletes_index(tmpdir_setup):
    """Invoking `clear` with no arguments should empty the whole index."""
    entry.Entry('aaa', 'bbb').write(b'ccc')
    entry.Entry('bbb', 'ccc').write(b'ddd')
    assert len(list(entry.Entry("aaa").sub_entries())) == 1
    assert len(list(entry.Entry("bbb").sub_entries())) == 1

    CliRunner().invoke(clear)

    assert list(entry.Entry().sub_entries()) == []
def test_write_if_needed_no_need_to_recompute(self, write_to_disk):
    """If all dependencies are up to date and the output is present,
    there's no need to write anything"""
    with self.cli.isolated_filesystem():
        entry.Entry('notice_xml', '111').write(b'content')
        entry.Entry('version', 'title', 'part', '111').write(b'out')
        versions.write_if_needed('title', 'part', ['111'],
                                 {'111': 'xml111'}, {})
        self.assertFalse(write_to_disk.called)
def test_process_no_need_to_create(self):
    """If everything is up to date, we don't need to build new versions"""
    with CliRunner().isolated_filesystem():
        annual = entry.Entry('annual', self.title, self.part, self.year)
        tree = entry.Entry('tree', self.title, self.part, self.version_id)
        annual.write(b'ANNUAL')
        tree.write(b'TREE')

        current_version.process_if_needed(self.volume, self.part)

        # Neither entry should have been regenerated
        self.assertEqual(annual.read(), b'ANNUAL')
        self.assertEqual(tree.read(), b'TREE')
def test_process_no_need_to_create():
    """If everything is up to date, we don't need to build new versions"""
    title, part = randint(1, 999), randint(1, 999)
    year = randint(2000, 2020)
    annual = entry.Entry('annual', title, part, year)
    tree = entry.Entry('tree', title, part,
                       '{0}-annual-{1}'.format(year, part))
    annual.write(b'ANNUAL')
    tree.write(b'TREE')

    annual_version.process_if_needed(Volume(year, title, 1), part)

    # Neither entry should have been regenerated
    assert annual.read() == b'ANNUAL'
    assert tree.read() == b'TREE'
def test_deletes_can_be_focused(tmpdir_setup):
    """If params are provided to delete certain directories, only those
    directories should get removed"""
    to_delete = ['delroot/aaa/bbb', 'delroot/aaa/ccc',
                 'root/delsub/aaa', 'root/delsub/bbb']
    to_keep = ['root/othersub/aaa', 'root/aaa', 'top-level-file',
               'other-root/aaa']
    for path in to_delete + to_keep:
        entry.Entry(*path.split('/')).write(b'')

    CliRunner().invoke(clear, ['delroot', 'root/delsub'])

    remaining = {os.sep.join(sub.path) for sub in entry.Entry().sub_entries()}
    assert remaining == set(to_keep)
def test_process_creation(self):
    """If no tree is present, we should build one"""
    to_patch = 'regparser.commands.current_version.xml_parser'
    with CliRunner().isolated_filesystem(), patch(to_patch) as xml_parser:
        # Entry contents are stored as bytes; every sibling test writes
        # bytes, so writing a str here was inconsistent (and fails on
        # Python 3's binary file handles)
        entry.Entry('annual', self.title, self.part,
                    self.year).write(b'<ROOT />')
        xml_parser.reg_text.build_tree.return_value = {'my': 'tree'}

        current_version.process_if_needed(self.volume, self.part)

        tree = entry.Entry('tree', self.title, self.part,
                           self.version_id).read()
        # read() returns bytes -- decode explicitly before JSON-parsing
        self.assertEqual(json.loads(tree.decode('utf-8')), {'my': 'tree'})
        notice = entry.SxS(self.version_id).read()
        self.assertEqual(notice['document_number'], self.version_id)
        self.assertEqual(notice['cfr_parts'], [str(self.part)])
def test_process_creation(monkeypatch):
    """If no tree is present, we should build one"""
    title, part = randint(1, 999), randint(1, 999)
    year = randint(2000, 2020)
    version_id = '{0}-annual-{1}'.format(year, part)
    monkeypatch.setattr(annual_version, 'builder', Mock())
    annual_version.builder.build_tree.return_value = {'my': 'tree'}
    entry.Entry('annual', title, part, year).write(b'<ROOT />')

    annual_version.process_if_needed(Volume(year, title, 1), part)

    # The parsed tree should have been serialized to the index
    tree = entry.Entry('tree', title, part, version_id).read()
    assert json.loads(tree.decode('utf-8')) == {'my': 'tree'}
    # ... along with a corresponding notice
    notice = entry.Notice(version_id).read()
    assert notice.version_id == version_id
    assert notice.cfr_refs == [TitlePartsRef(title, [part])]
def test_process(monkeypatch):
    """Verify that the correct changes are found"""
    compile_regulation = Mock(return_value=Node())
    monkeypatch.setattr(fill_with_rules, 'compile_regulation',
                        compile_regulation)
    notice_mock = Mock()
    # Configures entry.Notice('new').read().amendments
    notice_mock.return_value.read.return_value.amendments = [
        {"instruction": "Something something",
         "cfr_part": "1000",
         "authority": "USC Numbers"},
        {"instruction": "More things",
         "cfr_part": "1000",
         "changes": [["1000-2-b", ["2b changes"]],
                     ["1000-2-c", ["2c changes"]]]},
        {"instruction": "Yet more changes",
         "cfr_part": "1000",
         "changes": [["1000-4-a", ["4a changes"]]]},
    ]
    monkeypatch.setattr(fill_with_rules.entry, 'Notice', notice_mock)

    tree_dir = entry.Tree('12', '1000')
    (tree_dir / 'old').write(Node())
    entry.Entry('notice_xml', 'new').write(b'')

    fill_with_rules.process(tree_dir, 'old', 'new')

    # Only the amendments carrying "changes" should be compiled in
    changes = dict(compile_regulation.call_args[0][1])
    assert changes == {"1000-2-b": ["2b changes"],
                       "1000-2-c": ["2c changes"],
                       "1000-4-a": ["4a changes"]}
def test_write_if_needed_output_missing(self, write_to_disk):
    """If the output file is missing, we'll always write"""
    with self.cli.isolated_filesystem():
        # Input exists, but no 'version' output entry was created
        entry.Entry('notice_xml', '111').write(b'content')
        versions.write_if_needed('title', 'part', ['111'],
                                 {'111': 'xml111'}, {})
        self.assertTrue(write_to_disk.called)
def test_process_creation(self):
    """If no tree is present, we should build one"""
    to_patch = 'regparser.commands.current_version.xml_parser'
    with CliRunner().isolated_filesystem(), patch(to_patch) as xml_parser:
        entry.Entry('annual', self.title, self.part,
                    self.year).write(b'<ROOT />')
        xml_parser.reg_text.build_tree.return_value = {'my': 'tree'}

        current_version.process_if_needed(self.volume, self.part)

        # The parsed tree should have been serialized to the index
        tree = entry.Entry('tree', self.title, self.part,
                           self.version_id).read()
        self.assertEqual(json.loads(tree.decode('utf-8')), {'my': 'tree'})
        # ... along with a corresponding notice
        notice = entry.Notice(self.version_id).read()
        self.assertEqual(notice.version_id, self.version_id)
        self.assertEqual(notice.cfr_refs,
                         [TitlePartsRef(self.title, [self.part])])
def test_write_if_needed_delays(self, write_to_disk):
    """Delays introduce dependencies."""
    with self.cli.isolated_filesystem():
        entry.Entry('notice_xml', '111').write(b'content')
        entry.Entry('notice_xml', '222').write(b'content')
        entry.Entry('version', 'title', 'part', '111').write(b'out')

        # Everything is fresh; the delaying notice adds a dependency but
        # doesn't require a rewrite
        versions.write_if_needed(
            'title', 'part', ['111'], {'111': 'xml111'},
            {'111': versions.Delay('222', 'until-date')})
        self.assertFalse(write_to_disk.called)

        # Simulate a change to an input file
        os.utime(str(entry.Notice('222')), (time() + 1000, time() + 1000))
        versions.write_if_needed(
            'title', 'part', ['111'], {'111': 'xml111'},
            {'111': versions.Delay('222', 'until-date')})
        self.assertTrue(write_to_disk.called)
def test_write_if_needed_delays(monkeypatch):
    """Delays introduce dependencies."""
    monkeypatch.setattr(versions, 'write_to_disk', Mock())
    entry.Entry('notice_xml', '111').write(b'content')
    entry.Entry('notice_xml', '222').write(b'content')
    entry.Entry('version', 'title', 'part', '111').write(b'out')

    # Everything is fresh; the delaying notice adds a dependency but
    # doesn't require a rewrite
    versions.write_if_needed(
        'title', 'part', ['111'], {'111': 'xml111'},
        {'111': versions.Delay('222', 'until-date')})
    assert not versions.write_to_disk.called

    # Simulate a change to an input file
    label_id = str(entry.Notice('222'))
    new_time = timezone.now() + timedelta(hours=1)
    DBEntry.objects.filter(label_id=label_id).update(modified=new_time)
    versions.write_if_needed(
        'title', 'part', ['111'], {'111': 'xml111'},
        {'111': versions.Delay('222', 'until-date')})
    assert versions.write_to_disk.called
def test_is_stale():
    """We should raise dependency exceptions when necessary files haven't
    been processed. We need SxS entries _and_ the relevant tree"""
    # Nothing exists at all yet
    with pytest.raises(dependency.Missing):
        sxs_layers.is_stale(11, 222, 'aaa')

    create_versions()
    entry.Entry('sxs', 'aaa').write(b'')
    entry.Entry('sxs', 'bbb').write(b'')
    # SxS present, but still no tree
    with pytest.raises(dependency.Missing):
        sxs_layers.is_stale(11, 222, 'aaa')

    entry.Entry('tree', 11, 222, 'bbb').write(b'')  # Wrong tree
    with pytest.raises(dependency.Missing):
        sxs_layers.is_stale(11, 222, 'aaa')

    entry.Entry('tree', 11, 222, 'aaa').write(b'')
    assert sxs_layers.is_stale(11, 222, 'aaa')
def test_fetch_version_ids_no_local(self, fetch_notice_json):
    """If there are no local copies, the document numbers found in the FR
    notices should be passed through"""
    fetch_notice_json.return_value = [{'document_number': '1'},
                                      {'document_number': '22'}]
    with self.cli.isolated_filesystem():
        path = entry.Entry("path")
        self.assertEqual(
            ['1', '22'],
            versions.fetch_version_ids('title', 'part', path))
def test_write_if_needed_delays(self, write_to_disk):
    """Delays introduce dependencies."""
    with self.cli.isolated_filesystem():
        entry.Entry('notice_xml', '111').write(b'content')
        entry.Entry('notice_xml', '222').write(b'content')
        entry.Entry('version', 'title', 'part', '111').write(b'out')

        # Everything is fresh; the delaying notice adds a dependency but
        # doesn't require a rewrite
        versions.write_if_needed(
            'title', 'part', ['111'], {'111': 'xml111'},
            {'111': versions.Delay('222', 'until-date')})
        self.assertFalse(write_to_disk.called)

        # Simulate a change to an input file
        label_id = str(entry.Notice('222'))
        new_time = timezone.now() + timedelta(hours=1)
        DBEntry.objects.filter(label_id=label_id).update(modified=new_time)
        versions.write_if_needed(
            'title', 'part', ['111'], {'111': 'xml111'},
            {'111': versions.Delay('222', 'until-date')})
        self.assertTrue(write_to_disk.called)
def test_deletes_can_be_focused(self):
    """If params are provided to delete certain directories, only those
    directories should get removed"""
    with self.cli.isolated_filesystem():
        to_delete = ['delroot/aaa/bbb', 'delroot/aaa/ccc',
                     'root/delsub/aaa', 'root/delsub/bbb']
        to_keep = ['root/othersub/aaa', 'root/aaa', 'top-level-file',
                   'other-root/aaa']
        for path in to_delete + to_keep:
            # Entry contents are bytes; sibling tests all write bytes and
            # a str here breaks under Python 3
            entry.Entry(*path.split('/')).write(b'')
        self.cli.invoke(clear, ['delroot', 'root/delsub'])
        # assertItemsEqual is Python-2-only (removed in Python 3);
        # assertCountEqual is the same order-insensitive comparison
        self.assertCountEqual(['top-level-file', 'root', 'other-root'],
                              list(entry.Entry()))
        self.assertCountEqual(['othersub', 'aaa'],
                              list(entry.Entry('root')))
        self.assertCountEqual(['aaa'], list(entry.Entry('other-root')))
def test_is_stale(self):
    """We should raise dependency exceptions when necessary files haven't
    been processed. We need SxS entries _and_ the relevant tree"""
    with CliRunner().isolated_filesystem():
        # Nothing exists at all yet
        self.assertRaises(dependency.Missing,
                          sxs_layers.is_stale, 11, 222, 'aaa')
        self.create_versions()
        entry.Entry('sxs', 'aaa').write(b'')
        entry.Entry('sxs', 'bbb').write(b'')
        # SxS present, but still no tree
        self.assertRaises(dependency.Missing,
                          sxs_layers.is_stale, 11, 222, 'aaa')
        entry.Entry('tree', 11, 222, 'bbb').write(b'')  # Wrong tree
        self.assertRaises(dependency.Missing,
                          sxs_layers.is_stale, 11, 222, 'aaa')
        entry.Entry('tree', 11, 222, 'aaa').write(b'')
        self.assertTrue(sxs_layers.is_stale(11, 222, 'aaa'))
def test_fetch_version_ids_local(self, fetch_notice_json):
    """If a notice is split into multiple entries locally, a single
    document number might result in multiple version ids"""
    fetch_notice_json.return_value = [{'document_number': '1'},
                                      {'document_number': '22'}]
    with self.cli.isolated_filesystem():
        path = entry.Entry("path")
        # Entry contents are stored as bytes; the str writes here were
        # inconsistent with the rest of the suite and fail on Python 3
        (path / '1_20010101').write(b'v1')
        (path / '1_20020202').write(b'v2')
        (path / '22').write(b'second')
        (path / '22-3344').write(b'unrelated file')
        self.assertEqual(
            ['1_20010101', '1_20020202', '22'],
            versions.fetch_version_ids('title', 'part', path))
def test_fetch_version_ids_no_local(monkeypatch):
    """If there are no local copies, the document numbers found in the FR
    notices should be passed through"""
    response = [{'document_number': '1', 'full_text_xml_url': 'somewhere'},
                {'document_number': '22', 'full_text_xml_url': 'somewhere'}]
    monkeypatch.setattr(versions, 'fetch_notice_json',
                        Mock(return_value=response))
    path = entry.Entry("path")
    assert versions.fetch_version_ids('title', 'part', path) == ['1', '22']
def test_process_if_needed_missing_writes(self, xml_parser):
    """If output isn't already present, we should process. If it is
    present, we don't need to, unless a dependency has changed."""
    with self.cli.isolated_filesystem():
        build_tree = xml_parser.reg_text.build_tree
        build_tree.return_value = Node()
        last_versions = [annual_editions.LastVersionInYear('1111', 2000)]
        entry.Version('12', '1000', '1111').write(
            Version('1111', date(2000, 1, 1), date(2000, 1, 1)))
        entry.Entry('annual', '12', '1000', 2000).write(b'<ROOT></ROOT>')

        # No tree output yet -> a build is required
        annual_editions.process_if_needed('12', '1000', last_versions)
        self.assertTrue(build_tree.called)

        build_tree.reset_mock()
        entry.Entry('tree', '12', '1000', '1111').write(b'tree-here')
        # Output present and up to date -> no rebuild
        annual_editions.process_if_needed('12', '1000', last_versions)
        self.assertFalse(build_tree.called)

        # Simulate a change to an input file
        later = time() + 1000
        os.utime(str(entry.Annual('12', '1000', '2000')), (later, later))
        annual_editions.process_if_needed('12', '1000', last_versions)
        self.assertTrue(build_tree.called)
def test_stale_layers(monkeypatch):
    """We should have dependencies between all of the layers and their
    associated trees. We should also tie the meta layer to the version"""
    monkeypatch.setattr(layers, 'LAYER_CLASSES',
                        {'cfr': {'keyterms': None, 'other': None}})
    version_entry = entry.Version(111, 22, 'aaa')
    version_entry.write(Version('aaa', date.today(), Citation(1, 1)))
    tree_entry = entry.Tree(111, 22, 'aaa')

    # No tree written yet
    with pytest.raises(dependency.Missing):
        layers.stale_layers(tree_entry, 'cfr')

    entry.Entry('tree', 111, 22, 'bbb').write(b'')  # wrong version
    with pytest.raises(dependency.Missing):
        layers.stale_layers(tree_entry, 'cfr')

    entry.Entry('tree', 111, 22, 'aaa').write(b'')
    assert set(layers.stale_layers(tree_entry, 'cfr')) == {'keyterms',
                                                           'other'}
    # The meta layer must depend on the version entry
    meta_deps = dependency.Graph().dependencies(
        str(entry.Layer.cfr(111, 22, 'aaa', 'meta')))
    assert str(version_entry) in meta_deps
def test_deletes_index(self):
    """Invoking `clear` with no arguments should empty the whole index."""
    with self.cli.isolated_filesystem():
        # Entry contents are stored as bytes; the str writes here were
        # inconsistent with the rest of the suite and fail on Python 3
        entry.Entry('aaa', 'bbb').write(b'ccc')
        entry.Entry('bbb', 'ccc').write(b'ddd')
        self.assertEqual(1, len(entry.Entry("aaa")))
        self.assertEqual(1, len(entry.Entry("bbb")))
        self.cli.invoke(clear)
        self.assertEqual(0, len(entry.Entry("aaa")))
        self.assertEqual(0, len(entry.Entry("bbb")))
def test_fetch_version_ids_skip_no_xml(monkeypatch):
    """We'll skip over all of the versions which don't have XML"""
    response = [{'document_number': '1', 'full_text_xml_url': 'something'},
                {'document_number': '2', 'full_text_xml_url': None},
                {'document_number': '3', 'full_text_xml_url': 'somewhere'}]
    monkeypatch.setattr(versions, 'fetch_notice_json',
                        Mock(return_value=response))
    path = entry.Entry("path")
    # '2' has no XML url, so it should be dropped
    assert versions.fetch_version_ids('title', 'part', path) == ['1', '3']
def test_rebuild(self):
    """Validate that the `rebuild()` method calculates the correct
    "stale" references"""
    with CliRunner().isolated_filesystem():
        graph = dependency.Graph()
        path = entry.Entry('path')
        a, b, c, d = [path / char for char in 'abcd']
        # (A, B) -> C -> D
        graph.add(c, a)
        graph.add(c, b)
        graph.add(d, c)

        # None of the files exist yet; A & B have no dependencies, so they
        # are stale due to themselves. C & D are stale due either A or B
        self.assert_rebuilt_state(graph, path, a='a', b='b', c='ab',
                                  d='ab')

        b.write(b'bbb')
        # B exists now, so dependency errors are only due to A now
        self.assert_rebuilt_state(graph, path, a='a', b='', c='a', d='a')

        a.write(b'aaa')
        # A exists now, too, so C is the bottleneck
        self.assert_rebuilt_state(graph, path, a='', b='', c='c', d='c')

        c.write(b'ccc')
        # Now there's only the final, self-reference
        self.assert_rebuilt_state(graph, path, a='', b='', c='', d='d')

        d.write(b'ddd')
        # Now no one is stale
        self.assert_rebuilt_state(graph, path, a='', b='', c='', d='')

        self._touch(a, 1000)
        # A's been updated. Need to run everything after it
        self.assert_rebuilt_state(graph, path, a='', b='', c='a', d='a')

        self._touch(d, 2000)
        self._touch(c, 3000)
        # C and D have been updated, but C's been updated after D
        self.assert_rebuilt_state(graph, path, a='', b='', c='', d='c')
def test_writes(self, meta_data, build_notice):
    """If the notice XML is present, we write the parsed version to disk,
    even if that version's already present"""
    with self.cli.isolated_filesystem():
        entry.Notice('1111').write(self.notice_xml)
        # The stub's return value must be configured *before* the command
        # runs; previously it was set after invoke, so build_notice never
        # saw it
        meta_data.return_value = {'example': 1}
        self.cli.invoke(fetch_sxs, ['1111'])
        self.assertTrue(build_notice.called)
        args, kwargs = build_notice.call_args
        # Was assertTrue(args[2], {'example': 1}): the dict was silently
        # treated as the failure *message*, so no comparison happened
        self.assertEqual(args[2], {'example': 1})
        self.assertIsInstance(kwargs['xml_to_process'], etree._Element)

        build_notice.reset_mock()
        # Entry contents are bytes, matching the rest of the suite
        entry.Entry('rule_changes', '1111').write(b'content')
        self.cli.invoke(fetch_sxs, ['1111'])
        self.assertTrue(build_notice.called)
def test_fetch_version_ids_local(monkeypatch):
    """If a notice is split into multiple entries locally, a single
    document number might result in multiple version ids"""
    response = [{'document_number': '1', 'full_text_xml_url': 'somewhere'},
                {'document_number': '22', 'full_text_xml_url': 'somewhere'}]
    monkeypatch.setattr(versions, 'fetch_notice_json',
                        Mock(return_value=response))
    path = entry.Entry("path")
    (path / '1_20010101').write(b'v1')
    (path / '1_20020202').write(b'v2')
    (path / '22').write(b'second')
    (path / '22-3344').write(b'unrelated file')
    # '1' expands into its two dated entries; '22-3344' is unrelated
    assert versions.fetch_version_ids('title', 'part', path) == [
        '1_20010101', '1_20020202', '22']