def test_coverage_export(self):
    now = datetime.datetime(2017, 5, 6, 16, 29, 43, tzinfo=datetime.timezone.utc)
    with app.app_context():
        job = Job(ACTION_TYPE_COVERAGE_EXPORT, coverage_id='fr-idf', state='failed',
                  id='8422cadb-4e68-4142-be27-f2ec32af49a3', started_at=now)
        job.save()
        mailer = Mailer({})
        t = mailer.get_message(Job.get_one(job.id))
        result = t.split('\n')
        expected = [
            'Problem Tartare, Platform Unknown',
            '',
            '',
            'Start execution : {}'.format(now.isoformat(sep=' ')),
            'End execution : {}'.format(now.isoformat(sep=' ')),
            'Action type: {}'.format(ACTION_TYPE_COVERAGE_EXPORT),
            'Job: 8422cadb-4e68-4142-be27-f2ec32af49a3',
            'Step: None',
            'Coverage: fr-idf',
            'Error Message : ',
            '',
            '',
            '===========================================================================',
            'Automatic email from Tartare',
            '===========================================================================',
        ]
        self._same_list(expected, result)
def test_post_dataset(self, data_source):
    raw = self.get('/contributors/id_test/data_sources/{}'.format(data_source.get('id')))
    self.assert_sucessful_call(raw)
    ds = self.json_to_dict(raw)['data_sources'][0]
    assert ds['status'] == DATA_SOURCE_STATUS_NEVER_FETCHED
    assert ds['fetch_started_at'] is None
    assert ds['updated_at'] is None
    assert ds['validity_period'] is None

    raw = self.post_manual_data_set('id_test', data_source.get('id'), 'gtfs/some_archive.zip')
    r = self.json_to_dict(raw)
    assert len(r['data_sets']) == 1
    assert 'id' in r['data_sets'][0]
    with app.app_context():
        gridfs = mongo.db['fs.files'].find_one({'_id': ObjectId(r['data_sets'][0]['gridfs_id'])})
        assert gridfs['filename'] == 'some_archive.zip'

    raw = self.get('/contributors/id_test/data_sources/{}'.format(data_source.get('id')))
    self.assert_sucessful_call(raw)
    ds = self.json_to_dict(raw)['data_sources'][0]
    assert 'id' in ds['data_sets'][0]
    assert ds['status'] == DATA_SOURCE_STATUS_UPDATED
    assert not ds['fetch_started_at']
    assert ds['updated_at']
    assert ds['validity_period']
def test_cancel_pending_updated_before(self):
    with app.app_context():
        job1 = Job(ACTION_TYPE_CONTRIBUTOR_EXPORT, 'cid1', None, None, 'pending', 'my-step', 'job1-id',
                   datetime.now() - timedelta(hours=10), datetime.now() - timedelta(hours=8))
        job2 = Job(ACTION_TYPE_COVERAGE_EXPORT, None, 'covida', None, 'done', 'my-step', 'job2-id',
                   datetime.now() - timedelta(hours=10), datetime.now() - timedelta(hours=8))
        job3 = Job(ACTION_TYPE_AUTO_COVERAGE_EXPORT, 'covidb', None, None, 'running', 'my-step', 'job3-id',
                   datetime.now() - timedelta(hours=15), datetime.now() - timedelta(hours=6))
        job4 = Job(ACTION_TYPE_AUTO_CONTRIBUTOR_EXPORT, 'cid2', None, None, 'failed', 'my-step', 'job4-id',
                   datetime.now() - timedelta(hours=10), datetime.now() - timedelta(hours=8))
        job5 = Job(ACTION_TYPE_CONTRIBUTOR_EXPORT, 'cid3', None, None, 'running', 'my-step', 'job5-id',
                   datetime.now() - timedelta(hours=2), datetime.now())
        jobs = [job1, job2, job3, job4, job5]
        for job in jobs:
            job.save()

        cancelled_jobs = Job.cancel_pending_updated_before(4, [JOB_STATUS_RUNNING, JOB_STATUS_PENDING])
        assert len(cancelled_jobs) == 2
        assert [job.id for job in cancelled_jobs] == ['job1-id', 'job3-id']
def test_compute_directions(self, init_http_download_server, data_set_filename, expected_trips_file_name):
    self.init_contributor('cid', 'dsid',
                          self.format_url(init_http_download_server.ip_addr, data_set_filename,
                                          path='compute_directions'),
                          export_id='export_id')
    self.add_data_source_to_contributor('cid', 'config_ds_id',
                                        self.format_url(init_http_download_server.ip_addr, 'config.json',
                                                        path='compute_directions'),
                                        DATA_FORMAT_DIRECTION_CONFIG)
    self.add_process_to_contributor({
        'type': 'ComputeDirections',
        'input_data_source_ids': ['dsid'],
        'configuration_data_sources': [
            {'name': 'directions', 'ids': ['config_ds_id']}
        ],
        'sequence': 0
    }, 'cid')
    self.contributor_export('cid')
    gridfs_id = self.get_gridfs_id_from_data_source('cid', 'export_id')
    with app.app_context():
        new_zip_file = GridFsHandler().get_file_from_gridfs(gridfs_id)
        with ZipFile(new_zip_file, 'r') as new_zip_file:
            with tempfile.TemporaryDirectory() as tmp_dir_name:
                assert_zip_contains_only_txt_files(new_zip_file)
                new_zip_file.extractall(tmp_dir_name)
                assert_text_files_equals(os.path.join(tmp_dir_name, 'trips.txt'),
                                         _get_file_fixture_full_path(expected_trips_file_name))
def test_get_jobs_paginated(self):
    start = datetime_from_string('2014-04-15 15:37:44 UTC')
    with app.app_context():
        for i in range(1, 30 + 1):
            Job(id='job-{}'.format(i), started_at=start + datetime.timedelta(minutes=i)).save()

    # default pagination
    pagination, jobs = self.get_jobs()
    assert pagination == {'page': 1, 'per_page': 20, 'total': 30}
    assert len(jobs) == 20

    # with page
    pagination, jobs = self.get_jobs(page=2)
    assert pagination == {'page': 2, 'per_page': 20, 'total': 30}
    assert len(jobs) == 10

    # with per_page
    pagination, jobs = self.get_jobs(per_page=5)
    assert pagination == {'page': 1, 'per_page': 5, 'total': 30}
    assert len(jobs) == 5

    # with both
    pagination, jobs = self.get_jobs(page=2, per_page=5)
    assert pagination == {'page': 2, 'per_page': 5, 'total': 30}
    assert len(jobs) == 5
    # sorted by start date, most recent first
    assert [job['id'] for job in jobs] == ['job-25', 'job-24', 'job-23', 'job-22', 'job-21']
    pagination, jobs = self.get_jobs(page=4, per_page=5)
    assert len(jobs) == 5
    assert [job['id'] for job in jobs] == ['job-15', 'job-14', 'job-13', 'job-12', 'job-11']

    # last page with less than per_page elements
    pagination, jobs = self.get_jobs(page=7, per_page=4)
    assert len(jobs) == 4
    pagination, jobs = self.get_jobs(page=8, per_page=4)
    assert len(jobs) == 2

    # page with no elements
    pagination, jobs = self.get_jobs(page=9, per_page=4)
    assert len(jobs) == 0
    assert pagination == {'page': 9, 'per_page': 4, 'total': 30}
def test_ods_file_exist(_extract_path):
    # nested helper: `self` and `cov_id` come from the enclosing test scope
    with app.app_context():
        expected_filename = '{coverage_id}.zip'.format(coverage_id=cov_id)
        target_grid_fs_id = self.get_gridfs_id_from_data_source_of_coverage(cov_id, 'target_id')
        ods_zip_file = GridFsHandler().get_file_from_gridfs(target_grid_fs_id)
        assert ods_zip_file.filename == expected_filename
        return ods_zip_file
def test_get_job_coverage(self):
    coverage_id = 'covid'
    with app.app_context():
        Job(coverage_id=coverage_id).save()
        Job().save()
        Job(coverage_id=coverage_id).save()

    _, all_jobs = self.get_jobs()
    assert len(all_jobs) == 3
    pagination, coverage_jobs = self.get_jobs(coverage_id=coverage_id)
    assert len(coverage_jobs) == 2
    assert pagination == {'page': 1, 'per_page': 20, 'total': 2}
def test_get_jobs_sorted_by_updated_date(self):
    with app.app_context():
        Job(id='job-1', updated_at=datetime_from_string('2014-04-15 15:37:44 UTC')).save()
        Job(id='job-2', updated_at=datetime_from_string('2014-04-20 15:37:44 UTC')).save()
        Job(id='job-3', updated_at=datetime_from_string('2014-04-10 15:37:44 UTC')).save()
        Job(id='job-4', updated_at=datetime_from_string('2014-05-01 15:37:44 UTC')).save()

    _, jobs = self.get_jobs()
    # most recently updated comes first
    assert jobs[0]['id'] == 'job-4'
    assert jobs[1]['id'] == 'job-2'
    assert jobs[2]['id'] == 'job-1'
    assert jobs[3]['id'] == 'job-3'
def assert_agency_data_equals(self, expected_data, expected_filename):
    gridfs_id = self.get_gridfs_id_from_data_source('contrib_id', 'export_id')
    with app.app_context():
        new_gridfs_file = GridFsHandler().get_file_from_gridfs(gridfs_id)
        with ZipFile(new_gridfs_file, 'r') as gtfs_zip:
            assert_zip_contains_only_txt_files(gtfs_zip)
            assert gtfs_zip.filename == expected_filename
            assert 'agency.txt' in gtfs_zip.namelist()
            data = get_dict_from_zip(gtfs_zip, 'agency.txt')
            assert len(data) == 1
            for key, value in expected_data.items():
                assert value == data[0][key]
def test_contributor_process():
    map_test = {
        RuspellProcess(): contributor.Ruspell,
        HeadsignShortNameProcess(): contributor.HeadsignShortName,
        GtfsAgencyFileProcess(): contributor.GtfsAgencyFile,
        ComputeExternalSettingsProcess(): contributor.ComputeExternalSettings,
        ComputeDirectionsProcess(): contributor.ComputeDirections,
    }
    with app.app_context():
        # each contributor process resolves to its implementation in a contributor export context
        for key, value in map_test.items():
            assert isinstance(ProcessManager.get_process(
                ContributorExportContext(Job(ACTION_TYPE_CONTRIBUTOR_EXPORT)), key), value)
        # but requesting a contributor process in a coverage export context is invalid
        for key in map_test.keys():
            with pytest.raises(InvalidArguments) as excinfo:
                ProcessManager.get_process(CoverageExportContext(Job(ACTION_TYPE_COVERAGE_EXPORT)), key)
            assert excinfo.typename == 'InvalidArguments'
def test_headsign_short_name(self, init_http_download_server):
    url = self.format_url(ip=init_http_download_server.ip_addr, path='headsign_short_name',
                          filename='headsign_short_name.zip')
    job = self.__contributor_creator(url)
    assert job['state'] == 'done'
    assert job['step'] == 'save_contributor_export'
    assert job['error_message'] == ''
    with app.app_context():
        gridfs_id = self.get_gridfs_id_from_data_source('id_test', 'export_id')
        new_zip_file = GridFsHandler().get_file_from_gridfs(gridfs_id)
        with ZipFile(new_zip_file, 'r') as new_zip_file:
            with tempfile.TemporaryDirectory() as tmp_dir_name:
                assert_zip_contains_only_txt_files(new_zip_file)
                new_zip_file.extractall(tmp_dir_name)
                assert_text_files_equals(os.path.join(tmp_dir_name, 'trips.txt'),
                                         _get_file_fixture_full_path('headsign_short_name/ref_trips.txt'))
def test_historisation(self, contributor, init_http_download_server, exports_number):
    contributor = self.__init_contributor_config(contributor)
    self.__init_coverage_config()
    url_gtfs = self.format_url(ip=init_http_download_server.ip_addr,
                               filename='historisation/gtfs-{number}.zip')
    url_config = self.format_url(ip=init_http_download_server.ip_addr,
                                 filename='historisation/config-{number}.json')
    for i in range(1, exports_number + 1):
        contributor['data_sources'][0]['input']['url'] = url_gtfs.format(number=i)
        contributor['data_sources'][1]['input']['url'] = url_config.format(number=i)
        self.put('/contributors/id_test', self.dict_to_json(contributor))
        self.full_export('id_test', 'jdr')
        raw = self.get('/contributors/id_test')
        contributor = self.assert_sucessful_call(raw)['contributors'][0]

    with app.app_context():
        self.assert_data_set_number('data_source_gtfs', exports_number)
        self.assert_data_set_number('data_source_config', exports_number)
        self.assert_coverage_exports_number(exports_number)
        self.assert_files_number(exports_number)
def test_automatic_update_twice_multi_contributor_and_multi_coverage(self, init_http_download_server):
    with freeze_time(datetime_from_string('2018-01-15 10:00:00 UTC')) as frozen_datetime:
        contributors = ['c1', 'c2', 'c3', 'c4']
        coverages = {'cA': ['c1_export', 'c2_export'], 'cB': ['c3_export'], 'cC': []}
        for contributor in contributors:
            self.__create_contributor(init_http_download_server.ip_addr, contributor)
        for cov, ds in coverages.items():
            self.__create_coverage(ds, cov)

        jobs_first_run = self.run_automatic_update()
        assert len(jobs_first_run) == 6
        contributor_export_jobs = list(
            filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_CONTRIBUTOR_EXPORT and
                   job['step'] == 'save_contributor_export', jobs_first_run))
        coverage_export_jobs = list(
            filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_COVERAGE_EXPORT and
                   job['step'] == 'save_coverage_export', jobs_first_run))
        assert len(contributor_export_jobs) == 4  # all contributor exports are launched
        assert len(coverage_export_jobs) == 2  # cA and cB launched (not cC, which has no contributors attached)

        # remove old jobs
        with app.app_context():
            mongo.db['jobs'].delete_many({})

        # update c1 data source
        self.update_data_source_url('c1', 'ds_c1',
                                    self.format_url(init_http_download_server.ip_addr, 'sample_1.zip'))
        frozen_datetime.move_to(datetime_from_string('2018-01-15 10:08:00 UTC'))
        jobs_second_run = self.run_automatic_update()
        contributor_export_jobs = list(
            filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_CONTRIBUTOR_EXPORT, jobs_second_run))
        coverage_export_jobs = list(
            filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_COVERAGE_EXPORT, jobs_second_run))
        assert len(contributor_export_jobs) == 4  # all contributor exports are launched
        assert len(coverage_export_jobs) == 1  # cA launched because c1 was updated
        contributor_export_unchanged_jobs = list(
            filter(lambda job: job['step'] == 'fetching data', contributor_export_jobs))
        # when a data source url does not change, no coverage export is generated
        assert len(contributor_export_unchanged_jobs) == 3
def test_fetch_ok(self, init_http_download_server, contributor):
    ip = init_http_download_server.ip_addr
    url = self.format_url(ip, 'sample_1.zip')
    contributor['data_sources'].append({
        'name': 'bobette',
        'data_format': 'gtfs',
        'input': {
            'type': 'auto',
            'url': url,
            'frequency': {
                'type': 'daily',
                'hour_of_day': 20
            }
        }
    })
    raw = self.put('/contributors/id_test', params=self.dict_to_json(contributor))
    json_response = self.json_to_dict(raw)
    data_source_id = json_response['contributors'][0]['data_sources'][0]['id']
    raw = self.post('/contributors/{}/data_sources/{}/actions/fetch'.format(contributor['id'], data_source_id))
    self.assert_sucessful_call(raw, 204)
    data_source = self.json_to_dict(
        self.get('contributors/{}/data_sources/{}'.format(contributor['id'], data_source_id))
    )['data_sources'][0]
    data_set = data_source['data_sets'][0]

    # Test that source file and saved file are the same
    with app.app_context():
        gridout = GridFsHandler().get_file_from_gridfs(data_set['gridfs_id'])
        expected_path = _get_file_fixture_full_path('gtfs/sample_1.zip')
        with tempfile.TemporaryDirectory() as path:
            gridout_path = os.path.join(path, gridout.filename)
            with open(gridout_path, 'wb+') as f:
                f.write(gridout.read())
            assert_text_files_equals(gridout_path, expected_path)
    jobs = self.get_all_jobs()
    assert len(jobs) == 0
def test_get_job_with_details(self):
    now = datetime_from_string('2014-04-15 15:37:44 UTC')
    update = now + datetime.timedelta(days=1)
    update2 = now + datetime.timedelta(days=10)
    with app.app_context():
        Job(ACTION_TYPE_CONTRIBUTOR_EXPORT, 'cid1', None, None, 'pending', 'my-step', 'job1-id',
            now, update, '', 'dsid').save()
        Job(ACTION_TYPE_COVERAGE_EXPORT, None, 'covid', None, 'failed', 'failed-step', 'job2-id',
            now, update2, 'boom', 'dsid').save()

    _, jobs = self.get_jobs()
    assert len(jobs) == 2

    job1 = self.get_job_details('job1-id')
    assert job1['id'] == 'job1-id'
    assert job1['action_type'] == ACTION_TYPE_CONTRIBUTOR_EXPORT
    assert job1['step'] == 'my-step'
    assert job1['state'] == 'pending'
    assert job1['started_at'] == '2014-04-15T15:37:44+00:00'
    assert job1['updated_at'] == '2014-04-16T15:37:44+00:00'
    assert job1['error_message'] == ''
    assert job1['contributor_id'] == 'cid1'
    assert not job1['coverage_id']
    assert not job1['parent_id']

    job2 = self.get_job_details('job2-id')
    assert job2['id'] == 'job2-id'
    assert job2['action_type'] == ACTION_TYPE_COVERAGE_EXPORT
    assert job2['step'] == 'failed-step'
    assert job2['state'] == 'failed'
    assert job2['started_at'] == '2014-04-15T15:37:44+00:00'
    assert job2['updated_at'] == '2014-04-25T15:37:44+00:00'
    assert job2['error_message'] == 'boom'
    assert job2['coverage_id'] == 'covid'
    assert not job2['contributor_id']
    assert not job2['parent_id']
def test_prepare_external_settings(self, init_http_download_server):
    valid_process = {
        'type': 'ComputeExternalSettings',
        'input_data_source_ids': ['dsid'],
        'target_data_source_id': 'target_id',
        'sequence': 0,
        'configuration_data_sources': [
            {'name': 'perimeter', 'ids': ['perimeter_id']},
            {'name': 'lines_referential', 'ids': ['lines_referential_id']},
        ]
    }
    self.init_contributor('cid', 'dsid',
                          self.format_url(init_http_download_server.ip_addr,
                                          'fr-idf-custo-post-fusio-sample.zip',
                                          'prepare_external_settings'),
                          data_prefix='OIF')
    self.add_data_source_to_contributor('cid', 'perimeter_id',
                                        self.format_url(init_http_download_server.ip_addr,
                                                        'tr_perimeter_id.json',
                                                        'prepare_external_settings'),
                                        DATA_FORMAT_TR_PERIMETER)
    self.add_data_source_to_contributor('cid', 'lines_referential_id',
                                        self.format_url(init_http_download_server.ip_addr,
                                                        'lines_referential_id.json',
                                                        'prepare_external_settings'),
                                        DATA_FORMAT_LINES_REFERENTIAL)
    self.add_process_to_contributor(valid_process, 'cid')
    self.contributor_export('cid')

    target_grid_fs_id = self.get_gridfs_id_from_data_source('cid', 'target_id')
    with app.app_context():
        fusio_settings_zip_file = GridFsHandler().get_file_from_gridfs(target_grid_fs_id)
        with ZipFile(fusio_settings_zip_file, 'r') as fusio_settings_zip_file:
            with tempfile.TemporaryDirectory() as tmp_dir_name:
                assert_zip_contains_only_files_with_extensions(fusio_settings_zip_file, ['txt'])
                fusio_settings_zip_file.extractall(tmp_dir_name)
                assert_text_files_equals(os.path.join(tmp_dir_name, 'fusio_object_codes.txt'),
                                         _get_file_fixture_full_path(
                                             'prepare_external_settings/expected_fusio_object_codes.txt'))
                assert_text_files_equals(os.path.join(tmp_dir_name, 'fusio_object_properties.txt'),
                                         _get_file_fixture_full_path(
                                             'prepare_external_settings/expected_fusio_object_properties.txt'))
def get_app_context():
    with app.app_context():
        yield
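# Hedged usage sketch (not part of the original suite): assuming get_app_context is
# registered as a pytest fixture, a test can depend on it so that model calls such as
# Job.save() and Job.get_one() run inside a Flask application context without an
# explicit `with` block. The test name and job id below are hypothetical.
def test_job_save_with_app_context_fixture(get_app_context):
    Job(id='ctx-job').save()
    assert Job.get_one('ctx-job').id == 'ctx-job'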
def empty_mongo(docker):
    """Empty the mongo db before each test"""
    with app.app_context():
        mongo.db.client.drop_database(docker.DBNAME)
        models.init_mongo()
def __assert_should_fetch(self, should_fetch):
    with app.app_context():
        data_source = DataSource.get_one(data_source_id='dsid')
        assert data_source.should_fetch() == should_fetch
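# Hypothetical illustration (not in the original suite): the helper above can bracket a
# fetch scenario. The contributor/data source setup mirrors init_contributor calls used
# elsewhere in this suite; the expectation that should_fetch() flips to False right after
# a manual fetch is an assumption about the frequency logic, not a documented guarantee.
def test_should_fetch_example(self, init_http_download_server):
    self.init_contributor('cid', 'dsid',
                          self.format_url(init_http_download_server.ip_addr, 'sample_1.zip'))
    self.__assert_should_fetch(True)
    self.post('/contributors/cid/data_sources/dsid/actions/fetch')
    self.__assert_should_fetch(False)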
def test_get_one_job_found(self):
    job_id = 'my_id'
    with app.app_context():
        Job(id=job_id).save()

    job = self.get_job_details(job_id)
    assert job['id'] == job_id