Example #1
 def test_coverage_export(self):
     now = datetime.datetime(2017, 5, 6, 16, 29, 43, tzinfo=datetime.timezone.utc)
     with app.app_context():
         job = Job(ACTION_TYPE_COVERAGE_EXPORT, coverage_id='fr-idf', state='failed',
                   id='8422cadb-4e68-4142-be27-f2ec32af49a3',
                   started_at=now)
         job.save()
         mailer = Mailer({})
         t = mailer.get_message(Job.get_one(job.id))
         result = t.split('\n')
         expected = [
             'Problem Tartare, Platform Unknown',
             '',
             '',
             'Start execution : {}'.format(now.isoformat(sep=' ')),
             'End execution : {}'.format(now.isoformat(sep=' ')),
             'Action type: {}'.format(ACTION_TYPE_COVERAGE_EXPORT),
             'Job: 8422cadb-4e68-4142-be27-f2ec32af49a3',
             'Step: None',
             'Coverage: fr-idf',
             'Error Message : ',
             '',
             '',
             '===========================================================================',
             'Automatic email from Tartare',
             '==========================================================================='
         ]
         self._same_list(expected, result)
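As a quick standard-library check of what now.isoformat(sep=' ') renders for the fixed timestamp used above (plain Python, no Tartare code involved):

import datetime

now = datetime.datetime(2017, 5, 6, 16, 29, 43, tzinfo=datetime.timezone.utc)
# isoformat(sep=' ') keeps the UTC offset, so both the "Start execution" and
# "End execution" lines of the expected mail read "2017-05-06 16:29:43+00:00"
print(now.isoformat(sep=' '))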
Example #2
    def test_post_dataset(self, data_source):
        raw = self.get('/contributors/id_test/data_sources/{}'.format(data_source.get('id')))
        self.assert_sucessful_call(raw)
        ds = self.json_to_dict(raw)['data_sources'][0]
        assert ds['status'] == DATA_SOURCE_STATUS_NEVER_FETCHED
        assert ds['fetch_started_at'] is None
        assert ds['updated_at'] is None
        assert ds['validity_period'] is None

        raw = self.post_manual_data_set('id_test', data_source.get('id'), 'gtfs/some_archive.zip')
        r = self.json_to_dict(raw)
        assert len(r["data_sets"]) == 1
        assert 'id' in r['data_sets'][0]

        with app.app_context():
            gridfs = mongo.db['fs.files'].find_one({'_id': ObjectId(r["data_sets"][0]["gridfs_id"])})
            assert gridfs["filename"] == "some_archive.zip"

        raw = self.get('/contributors/id_test/data_sources/{}'.format(data_source.get('id')))
        self.assert_sucessful_call(raw)
        ds = self.json_to_dict(raw)['data_sources'][0]
        assert 'id' in ds['data_sets'][0]
        assert ds['status'] == DATA_SOURCE_STATUS_UPDATED
        assert not ds['fetch_started_at']
        assert ds['updated_at']
        assert ds['validity_period']
Example #3
    def test_cancel_pending_updated_before(self):
        with app.app_context():
            job1 = Job(ACTION_TYPE_CONTRIBUTOR_EXPORT, 'cid1', None, None, 'pending', 'my-step', 'job1-id',
                       datetime.now() - timedelta(hours=10), datetime.now() - timedelta(hours=8))

            job2 = Job(ACTION_TYPE_COVERAGE_EXPORT, None, 'covida', None, 'done', 'my-step', 'job2-id',
                       datetime.now() - timedelta(hours=10), datetime.now() - timedelta(hours=8))

            job3 = Job(ACTION_TYPE_AUTO_COVERAGE_EXPORT, 'covidb', None, None, 'running', 'my-step', 'job3-id',
                       datetime.now() - timedelta(hours=15), datetime.now() - timedelta(hours=6))

            job4 = Job(ACTION_TYPE_AUTO_CONTRIBUTOR_EXPORT, 'cid2', None, None, 'failed', 'my-step', 'job4-id',
                       datetime.now() - timedelta(hours=10), datetime.now() - timedelta(hours=8))

            job5 = Job(ACTION_TYPE_CONTRIBUTOR_EXPORT, 'cid3', None, None, 'running', 'my-step', 'job5-id',
                       datetime.now() - timedelta(hours=2), datetime.now())
            jobs = [
                job1, job2, job3, job4, job5
            ]

            for job in jobs:
                job.save()

            cancelled_jobs = Job.cancel_pending_updated_before(4, [JOB_STATUS_RUNNING, JOB_STATUS_PENDING])
            assert len(cancelled_jobs) == 2
            assert [job.id for job in cancelled_jobs] == ['job1-id', 'job3-id']
Example #4
    def test_compute_directions(self, init_http_download_server, data_set_filename,
                                expected_trips_file_name):
        self.init_contributor('cid', 'dsid', self.format_url(init_http_download_server.ip_addr, data_set_filename,
                                                             path='compute_directions'), export_id='export_id')
        self.add_data_source_to_contributor('cid', 'config_ds_id',
                                            self.format_url(init_http_download_server.ip_addr, 'config.json',
                                                            path='compute_directions'), DATA_FORMAT_DIRECTION_CONFIG)
        self.add_process_to_contributor({
            'type': 'ComputeDirections',
            'input_data_source_ids': ['dsid'],
            'configuration_data_sources': [
                {'name': 'directions', 'ids': ['config_ds_id']}
            ],
            'sequence': 0
        }, 'cid')
        self.contributor_export('cid')

        gridfs_id = self.get_gridfs_id_from_data_source('cid', 'export_id')
        with app.app_context():
            new_zip_file = GridFsHandler().get_file_from_gridfs(gridfs_id)
        with ZipFile(new_zip_file, 'r') as new_zip_file:
            with tempfile.TemporaryDirectory() as tmp_dir_name:
                assert_zip_contains_only_txt_files(new_zip_file)
                new_zip_file.extractall(tmp_dir_name)
                assert_text_files_equals(os.path.join(tmp_dir_name, 'trips.txt'),
                                         _get_file_fixture_full_path(expected_trips_file_name))
Example #5
 def test_get_jobs_paginated(self):
     start = datetime_from_string('2014-04-15 15:37:44 UTC')
     with app.app_context():
         for i in range(1, 30 + 1):
             Job(id='job-{}'.format(i), started_at=start + datetime.timedelta(minutes=i)).save()
     # default pagination
     pagination, jobs = self.get_jobs()
     assert pagination == {'page': 1, 'per_page': 20, 'total': 30}
     assert len(jobs) == 20
     # with page
     pagination, jobs = self.get_jobs(page=2)
     assert pagination == {'page': 2, 'per_page': 20, 'total': 30}
     assert len(jobs) == 10
     # with per_page
     pagination, jobs = self.get_jobs(per_page=5)
     assert pagination == {'page': 1, 'per_page': 5, 'total': 30}
     assert len(jobs) == 5
     # with both
     pagination, jobs = self.get_jobs(page=2, per_page=5)
     assert pagination == {'page': 2, 'per_page': 5, 'total': 30}
     assert len(jobs) == 5
     # sorted by date
     assert [job['id'] for job in jobs] == ['job-25', 'job-24', 'job-23', 'job-22', 'job-21']
     pagination, jobs = self.get_jobs(page=4, per_page=5)
     assert len(jobs) == 5
     assert [job['id'] for job in jobs] == ['job-15', 'job-14', 'job-13', 'job-12', 'job-11']
     # last page with less than per_page elements
     pagination, jobs = self.get_jobs(page=7, per_page=4)
     assert len(jobs) == 4
     pagination, jobs = self.get_jobs(page=8, per_page=4)
     assert len(jobs) == 2
     # page with no elements
     pagination, jobs = self.get_jobs(page=9, per_page=4)
     assert len(jobs) == 0
     assert pagination == {'page': 9, 'per_page': 4, 'total': 30}
Example #6
        def test_ods_file_exist(_extract_path):
            with app.app_context():
                expected_filename = '{coverage_id}.zip'.format(coverage_id=cov_id)
                target_grid_fs_id = self.get_gridfs_id_from_data_source_of_coverage(cov_id, "target_id")
                ods_zip_file = GridFsHandler().get_file_from_gridfs(target_grid_fs_id)
                assert ods_zip_file.filename == expected_filename

            return ods_zip_file
Example #7
 def test_get_job_coverage(self):
     coverage_id = 'covid'
     with app.app_context():
         Job(coverage_id=coverage_id).save()
         Job().save()
         Job(coverage_id=coverage_id).save()
     _, all_jobs = self.get_jobs()
     assert len(all_jobs) == 3
     pagination, coverage_jobs = self.get_jobs(coverage_id=coverage_id)
     assert len(coverage_jobs) == 2
     assert pagination == {'page': 1, 'per_page': 20, 'total': 2}
Example #8
    def test_get_jobs_sorted_by_updated_date(self):
        with app.app_context():
            Job(id='job-1', updated_at=datetime_from_string('2014-04-15 15:37:44 UTC')).save()
            Job(id='job-2', updated_at=datetime_from_string('2014-04-20 15:37:44 UTC')).save()
            Job(id='job-3', updated_at=datetime_from_string('2014-04-10 15:37:44 UTC')).save()
            Job(id='job-4', updated_at=datetime_from_string('2014-05-01 15:37:44 UTC')).save()

        _, jobs = self.get_jobs()
        assert jobs[0]['id'] == 'job-4'
        assert jobs[1]['id'] == 'job-2'
        assert jobs[2]['id'] == 'job-1'
        assert jobs[3]['id'] == 'job-3'
Example #9
    def assert_agency_data_equals(self, expected_data, expected_filename):
        gridfs_id = self.get_gridfs_id_from_data_source('contrib_id', 'export_id')

        with app.app_context():
            new_gridfs_file = GridFsHandler().get_file_from_gridfs(gridfs_id)
            with ZipFile(new_gridfs_file, 'r') as gtfs_zip:
                assert_zip_contains_only_txt_files(gtfs_zip)
                assert gtfs_zip.filename == expected_filename
                assert 'agency.txt' in gtfs_zip.namelist()
                data = get_dict_from_zip(gtfs_zip, 'agency.txt')
                assert len(data) == 1
                for key, value in expected_data.items():
                    assert value == data[0][key]
Example #10
def test_contributor_process():
    map_test = {
        RuspellProcess(): contributor.Ruspell,
        HeadsignShortNameProcess(): contributor.HeadsignShortName,
        GtfsAgencyFileProcess(): contributor.GtfsAgencyFile,
        ComputeExternalSettingsProcess(): contributor.ComputeExternalSettings,
        ComputeDirectionsProcess(): contributor.ComputeDirections,
    }

    with app.app_context():
        for key, value in map_test.items():
            assert isinstance(ProcessManager.get_process(
                ContributorExportContext(Job(ACTION_TYPE_CONTRIBUTOR_EXPORT)), key), value)
    for key in map_test.keys():
        with pytest.raises(InvalidArguments) as excinfo:
            ProcessManager.get_process(CoverageExportContext(Job(ACTION_TYPE_COVERAGE_EXPORT)),
                                       key)
        assert str(excinfo.typename) == "InvalidArguments"
Example #11
    def test_headsign_short_name(self, init_http_download_server):
        url = self.format_url(ip=init_http_download_server.ip_addr,
                              path='headsign_short_name',
                              filename='headsign_short_name.zip')
        job = self.__contributor_creator(url)

        assert job['state'] == 'done'
        assert job['step'] == 'save_contributor_export'
        assert job['error_message'] == ''

        with app.app_context():
            gridfs_id = self.get_gridfs_id_from_data_source('id_test', 'export_id')
            new_zip_file = GridFsHandler().get_file_from_gridfs(gridfs_id)
            with ZipFile(new_zip_file, 'r') as new_zip_file:
                with tempfile.TemporaryDirectory() as tmp_dir_name:
                    assert_zip_contains_only_txt_files(new_zip_file)
                    new_zip_file.extractall(tmp_dir_name)
                    assert_text_files_equals(os.path.join(tmp_dir_name, 'trips.txt'),
                                             _get_file_fixture_full_path('headsign_short_name/ref_trips.txt'))
Example #12
    def test_historisation(self, contributor, init_http_download_server, exports_number):
        contributor = self.__init_contributor_config(contributor)
        self.__init_coverage_config()
        url_gtfs = self.format_url(ip=init_http_download_server.ip_addr, filename='historisation/gtfs-{number}.zip')
        url_config = self.format_url(ip=init_http_download_server.ip_addr,
                                     filename='historisation/config-{number}.json')

        for i in range(1, exports_number + 1):
            contributor['data_sources'][0]['input']["url"] = url_gtfs.format(number=i)
            contributor['data_sources'][1]['input']["url"] = url_config.format(number=i)
            self.put('/contributors/id_test', self.dict_to_json(contributor))
            self.full_export('id_test', 'jdr')
            raw = self.get('/contributors/id_test')
            contributor = self.assert_sucessful_call(raw)['contributors'][0]

        with app.app_context():
            self.assert_data_set_number('data_source_gtfs', exports_number)
            self.assert_data_set_number('data_source_config', exports_number)
            self.assert_coverage_exports_number(exports_number)
            self.assert_files_number(exports_number)
Example #13
    def test_automatic_update_twice_multi_contributor_and_multi_coverage(self, init_http_download_server):
        with freeze_time(datetime_from_string('2018-01-15 10:00:00 UTC')) as frozen_datetime:

            contributors = ['c1', 'c2', 'c3', 'c4']
            coverages = {'cA': ['c1_export', 'c2_export'], 'cB': ['c3_export'], 'cC': []}
            for contributor in contributors:
                self.__create_contributor(init_http_download_server.ip_addr, contributor)
            for cov, ds in coverages.items():
                self.__create_coverage(ds, cov)
            jobs_first_run = self.run_automatic_update()
            assert len(jobs_first_run) == 6
            contributor_export_jobs = list(
                filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_CONTRIBUTOR_EXPORT
                       and job['step'] == 'save_contributor_export',
                       jobs_first_run))
            coverage_export_jobs = list(
                filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_COVERAGE_EXPORT
                       and job['step'] == 'save_coverage_export',
                       jobs_first_run))
            assert len(contributor_export_jobs) == 4  # all contributor_export are launched
            assert len(coverage_export_jobs) == 2  # cA and cB launched (not cC because no contributors attached)

            # remove old jobs
            with app.app_context():
                mongo.db['jobs'].delete_many({})

            # update c1 data source
            self.update_data_source_url('c1', 'ds_c1',
                                        self.format_url(init_http_download_server.ip_addr, 'sample_1.zip'))
            frozen_datetime.move_to(datetime_from_string('2018-01-15 10:08:00 UTC'))
            jobs_second_run = self.run_automatic_update()
            contributor_export_jobs = list(
                filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_CONTRIBUTOR_EXPORT, jobs_second_run))
            coverage_export_jobs = list(
                filter(lambda job: job['action_type'] == ACTION_TYPE_AUTO_COVERAGE_EXPORT, jobs_second_run))
            assert len(contributor_export_jobs) == 4  # all contributor_export are launched
            assert len(coverage_export_jobs) == 1  # cA launched because c1 was updated
            contributor_export_unchanged_jobs = list(
                filter(lambda job: job['step'] == 'fetching data', contributor_export_jobs))
            # the 3 contributors whose data source url did not change stop at the 'fetching data' step,
            # so they do not trigger a coverage export
            assert len(contributor_export_unchanged_jobs) == 3
Example #14
    def test_fetch_ok(self, init_http_download_server, contributor):
        ip = init_http_download_server.ip_addr
        url = self.format_url(ip, 'sample_1.zip')
        contributor['data_sources'].append({
            "name": "bobette",
            "data_format": "gtfs",
            "input": {
                "type": "auto",
                "url": url,
                "frequency": {
                    "type": "daily",
                    "hour_of_day": 20
                }
            }
        })
        raw = self.put('/contributors/id_test', params=self.dict_to_json(contributor))

        json_response = self.json_to_dict(raw)
        data_source_id = json_response['contributors'][0]['data_sources'][0]['id']

        raw = self.post('/contributors/{}/data_sources/{}/actions/fetch'.format(contributor['id'], data_source_id))

        self.assert_sucessful_call(raw, 204)

        raw = self.get('contributors/{}/data_sources/{}'.format(contributor['id'], data_source_id))
        data_source = self.json_to_dict(raw)['data_sources'][0]
        data_set = data_source['data_sets'][0]
        # Test that source file and saved file are the same
        with app.app_context():
            gridout = GridFsHandler().get_file_from_gridfs(data_set['gridfs_id'])
            expected_path = _get_file_fixture_full_path('gtfs/sample_1.zip')

            with tempfile.TemporaryDirectory() as path:
                gridout_path = os.path.join(path, gridout.filename)
                with open(gridout_path, 'wb+') as f:
                    f.write(gridout.read())
                    assert_text_files_equals(gridout_path, expected_path)
            jobs = self.get_all_jobs()
            assert len(jobs) == 0
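GridFsHandler().get_file_from_gridfs(...), used here and in several examples above, returns a file-like object exposing .filename and .read(). A minimal sketch of the equivalent read done directly with PyMongo's gridfs module; the Mongo URI and database name are assumptions for illustration:

import gridfs
from bson import ObjectId
from pymongo import MongoClient

db = MongoClient('mongodb://localhost:27017')['tartare_test']  # assumed URI and database name
fs = gridfs.GridFS(db)

def read_archive(gridfs_id: str) -> bytes:
    grid_out = fs.get(ObjectId(gridfs_id))  # GridOut exposes .filename and .read()
    return grid_out.read()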
Example #15
    def test_get_job_with_details(self):
        now = datetime_from_string('2014-04-15 15:37:44 UTC')
        update = now + datetime.timedelta(days=1)
        update2 = now + datetime.timedelta(days=10)
        with app.app_context():
            Job(ACTION_TYPE_CONTRIBUTOR_EXPORT, 'cid1', None, None, 'pending', 'my-step', 'job1-id', now, update, "",
                'dsid').save()
            Job(ACTION_TYPE_COVERAGE_EXPORT, None, 'covid', None, 'failed', 'failed-step', 'job2-id', now, update2,
                "boom",
                'dsid').save()

        _, jobs = self.get_jobs()
        assert len(jobs) == 2

        job1 = self.get_job_details('job1-id')
        assert job1['id'] == 'job1-id'
        assert job1['action_type'] == ACTION_TYPE_CONTRIBUTOR_EXPORT
        assert job1['step'] == 'my-step'
        assert job1['state'] == 'pending'
        assert job1['started_at'] == '2014-04-15T15:37:44+00:00'
        assert job1['updated_at'] == '2014-04-16T15:37:44+00:00'
        assert job1['error_message'] == ''
        assert job1['contributor_id'] == 'cid1'
        assert not job1['coverage_id']
        assert not job1['parent_id']

        job2 = self.get_job_details('job2-id')
        assert job2['id'] == 'job2-id'
        assert job2['action_type'] == ACTION_TYPE_COVERAGE_EXPORT
        assert job2['step'] == 'failed-step'
        assert job2['state'] == 'failed'
        assert job2['started_at'] == '2014-04-15T15:37:44+00:00'
        assert job2['updated_at'] == '2014-04-25T15:37:44+00:00'
        assert job2['error_message'] == 'boom'
        assert job2['coverage_id'] == 'covid'
        assert not job2['contributor_id']
        assert not job2['parent_id']
Example #16
 def test_prepare_external_settings(self, init_http_download_server):
     valid_process = {
         'type': 'ComputeExternalSettings',
         'input_data_source_ids': ['dsid'],
         'target_data_source_id': 'target_id',
         'sequence': 0,
         'configuration_data_sources': [
             {'name': 'perimeter', 'ids': ['perimeter_id']},
             {'name': 'lines_referential', 'ids': ['lines_referential_id']},
         ]
     }
     self.init_contributor('cid', 'dsid',
                           self.format_url(init_http_download_server.ip_addr, 'fr-idf-custo-post-fusio-sample.zip',
                                           'prepare_external_settings'), data_prefix='OIF')
     self.add_data_source_to_contributor('cid', 'perimeter_id',
                                         self.format_url(init_http_download_server.ip_addr, 'tr_perimeter_id.json',
                                                         'prepare_external_settings'), DATA_FORMAT_TR_PERIMETER)
     self.add_data_source_to_contributor('cid', 'lines_referential_id',
                                         self.format_url(init_http_download_server.ip_addr,
                                                         'lines_referential_id.json', 'prepare_external_settings'),
                                         DATA_FORMAT_LINES_REFERENTIAL)
     self.add_process_to_contributor(valid_process, 'cid')
     self.contributor_export('cid')
     target_grid_fs_id = self.get_gridfs_id_from_data_source('cid', 'target_id')
     with app.app_context():
         fusio_settings_zip_file = GridFsHandler().get_file_from_gridfs(target_grid_fs_id)
         with ZipFile(fusio_settings_zip_file, 'r') as fusio_settings_zip_file:
             with tempfile.TemporaryDirectory() as tmp_dir_name:
                 assert_zip_contains_only_files_with_extensions(fusio_settings_zip_file, ['txt'])
                 fusio_settings_zip_file.extractall(tmp_dir_name)
                 assert_text_files_equals(os.path.join(tmp_dir_name, 'fusio_object_codes.txt'),
                                          _get_file_fixture_full_path(
                                              'prepare_external_settings/expected_fusio_object_codes.txt'))
                 assert_text_files_equals(os.path.join(tmp_dir_name, 'fusio_object_properties.txt'),
                                          _get_file_fixture_full_path(
                                              'prepare_external_settings/expected_fusio_object_properties.txt'))
Example #17
def get_app_context():
    with app.app_context():
        yield
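This generator has the typical shape of a pytest fixture that holds a Flask application context open for the whole test. A minimal sketch of how such a fixture could be declared and consumed; the @pytest.fixture decorator and the test below are assumptions for illustration, not Tartare's actual conftest:

import pytest
from flask import Flask, current_app

app = Flask(__name__)

@pytest.fixture
def get_app_context():
    # push an application context for the duration of each test using the fixture
    with app.app_context():
        yield

def test_current_app_is_bound(get_app_context):
    # current_app only resolves while the fixture keeps a context pushed
    assert current_app.name == app.name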
Example #18
def get_app_context():
    with app.app_context():
        yield
Example #19
def empty_mongo(docker):
    """Empty mongo db before each tests"""
    with app.app_context():
        mongo.db.client.drop_database(docker.DBNAME)
        models.init_mongo()
Example #20
 def __assert_should_fetch(self, should_fetch):
     with app.app_context():
         data_source = DataSource.get_one(data_source_id='dsid')
         assert data_source.should_fetch() == should_fetch
Example #21
 def test_get_one_job_found(self):
     job_id = 'my_id'
     with app.app_context():
         Job(id=job_id).save()
     job = self.get_job_details(job_id)
     assert job['id'] == job_id
Example #22
def empty_mongo(docker):
    """Empty mongo db before each tests"""
    with app.app_context():
        mongo.db.client.drop_database(docker.DBNAME)
        models.init_mongo()
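All of the snippets above share one pattern: anything that touches Flask-managed state (the mongo extension, GridFS, the model classes) runs inside with app.app_context():. A minimal, self-contained illustration of why the wrapper matters, using only plain Flask; the config key and value are placeholders, not Tartare's configuration:

from flask import Flask, current_app

app = Flask(__name__)
app.config['DBNAME'] = 'tartare_test'  # placeholder value for illustration

def read_config_value():
    # current_app is only bound while an application context is pushed;
    # using it outside the block raises "Working outside of application context"
    with app.app_context():
        return current_app.config['DBNAME']

assert read_config_value() == 'tartare_test'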