Exemple #1
0
def create_autocomplete_job_with_data_sets():
    """Create and commit a 'done' job for a fresh 'fr' instance holding two
    datasets: one 'fusio' (family 'pt') and one 'cosmogony' (family
    'autocomplete_cosmogony')."""
    with app.app_context():
        instance = models.Instance('fr')
        models.db.session.add(instance)
        models.db.session.commit()

        job = models.Job()
        job.instance = instance

        # we also create 2 datasets, one for fusio, one for autocomplete_cosmogony
        for i, dset_type in enumerate(['fusio', 'cosmogony']):
            dataset = models.DataSet()
            dataset.type = dset_type
            # FIX: the original pre-assigned family_type = dataset.type here,
            # but both branches below overwrite it, so that line was dead code.
            if dataset.type == 'fusio':
                dataset.family_type = 'pt'
                dataset.name = '/path/to/dataset_{}'.format(i)
            else:
                dataset.family_type = 'autocomplete_cosmogony'
                dataset.name = '/path/to/dataset_cosmogony/cosmogony_europe.jsonl.gz'

            models.db.session.add(dataset)
            job.data_sets.append(dataset)

        job.state = 'done'
        models.db.session.add(job)
        models.db.session.commit()
Exemple #2
0
def build_data(instance):
    """Create a pending job for *instance* and queue the ed2nav -> finish_job chain."""
    instance_config = load_instance_config(instance.name)
    job = models.Job()
    job.instance = instance
    job.state = 'pending'
    models.db.session.add(job)
    models.db.session.commit()
    # the job must be committed first so job.id exists for the tasks
    workflow = chain(ed2nav.si(instance_config, job.id, None), finish_job.si(job.id))
    workflow.delay()
    current_app.logger.info("Job build data of : %s queued"%instance.name)
Exemple #3
0
def send_to_mimir(instance, filename, family_type):
    """
    :param instance: instance to receive the data
    :param filename: file to inject towards mimir
    :param family_type: dataset's family type

    - create a job with a data_set
    - data injection towards mimir(stops2mimir, ntfs2mimir, poi2mimir)

    returns action list
    """
    # mimir not configured: do not try to import autocompletion data
    if not current_app.config.get('MIMIR_URL'):
        return []

    # mimir only deals with these family types
    if family_type not in ['pt', 'poi']:
        return []

    # no mimir-related import requested: avoid creating a useless job
    if not instance.import_ntfs_in_mimir and not instance.import_stops_in_mimir:
        return []

    job = models.Job()
    job.instance = instance
    job.state = 'running'

    dataset = models.DataSet()
    dataset.family_type = 'mimir'
    dataset.type = 'fusio'
    # currently the name of a dataset is the path to it
    dataset.name = filename

    models.db.session.add(dataset)
    job.data_sets.append(dataset)
    models.db.session.add(job)
    models.db.session.commit()

    actions = []
    if family_type == 'pt':
        if instance.import_ntfs_in_mimir:
            # Import ntfs in Mimir
            actions.append(ntfs2mimir.si(instance.name, filename, job.id, dataset_uid=dataset.uid))
        elif instance.import_stops_in_mimir:
            # Import stops in Mimir (only when ntfs is not imported).
            # This action is deprecated: https://github.com/CanalTP/mimirsbrunn/blob/4430eed1d81247fffa7cf32ba675a9c5ad8b1cbe/documentation/components.md#stops2mimir
            actions.append(stops2mimir.si(instance.name, filename, job.id, dataset_uid=dataset.uid))
    else:  # assume family_type == 'poi':
        actions.append(poi2mimir.si(instance.name, filename, job.id, dataset_uid=dataset.uid))

    actions.append(finish_job.si(job.id))
    return actions
Exemple #4
0
def reload_kraken(instance_id):
    """Create a pending job for the instance and queue reload_data -> finish_job."""
    instance = models.Instance.query.get(instance_id)
    config = load_instance_config(instance.name)
    job = models.Job()
    job.instance = instance
    job.state = 'pending'
    models.db.session.add(job)
    models.db.session.commit()
    workflow = chain(reload_data.si(config, job.id), finish_job.si(job.id))
    workflow.delay()
    logging.info("Task reload kraken for instance {} queued".format(instance.name))
Exemple #5
0
def load_data(instance_id, data_path):
    """Create a pending job for the instance, then synchronously import every
    file found directly under *data_path* (source files are not backed up).
    """
    instance = models.Instance.query.get(instance_id)
    job = models.Job()
    job.instance = instance
    job.state = 'pending'
    models.db.session.add(job)
    models.db.session.commit()
    files = glob.glob(data_path + "/*")

    # BUG FIX: the original passed `async=False` -- `async` has been a reserved
    # keyword since Python 3.7, making the call a SyntaxError. The keyword
    # parameter is named `asynchronous` in the import_data signature.
    import_data(files, instance, backup_file=False, asynchronous=False)
Exemple #6
0
def create_job(creation_date, dataset_type, backup_dir):
    """Build a 'done' job (added to the session, not committed) holding one
    freshly created dataset of *dataset_type* and its metric."""
    backup_path = tempfile.mkdtemp(dir=backup_dir)
    dataset, metric = create_dataset(dataset_type, backup_path)

    job = models.Job()
    job.state = 'done'
    job.created_at = creation_date
    job.data_sets.append(dataset)
    job.metrics.append(metric)
    models.db.session.add(job)
    return job
Exemple #7
0
def reload_at(instance_id):
    """Create a pending job and queue nav2rt -> reload_data -> finish_job."""
    instance = models.Instance.query.get(instance_id)
    config = load_instance_config(instance.name)
    job = models.Job()
    job.instance = instance
    job.state = 'pending'
    models.db.session.add(job)
    models.db.session.commit()
    steps = [
        nav2rt.si(config, job.id),
        reload_data.si(config, job.id),
        finish_job.si(job.id),
    ]
    chain(*steps).delay()
Exemple #8
0
def build_all_data():
    """Queue a data-build chain (ed2nav -> nav2rt) for every known instance."""
    for instance in models.Instance.query.all():
        config = load_instance_config(instance.name)
        job = models.Job()
        job.instance = instance
        job.state = 'pending'
        models.db.session.add(job)
        models.db.session.commit()
        # job committed first so job.id is available to the tasks
        chain(ed2nav.si(config, job.id), nav2rt.si(config, job.id)).delay()
        current_app.logger.info("Job  build data of : %s queued" % instance.name)
Exemple #9
0
def create_job_with_state(state):
    """Create and commit a job in *state* holding one fusio dataset/metric pair."""
    dataset, metric = create_dataset("fusio")

    job = models.Job()
    job.state = state
    job.data_sets.append(dataset)
    job.metrics.append(metric)

    models.db.session.add(job)
    models.db.session.commit()
    return job
def create_job_with_all_dataset_types():
    """Build a 'done' job (added to the session, not committed) holding one
    dataset/metric pair of each type: fusio, osm and poi."""
    job = models.Job()
    job.state = 'done'

    for kind in ('fusio', 'osm', 'poi'):
        dataset, metric = create_dataset(kind)
        job.data_sets.append(dataset)
        job.metrics.append(metric)

    models.db.session.add(job)
    return job
Exemple #11
0
def send_to_mimir(instance, filename):
    """
    :param instance: instance to receive the data
    :param filename: file to inject towards mimir

    - create a job with a data_set
    - data injection towards mimir(stops2mimir, ntfs2mimir)

    returns action list
    """
    # no mimir-related import requested: avoid creating a useless job
    if not instance.import_ntfs_in_mimir and not instance.import_stops_in_mimir:
        return []

    instance_config = load_instance_config(instance.name)

    job = models.Job()
    job.instance = instance
    job.state = 'running'

    dataset = models.DataSet()
    dataset.family_type = 'mimir'
    dataset.type = 'fusio'
    # currently the name of a dataset is the path to it
    dataset.name = filename

    models.db.session.add(dataset)
    job.data_sets.append(dataset)
    models.db.session.add(job)
    models.db.session.commit()

    actions = []
    if instance.import_ntfs_in_mimir:
        # Import ntfs in Mimir
        actions.append(
            ntfs2mimir.si(instance_config,
                          filename,
                          job.id,
                          dataset_uid=dataset.uid))
    elif instance.import_stops_in_mimir:
        # Import stops in Mimir: only done when pt data is loaded without ntfs
        actions.append(
            stops2mimir.si(instance_config,
                           filename,
                           job.id,
                           dataset_uid=dataset.uid))

    actions.append(finish_job.si(job.id))
    return actions
Exemple #12
0
def import_autocomplete(files, autocomplete_instance, asynchronous=True, backup_file=True):
    """
    Import the autocomplete'instance data files

    :param files: paths of the data files to import
    :param autocomplete_instance: autocomplete instance receiving the data
    :param asynchronous: if True the celery chain is queued (delay), otherwise
        it is executed inline (apply)
    :param backup_file: if True each file is moved to the instance's backup
        directory before import
    :return: a (celery result, job) tuple, or None when no file had a known type
    """
    # NOTE(review): job.state is never set here, unlike the other import
    # functions in this file -- presumably the model has a default; confirm.
    job = models.Job()
    actions = []

    # dispatch table: autocomplete file type -> celery task that imports it
    task = {'bano': bano2mimir, 'oa': openaddresses2mimir, 'osm': osm2mimir, 'cosmogony': cosmogony2mimir}
    autocomplete_dir = current_app.config['TYR_AUTOCOMPLETE_DIR']

    # it's important for the admin to be loaded first, then addresses, then street, then poi
    import_order = ['cosmogony', 'bano', 'oa', 'osm']
    files_and_types = [(f, type_of_autocomplete_data(f)) for f in files]
    files_and_types = sorted(files_and_types, key=lambda f_t: import_order.index(f_t[1]))

    for f, ftype in files_and_types:
        dataset = models.DataSet()
        dataset.type = ftype
        dataset.family_type = 'autocomplete_{}'.format(dataset.type)
        if dataset.type in task:
            if backup_file:
                filename = move_to_backupdirectory(f, autocomplete_instance.backup_dir(autocomplete_dir))
            else:
                filename = f
            actions.append(
                task[dataset.type].si(autocomplete_instance, filename=filename, dataset_uid=dataset.uid)
            )
        else:
            # unknown type, we skip it
            current_app.logger.debug("unknown file type: {} for file {}".format(dataset.type, f))
            continue

        # currently the name of a dataset is the path to it
        dataset.name = filename
        models.db.session.add(dataset)
        job.data_sets.append(dataset)
        job.autocomplete_params_id = autocomplete_instance.id

    # nothing importable: the (uncommitted) job is simply dropped
    if not actions:
        return

    models.db.session.add(job)
    models.db.session.commit()
    # the job id only exists after the commit above, so it is injected into
    # every task signature here rather than at creation time
    for action in actions:
        action.kwargs['job_id'] = job.id
    actions.append(finish_job.si(job.id))
    if asynchronous:
        return chain(*actions).delay(), job
    else:
        # all job are run in sequence and import_data will only return when all the jobs are finish
        return chain(*actions).apply(), job
Exemple #13
0
    def create_cities_job(creation_date, path, state):
        """Build a job in *state* with one 'cities' dataset named after *path*.

        :param creation_date: value stored in job.created_at
        :param path: used as the dataset name (dataset names are file paths)
        :param state: the job state to record

        NOTE(review): the job is added to the session but never committed here
        and nothing is returned -- presumably the enclosing scope commits;
        confirm against the caller.
        """
        job = models.Job()
        job.state = state
        dataset_backup_dir = path

        dataset = models.DataSet()
        dataset.type = 'cities'
        dataset.family_type = 'cities_family'
        dataset.name = '{}'.format(dataset_backup_dir)
        models.db.session.add(dataset)

        job.data_sets.append(dataset)
        job.created_at = creation_date
        models.db.session.add(job)
def create_job_with_poi_only():
    """Build a 'done' job (added to the session, not committed) holding two
    'poi' dataset/metric pairs."""
    job = models.Job()
    job.state = 'done'

    # two successive poi datasets are attached to the same job
    for _ in range(2):
        dataset, metric = create_dataset('poi')
        job.data_sets.append(dataset)
        job.metrics.append(metric)

    models.db.session.add(job)
    return job
Exemple #15
0
def add_job_and_data_set_with_jobstate_running(create_basic_job_with_data_sets):
    """Add and commit a 'running' job with one osm dataset on the existing 'fr' instance."""
    with app.app_context():
        instance = get_instance_from_db(name='fr')

        dataset = models.DataSet()
        dataset.family_type = 'osm'
        dataset.type = 'osm'
        dataset.name = '/path/to/dataset_osm'
        models.db.session.add(dataset)

        job = models.Job()
        job.instance = instance
        job.state = 'running'
        job.data_sets.append(dataset)
        models.db.session.add(job)
        models.db.session.commit()
Exemple #16
0
def create_jobs_with_same_datasets(name, backup_dir):
    """Create instance *name* owning three 'done' poi jobs that share one
    backup directory, with creation dates one day apart."""
    with app.app_context():
        shared_backup_dir = tempfile.mkdtemp(dir=backup_dir)

        jobs = []
        for day in range(3):
            dataset, metric = create_dataset('poi', shared_backup_dir)
            job = models.Job()
            job.state = 'done'
            job.created_at = datetime.utcnow() - timedelta(days=day)
            job.data_sets.append(dataset)
            job.metrics.append(metric)
            models.db.session.add(job)
            jobs.append(job)

        create_instance(name, jobs)
Exemple #17
0
def add_job_with_data_set_mimir(create_basic_job_with_data_sets):
    """Add and commit a 'done' job with one mimir dataset on the existing 'fr' instance."""
    with app.app_context():
        instance = get_instance_from_db(name='fr')

        dataset = models.DataSet()
        dataset.family_type = 'mimir'
        dataset.type = 'stop2mimir'
        dataset.name = '/path/to/dataset_3'
        models.db.session.add(dataset)

        job = models.Job()
        job.instance = instance
        job.state = 'done'
        job.data_sets.append(dataset)
        models.db.session.add(job)
        models.db.session.commit()
Exemple #18
0
def create_autocomplete_parameter():
    """Create the 'idf' autocomplete parameter, then three 'done' jobs each
    holding one dataset (one bano, two osm) linked to that parameter."""
    with app.app_context():
        autocomplete_param = models.AutocompleteParameter('idf', 'OSM', 'BANO','FUSIO', 'OSM', [8, 9])
        models.db.session.add(autocomplete_param)
        models.db.session.commit()

        # we also create 3 datasets, one for bano, 2 for osm
        for i, dset_type in enumerate(['bano', 'osm', 'osm']):
            dataset = models.DataSet()
            dataset.type = dset_type
            dataset.family_type = 'autocomplete_{}'.format(dset_type)
            dataset.name = '/path/to/dataset_{}'.format(i)
            models.db.session.add(dataset)

            job = models.Job()
            job.state = 'done'
            job.autocomplete_params_id = autocomplete_param.id
            job.data_sets.append(dataset)
            models.db.session.add(job)
            models.db.session.commit()
Exemple #19
0
def create_basic_job_with_data_sets():
    """Create instance 'fr' plus one 'done' job holding a fusio ('pt' family)
    and a synonym dataset."""
    with app.app_context():
        instance = models.Instance('fr')
        models.db.session.add(instance)
        models.db.session.commit()

        job = models.Job()
        job.instance = instance

        # we also create 2 datasets, one for fusio, one for synonym
        for i, dset_type in enumerate(['fusio', 'synonym']):
            dataset = models.DataSet()
            dataset.type = dset_type
            # fusio belongs to the public-transport family; others map to themselves
            dataset.family_type = 'pt' if dset_type == 'fusio' else dset_type
            dataset.name = '/path/to/dataset_{}'.format(i)
            models.db.session.add(dataset)
            job.data_sets.append(dataset)

        job.state = 'done'
        models.db.session.add(job)
        models.db.session.commit()
Exemple #20
0
def import_data(files, instance, backup_file):
    """
    import the data contains in the list of 'files' in the 'instance'

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved

    run the whole data import process:

    - data import in bdd (fusio2ed, gtfs2ed, poi2ed, ...)
    - export bdd to nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'pending'
    # dispatch table: file type -> celery task performing the '2ed' import
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
    }

    for _file in files:
        filename = None

        dataset = models.DataSet()
        dataset.type = type_of_data(_file)
        if dataset.type in task:
            if backup_file:
                filename = move_to_backupdirectory(
                    _file, instance_config.backup_directory)
            else:
                filename = _file
            actions.append(task[dataset.type].si(instance_config, filename))
        else:
            # unknown type, we skip it
            # FIX: the log message previously read "unknwn file type"
            current_app.logger.debug("unknown file type: {} for file {}".format(
                dataset.type, _file))
            continue

        # currently the name of a dataset is the path to it
        dataset.name = filename
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        models.db.session.add(job)
        models.db.session.commit()
        # We pass the job id to each task, but the job needs to be committed
        # first so that it has an id
        for action in actions:
            action.kwargs['job_id'] = job.id
        binarisation = [
            ed2nav.si(instance_config, job.id),
            nav2rt.si(instance_config, job.id)
        ]
        aggregate = aggregate_places.si(instance_config, job.id)
        # binarisation (sequential) and place aggregation run in parallel
        actions.append(group(chain(*binarisation), aggregate))
        actions.append(reload_data.si(instance_config, job.id))
        actions.append(finish_job.si(job.id))
        chain(*actions).delay()
Exemple #21
0
def update_data():
    """For every instance: scan its source directory, move each recognized
    file to the backup directory, and queue the full import/binarisation/
    reload pipeline.
    """
    # dispatch table: file type -> celery '2ed' import task.
    # FIX: replaces a seven-branch if/elif chain that duplicated the same
    # move-to-backup + append-action logic per type; now consistent with
    # the sibling import_data function.
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
    }
    for instance in models.Instance.query.all():
        current_app.logger.debug("Update data of : %s" % instance.name)
        instance_config = load_instance_config(instance.name)
        files = glob.glob(instance_config.source_directory + "/*")
        actions = []
        job = models.Job()
        job.instance = instance
        job.state = 'pending'
        for _file in files:
            dataset = models.DataSet()
            dataset.type = type_of_data(_file)
            if dataset.type not in task:
                #unknown type, we skip it
                continue
            filename = move_to_backupdirectory(
                _file, instance_config.backup_directory)
            actions.append(task[dataset.type].si(instance_config, filename))

            #currently the name of a dataset is the path to it
            dataset.name = filename
            models.db.session.add(dataset)
            job.data_sets.append(dataset)

        if actions:
            models.db.session.add(job)
            models.db.session.commit()
            #We pass the job id to each tasks, but job need to be commited for
            #having an id
            for action in actions:
                action.kwargs['job_id'] = job.id
            binarisation = [
                ed2nav.si(instance_config, job.id),
                nav2rt.si(instance_config, job.id)
            ]
            aggregate = aggregate_places.si(instance_config, job.id)
            # binarisation (sequential) and place aggregation run in parallel
            actions.append(group(chain(*binarisation), aggregate))
            actions.append(reload_data.si(instance_config, job.id))
            actions.append(finish_job.si(job.id))
            chain(*actions).delay()
Exemple #22
0
def send_to_mimir(instance, filename, family_type):
    """
    :param instance: instance to receive the data
    :param filename: file to inject towards mimir
    :param family_type: dataset's family type

    - create a job with a data_set
    - data injection towards mimir(stops2mimir, ntfs2mimir, poi2mimir)

    returns action list
    """

    # if mimir isn't setup do not try to import data for the autocompletion
    # FIX: the original wrapped a single "A or B" expression in any([...]),
    # which is equivalent but misleading; use the plain boolean test.
    if not (is_activate_autocomplete_version(2)
            or is_activate_autocomplete_version(7)):
        return []

    # Bail out if the family type is not one that mimir deals with.
    if family_type not in ['pt', 'poi']:
        return []

    # This test is to avoid creating a new job if there is no action on mimir.
    if not (instance.import_ntfs_in_mimir or instance.import_stops_in_mimir):
        return []

    actions = []
    job = models.Job()
    job.instance = instance
    job.state = 'running'

    # one dataset per active elasticsearch version (mimir7 / mimir)
    if is_activate_autocomplete_version(7):
        dataset_es7 = create_and_get_dataset(ds_type="fusio",
                                             family_type="mimir7",
                                             filename=filename)
        models.db.session.add(dataset_es7)
        job.data_sets.append(dataset_es7)

    if is_activate_autocomplete_version(2):
        dataset_es2 = create_and_get_dataset(ds_type="fusio",
                                             family_type="mimir",
                                             filename=filename)
        models.db.session.add(dataset_es2)
        job.data_sets.append(dataset_es2)

    models.db.session.add(job)
    models.db.session.commit()

    for version in (2, 7):
        if not is_activate_autocomplete_version(version):
            logging.getLogger(__name__).info(
                "Disable import mimir version {}".format(version))
            continue
        # safe: the matching dataset_es* was created above for active versions
        ds = dataset_es7 if version == 7 else dataset_es2
        if family_type == 'pt':
            # Import ntfs in Mimir
            if instance.import_ntfs_in_mimir:
                actions.append(
                    ntfs2mimir.si(instance.name,
                                  filename,
                                  version,
                                  job.id,
                                  dataset_uid=ds.uid))
            # Import stops in Mimir.
            # if we are loading pt data we might want to load the stops to autocomplete
            # This action is deprecated: https://github.com/hove-io/mimirsbrunn/blob/4430eed1d81247fffa7cf32ba675a9c5ad8b1cbe/documentation/components.md#stops2mimir
            if instance.import_stops_in_mimir and not instance.import_ntfs_in_mimir:
                actions.append(
                    stops2mimir.si(instance.name,
                                   filename,
                                   version,
                                   job.id,
                                   dataset_uid=ds.uid))
        else:  # assume family_type == 'poi':
            actions.append(
                poi2mimir.si(instance.name,
                             filename,
                             version,
                             job.id,
                             dataset_uid=ds.uid))

    actions.append(finish_job.si(job.id))
    return actions
Exemple #23
0
def import_autocomplete(files,
                        autocomplete_instance,
                        asynchronous=True,
                        backup_file=True):
    """
    Import the autocomplete'instance data files

    :param files: paths of the data files to import
    :param autocomplete_instance: autocomplete instance receiving the data
    :param asynchronous: if True the celery chain is queued (delay), otherwise
        it runs inline (apply)
    :param backup_file: if True each file is moved to the instance's backup
        directory before import
    :return: a (celery result, job) tuple, or None when no file had a known type
    """
    job = models.Job()
    job.state = 'running'
    actions = []
    # dispatch table: file type -> {autocomplete version -> celery task}
    task = {
        'bano': {
            2: bano2mimir,
            7: bano2mimir
        },
        'oa': {
            2: openaddresses2mimir,
            7: openaddresses2mimir
        },
        'osm': {
            2: osm2mimir,
            7: osm2mimir
        },
        'cosmogony': {
            2: cosmogony2mimir,
            7: cosmogony2mimir
        },
    }
    autocomplete_dir = current_app.config['TYR_AUTOCOMPLETE_DIR']

    # it's important for the admin to be loaded first, then addresses, then street, then poi
    import_order = ['cosmogony', 'bano', 'oa', 'osm']
    files_and_types = [(f, type_of_autocomplete_data(f)) for f in files]
    files_and_types = sorted(files_and_types,
                             key=lambda f_t: import_order.index(f_t[1]))

    for f, ftype in files_and_types:
        if ftype not in task:
            # unknown type, we skip it
            current_app.logger.debug(
                "unknown file type: {} for file {}".format(ftype, f))
            continue
        filename = f
        if backup_file:
            filename = move_to_backupdirectory(
                f,
                autocomplete_instance.backup_dir(autocomplete_dir),
                manage_sp_char=True)

        # one dataset and one action per active autocomplete version
        for version, executable in task[ftype].items():
            if not is_activate_autocomplete_version(version):
                # FIX: log message previously read "disableed"
                current_app.logger.debug(
                    "Autocomplete version {} is disabled".format(version))
                continue
            dataset = create_and_get_dataset(
                ds_type=ftype,
                family_type='autocomplete_{}'.format(ftype),
                filename=filename)
            actions.append(
                executable.si(
                    autocomplete_instance,
                    filename=filename,
                    job_id=job.id,
                    dataset_uid=dataset.uid,
                    autocomplete_version=version,
                ))
            models.db.session.add(dataset)
            job.data_sets.append(dataset)
            job.autocomplete_params_id = autocomplete_instance.id

    if not actions:
        return

    models.db.session.add(job)
    models.db.session.commit()
    # the job id only exists after the commit; overwrite the placeholder
    # job_id kwarg set when the signatures were built
    for action in actions:
        action.kwargs['job_id'] = job.id
    actions.append(finish_job.si(job.id))
    if asynchronous:
        return chain(*actions).delay(), job
    else:
        # all job are run in sequence and import_data will only return when all the jobs are finish
        return chain(*actions).apply(), job
Exemple #24
0
    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved
    :param async: If True all jobs are run in background, else the jobs are run in sequence the function will only return when all of them are finish
    :param reload: If True kraken would be reload at the end of the treatment

    run the whole data import process:

    - data import in bdd (fusio2ed, gtfs2ed, poi2ed, ...)
    - export bdd to nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'pending'
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    for _file in files:
Exemple #25
0
def import_data(
    files,
    instance,
    backup_file,
    asynchronous=True,
    reload=True,
    custom_output_dir=None,
    skip_mimir=False,
    skip_2ed=False,
):
    """
    import the data contains in the list of 'files' in the 'instance'

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved
    :param asynchronous: If True all jobs are run in background, else the jobs are run in sequence the function
     will only return when all of them are finish
    :param reload: If True kraken would be reload at the end of the treatment
    :param custom_output_dir: subdirectory for the nav file created. If not given, the instance default one is taken
    :param skip_mimir: skip importing data into mimir
    :param skip_2ed: skip inserting last_load_dataset files into ed database
    run the whole data import process:

    - data import in bdd (fusio2ed, gtfs2ed, poi2ed, ...)
    - export bdd to nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'running'
    # dispatch table: file type -> celery '2ed' import task
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    # closure over `actions` and `job`: commits the job, appends the
    # binarisation / reload / mimir / purge steps and launches the chain
    def process_ed2nav():
        models.db.session.add(job)
        models.db.session.commit()
        # We pass the job id to each tasks, but job need to be commited for having an id
        for action in actions:
            action.kwargs['job_id'] = job.id
        # Create binary file (New .nav.lz4)
        binarisation = [ed2nav.si(instance_config, job.id, custom_output_dir)]
        actions.append(chain(*binarisation))
        # Reload kraken with new data after binarisation (New .nav.lz4)
        if reload:
            actions.append(reload_data.si(instance_config, job.id))

        if not skip_mimir:
            # send_to_mimir returns a (possibly empty) list of extra actions
            for dataset in job.data_sets:
                actions.extend(
                    send_to_mimir(instance, dataset.name, dataset.family_type))
        else:
            current_app.logger.info("skipping mimir import")

        actions.append(finish_job.si(job.id))

        # We should delete old backup directories related to this instance
        actions.append(
            purge_instance.si(
                instance.id,
                current_app.config['DATASET_MAX_BACKUPS_TO_KEEP']))
        if asynchronous:
            return chain(*actions).delay()
        else:
            # all job are run in sequence and import_data will only return when all the jobs are finish
            return chain(*actions).apply()

    if skip_2ed:
        # For skip_2ed, skip inserting last_load_dataset files into ed database
        # (actions is still empty here: only binarisation/reload/mimir run)
        return process_ed2nav()
    for _file in files:
        filename = None

        dataset = models.DataSet()
        # NOTE: for the moment we do not use the path to load the data here
        # but we'll need to refactor this to take it into account
        try:
            dataset.type, _ = utils.type_of_data(_file)
            dataset.family_type = utils.family_of_data(dataset.type)
        except Exception:
            # unrecognized file: move it out of the way (best effort) and skip it
            if backup_file:
                move_to_backupdirectory(_file,
                                        instance_config.backup_directory)
            current_app.logger.debug(
                "Corrupted source file : {} moved to {}".format(
                    _file, instance_config.backup_directory))
            continue

        if dataset.type in task:
            if backup_file:
                filename = move_to_backupdirectory(
                    _file,
                    instance_config.backup_directory,
                    manage_sp_char=True)
            else:
                filename = _file

            # when the instance uses the loki pt-planner, also ship pt data
            # to its configured data source (minio upload or local copy)
            has_pt_planner_loki = (
                hasattr(instance, 'pt_planners_configurations')
                and "loki" in instance.pt_planners_configurations)
            if has_pt_planner_loki:
                loki_data_source = instance.pt_planners_configurations.get(
                    'loki', {}).get('data_source')
                if loki_data_source is not None:
                    if loki_data_source == "minio":
                        if dataset.type == "fusio":
                            actions.append(
                                fusio2s3.si(instance_config,
                                            filename,
                                            dataset_uid=dataset.uid))
                        if dataset.type == "gtfs":
                            actions.append(
                                gtfs2s3.si(instance_config,
                                           filename,
                                           dataset_uid=dataset.uid))
                    elif loki_data_source == "local" and dataset.type in [
                            "fusio", "gtfs"
                    ]:
                        # synchronous copy next to the nav target file,
                        # not a queued celery action
                        zip_file = zip_if_needed(filename)
                        dest = os.path.join(
                            os.path.dirname(instance_config.target_file),
                            "ntfs.zip")
                        shutil.copy(zip_file, dest)
                    else:
                        current_app.logger.debug(
                            "unknown loki data_source '{}' for coverage '{}'".
                            format(loki_data_source, instance.name))

            actions.append(task[dataset.type].si(instance_config,
                                                 filename,
                                                 dataset_uid=dataset.uid))
        else:
            # unknown type, we skip it
            current_app.logger.debug(
                "unknown file type: {} for file {}".format(
                    dataset.type, _file))
            continue

        # currently the name of a dataset is the path to it
        dataset.name = filename
        dataset.state = "pending"
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        return process_ed2nav()
Exemple #26
0
def import_data(
    files, instance, backup_file, asynchronous=True, reload=True, custom_output_dir=None, skip_mimir=False
):
    """
    Import the data contained in the list of 'files' into the 'instance'.

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved
    :param asynchronous: If True all jobs are run in background, else the jobs are run in sequence and
     the function will only return when all of them are finished
    :param reload: If True kraken is reloaded at the end of the treatment
    :param custom_output_dir: subdirectory for the nav file created. If not given, the instance default one is taken
    :param skip_mimir: skip importing data into mimir

    Runs the whole data import process:

    - data import in bdd (fusio2ed, gtfs2ed, poi2ed, ...)
    - export bdd to nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    instance_config = load_instance_config(instance.name)

    job = models.Job()
    job.instance = instance
    job.state = 'running'

    # Each recognized dataset type maps to the celery task that imports it.
    importers = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    actions = []
    for input_file in files:
        dataset = models.DataSet()
        # NOTE: for the moment we do not use the path to load the data here
        # but we'll need to refactor this to take it into account
        try:
            dataset.type, _ = utils.type_of_data(input_file)
            dataset.family_type = utils.family_of_data(dataset.type)
        except Exception:
            # unreadable/unrecognized file: archive it (if requested) and move on
            if backup_file:
                move_to_backupdirectory(input_file, instance_config.backup_directory)
            current_app.logger.debug(
                "Corrupted source file : {} moved to {}".format(input_file, instance_config.backup_directory)
            )
            continue

        if dataset.type not in importers:
            # unknown type, we skip it
            current_app.logger.debug("unknown file type: {} for file {}".format(dataset.type, input_file))
            continue

        if backup_file:
            filename = move_to_backupdirectory(input_file, instance_config.backup_directory)
        else:
            filename = input_file
        actions.append(importers[dataset.type].si(instance_config, filename, dataset_uid=dataset.uid))

        # currently the name of a dataset is the path to it
        dataset.name = filename
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        models.db.session.add(job)
        models.db.session.commit()
        # We pass the job id to each tasks, but job need to be commited for having an id
        for action in actions:
            action.kwargs['job_id'] = job.id
        # Create binary file (New .nav.lz4)
        actions.append(chain(ed2nav.si(instance_config, job.id, custom_output_dir)))
        # Reload kraken with new data after binarisation (New .nav.lz4)
        if reload:
            actions.append(reload_data.si(instance_config, job.id))

        if skip_mimir:
            current_app.logger.info("skipping mimir import")
        else:
            for dataset in job.data_sets:
                actions.extend(send_to_mimir(instance, dataset.name, dataset.family_type))

        actions.append(finish_job.si(job.id))
        if asynchronous:
            # fire the whole pipeline in the workers and return immediately
            return chain(*actions).delay()
        # all job are run in sequence and import_data will only return when all the jobs are finish
        return chain(*actions).apply()