Python si Examples, tyr.binarisation.reload_data.si Python Examples

Example #1

0

Show file

File: tasks.py Project: xlqian/navitia

    def process_ed2nav():
        """
        Persist the job, complete the list of actions and launch them as one chain.

        The job, the actions and the options (reload, skip_mimir, asynchronous, ...)
        are taken from the enclosing scope.

        :return: the result of ``chain(*actions).delay()`` when 'asynchronous' is true,
         else the result of ``chain(*actions).apply()``
        """
        session = models.db.session
        session.add(job)
        session.commit()

        # The job only gets an id once it has been committed, which is why the id
        # is injected into the already created actions at this point
        job_id = job.id
        for pending in actions:
            pending.kwargs['job_id'] = job_id

        # Create binary file (New .nav.lz4)
        actions.append(chain(ed2nav.si(instance_config, job_id, custom_output_dir)))

        # Reload kraken with new data after binarisation (New .nav.lz4)
        if reload:
            actions.append(reload_data.si(instance_config, job_id))

        if skip_mimir:
            current_app.logger.info("skipping mimir import")
        else:
            for data_set in job.data_sets:
                mimir_actions = send_to_mimir(instance, data_set.name, data_set.family_type)
                actions.extend(mimir_actions)

        actions.append(finish_job.si(job_id))

        # Old backup directories related to this instance should be deleted
        max_backups = current_app.config['DATASET_MAX_BACKUPS_TO_KEEP']
        actions.append(purge_instance.si(instance.id, max_backups))

        workflow = chain(*actions)
        if asynchronous:
            return workflow.delay()
        # all the jobs are run in sequence and import_data only returns once they are all finished
        return workflow.apply()

Example #2

0

Show file

def reload_kraken(instance_id):
    """
    Queue a reload of the kraken of an instance.

    A new job in state 'pending' is committed for the instance, then a chain made of
    reload_data followed by finish_job is sent with ``delay()``.

    :param instance_id: id used to fetch the instance with ``models.Instance.query.get``
    """
    target = models.Instance.query.get(instance_id)

    reload_job = models.Job()
    reload_job.instance = target
    reload_job.state = 'pending'

    config = load_instance_config(target.name)

    # The job id passed to the tasks below is read after the commit
    db_session = models.db.session
    db_session.add(reload_job)
    db_session.commit()

    workflow = chain(
        reload_data.si(config, reload_job.id),
        finish_job.si(reload_job.id),
    )
    workflow.delay()

    message = "Task reload kraken for instance {} queued".format(target.name)
    logging.info(message)

Example #3

0

Show file

File: tasks.py Project: Tristramg/navitia

def reload_at(instance_id):
    """
    Queue nav2rt, then reload_data, then finish_job for an instance.

    A new job in state 'pending' is committed for the instance before the chain
    of the three tasks is sent with ``delay()``.

    :param instance_id: id used to fetch the instance with ``models.Instance.query.get``
    """
    target = models.Instance.query.get(instance_id)

    at_job = models.Job()
    at_job.instance = target
    at_job.state = 'pending'

    config = load_instance_config(target.name)

    # The job id passed to the tasks below is read after the commit
    db_session = models.db.session
    db_session.add(at_job)
    db_session.commit()

    steps = [
        nav2rt.si(config, at_job.id),
        reload_data.si(config, at_job.id),
        finish_job.si(at_job.id),
    ]
    chain(*steps).delay()

Example #4

0

Show file

        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        models.db.session.add(job)
        models.db.session.commit()
        for action in actions:
            action.kwargs['job_id'] = job.id
        #We pass the job id to each tasks, but job need to be commited for
        #having an id
        binarisation = [ed2nav.si(instance_config, job.id, custom_output_dir)]
        #We pass the job id to each tasks, but job need to be commited for
        #having an id
        actions.append(chain(*binarisation))
        if reload:
            actions.append(reload_data.si(instance_config, job.id))
        actions.append(finish_job.si(job.id))
        if async:
            chain(*actions).delay()
        else:
            # all job are run in sequence and import_data will only return when all the jobs are finish
            chain(*actions).apply()


@celery.task()
def update_data():
    """
    Import, for every instance, all the files found in its source directory.

    Each instance returned by ``models.Instance.query.all()`` is handled in turn:
    every entry matching ``<source_directory>/*`` is given to import_data with
    backup_file=True.
    """
    all_instances = models.Instance.query.all()
    for current in all_instances:
        current_app.logger.debug("Update data of : {}".format(current.name))
        source_directory = load_instance_config(current.name).source_directory
        candidates = glob.glob(source_directory + "/*")
        import_data(candidates, current, backup_file=True)

Example #5

0

Show file

def import_data(
    files, instance, backup_file, asynchronous=True, reload=True, custom_output_dir=None, skip_mimir=False
):
    """
    import the data contained in the list of 'files' in the 'instance'

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved
    :param asynchronous: If True all jobs are run in background, else the jobs are run in sequence the function
     will only return when all of them are finish
    :param reload: If True kraken would be reload at the end of the treatment
    :param custom_output_dir: subdirectory for the nav file created. If not given, the instance default one is taken
    :param skip_mimir: skip importing data into mimir
    :return: the result of ``chain(*actions).delay()`` when 'asynchronous' is true, the result of
     ``chain(*actions).apply()`` otherwise; None when no file of a known type was found, since nothing
     is scheduled in that case

    run the whole data import process:

    - data import in bdd (fusio2ed, gtfs2ed, poi2ed, ...)
    - export bdd to nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'running'
    # Import task to run for each known type of data
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    for _file in files:
        dataset = models.DataSet()
        # NOTE: for the moment we do not use the path to load the data here
        # but we'll need to refactor this to take it into account
        try:
            dataset.type, _ = utils.type_of_data(_file)
            dataset.family_type = utils.family_of_data(dataset.type)
        except Exception:
            # Best effort: a file whose type cannot be determined must not abort
            # the import of the other files
            if backup_file:
                move_to_backupdirectory(_file, instance_config.backup_directory)
                current_app.logger.debug(
                    "Corrupted source file : {} moved to {}".format(_file, instance_config.backup_directory)
                )
            else:
                # The file is left in place: do not claim that it has been moved
                current_app.logger.debug("Corrupted source file : {} ignored".format(_file))
            continue

        if dataset.type not in task:
            # unknown type, we skip it
            current_app.logger.debug("unknown file type: {} for file {}".format(dataset.type, _file))
            continue

        if backup_file:
            filename = move_to_backupdirectory(_file, instance_config.backup_directory)
        else:
            filename = _file
        actions.append(task[dataset.type].si(instance_config, filename, dataset_uid=dataset.uid))

        # currently the name of a dataset is the path to it
        dataset.name = filename
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        models.db.session.add(job)
        models.db.session.commit()
        # We pass the job id to each tasks, but job need to be commited for having an id
        for action in actions:
            action.kwargs['job_id'] = job.id
        # Create binary file (New .nav.lz4)
        binarisation = [ed2nav.si(instance_config, job.id, custom_output_dir)]
        actions.append(chain(*binarisation))
        # Reload kraken with new data after binarisation (New .nav.lz4)
        if reload:
            actions.append(reload_data.si(instance_config, job.id))

        if not skip_mimir:
            for dataset in job.data_sets:
                actions.extend(send_to_mimir(instance, dataset.name, dataset.family_type))
        else:
            current_app.logger.info("skipping mimir import")

        actions.append(finish_job.si(job.id))
        if asynchronous:
            return chain(*actions).delay()
        else:
            # all job are run in sequence and import_data will only return when all the jobs are finish
            return chain(*actions).apply()

Example #6

0

Show file

File: tasks.py Project: skywave/navitia

def update_data():
    """
    Schedule, for every instance, the import of the files of its source directory.

    For each instance, every entry matching ``<source_directory>/*`` whose type is
    known is moved to the backup directory and gets its import task. If at least
    one task was created, the job is committed and the following chain is sent
    with ``delay()``: the import tasks, then a group made of (ed2nav then nav2rt)
    and aggregate_places, then reload_data, then finish_job.
    """
    # Import task to run for each known type of data
    importers = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
    }

    for instance in models.Instance.query.all():
        current_app.logger.debug("Update data of : %s" % instance.name)
        instance_config = load_instance_config(instance.name)
        source_files = glob.glob(instance_config.source_directory + "/*")

        actions = []
        job = models.Job()
        job.instance = instance
        job.state = 'pending'

        for source_file in source_files:
            dataset = models.DataSet()
            dataset.type = type_of_data(source_file)

            importer = importers.get(dataset.type)
            if importer is None:
                # unknown type, we skip it
                continue

            backup_path = move_to_backupdirectory(
                source_file, instance_config.backup_directory)
            actions.append(importer.si(instance_config, backup_path))

            # currently the name of a dataset is the path to it
            dataset.name = backup_path
            models.db.session.add(dataset)
            job.data_sets.append(dataset)

        if not actions:
            continue

        models.db.session.add(job)
        models.db.session.commit()
        # The job id is given to each task, but the job has to be committed
        # to have an id
        for action in actions:
            action.kwargs['job_id'] = job.id

        binarisation = chain(
            ed2nav.si(instance_config, job.id),
            nav2rt.si(instance_config, job.id),
        )
        aggregate = aggregate_places.si(instance_config, job.id)
        actions.append(group(binarisation, aggregate))
        actions.append(reload_data.si(instance_config, job.id))
        actions.append(finish_job.si(job.id))
        chain(*actions).delay()

Example #7

0

Show file

def import_data(files, instance, backup_file):
    """
    Schedule the import of the given 'files' into the 'instance'.

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are left where they are

    the whole data import process is run:

    - data import in bdd (fusio2ed, gtfs2ed, poi2ed, ...)
    - export bdd to nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens

    Nothing is scheduled when none of the files has a known type.
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'pending'
    # Import task to run for each known type of data
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
    }

    for source_file in files:
        dataset = models.DataSet()
        dataset.type = type_of_data(source_file)

        if dataset.type not in task:
            # unknown type, we skip it
            current_app.logger.debug("unknwn file type: {} for file {}".format(
                dataset.type, source_file))
            continue

        stored_path = (
            move_to_backupdirectory(source_file, instance_config.backup_directory)
            if backup_file
            else source_file
        )
        actions.append(task[dataset.type].si(instance_config, stored_path))

        # currently the name of a dataset is the path to it
        dataset.name = stored_path
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if not actions:
        return

    models.db.session.add(job)
    models.db.session.commit()
    # The job id is given to each task, but the job has to be committed
    # to have an id
    for action in actions:
        action.kwargs['job_id'] = job.id

    binarisation = chain(
        ed2nav.si(instance_config, job.id),
        nav2rt.si(instance_config, job.id),
    )
    aggregate = aggregate_places.si(instance_config, job.id)
    actions.append(group(binarisation, aggregate))
    actions.append(reload_data.si(instance_config, job.id))
    actions.append(finish_job.si(job.id))
    chain(*actions).delay()