Example #1
def find_new_sentinel2_scenes(*args, **kwargs):
    """Find new Sentinel 2 scenes and kick off imports

    Uses the execution date to determine what day to check for imports
    """

    logging.info("Finding Sentinel-2 scenes...")
    execution_date = kwargs['execution_date']
    tilepaths = find_sentinel2_scenes(execution_date.year,
                                      execution_date.month, execution_date.day)

    dag_id = 'import_sentinel2_scenes'

    # Split into groups for more efficient jobs
    num_groups = 32 if len(tilepaths) >= 32 else len(tilepaths)
    logger.info('Kicking off %s dags to import scene groups', num_groups)
    tilepath_groups = chunkify(tilepaths, num_groups)
    for idx, path_group in enumerate(tilepath_groups):
        slug_path = '_'.join(path_group[0].split('/'))
        run_id = 'sentinel2_import_{year}_{month}_{day}_{idx}_{slug}'.format(
            year=execution_date.year,
            month=execution_date.month,
            day=execution_date.day,
            idx=idx,
            slug=slug_path)
        logger.info('Kicking off new scene import: %s', run_id)
        conf = json.dumps({'tilepaths': path_group})
        dag_args = DagArgs(dag_id=dag_id, conf=conf, run_id=run_id)
        trigger_dag(dag_args)
    return "Finished kicking off new Sentinel-2 dags"
Example #2
def test_cli_list_dag_runs(self):
    cli.trigger_dag(self.parser.parse_args([
        'dags', 'trigger', 'example_bash_operator', ]))
    args = self.parser.parse_args(['dags', 'list_runs',
                                   'example_bash_operator',
                                   '--no_backfill'])
    cli.list_dag_runs(args)
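The test assumes a self.parser built from Airflow's own command-line parser. A hypothetical setUp that could provide it, assuming the pre-2.0 airflow.bin.cli layout where CLIFactory.get_parser() builds the argparse parser used by the airflow command:

import unittest

from airflow.bin import cli


class TestCliDags(unittest.TestCase):
    def setUp(self):
        # Build the same argparse parser the `airflow` CLI entry point uses
        self.parser = cli.CLIFactory.get_parser()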
Example #3
File: test_cli.py  Project: tedb19/airflow
def test_trigger_dag(self):
    cli.trigger_dag(
        self.parser.parse_args([
            'dags', 'trigger', 'example_bash_operator', '-c',
            '{"foo": "bar"}'
        ]))
    self.assertRaises(
        ValueError, cli.trigger_dag,
        self.parser.parse_args([
            'dags', 'trigger', 'example_bash_operator', '--run_id',
            'trigger_dag_xxx', '-c', 'NOT JSON'
        ]))
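The first call passes -c '{"foo": "bar"}', which becomes the conf dictionary of the triggered run; the second call shows that a conf value that is not valid JSON raises ValueError. Examples #1 and #4 on this page read the same conf back through kwargs['dag_run'].conf; a minimal sketch of a task callable doing so (the function name is hypothetical):

def read_trigger_conf(**context):
    """Read the conf supplied via `airflow dags trigger ... -c '{"foo": "bar"}'`."""
    conf = context['dag_run'].conf or {}
    print('foo =', conf.get('foo'))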
Example #4
def find_geotiffs(*args, **kwargs):
    """Find geotiffs which match the bucket and prefix and kick off imports
    """

    logging.info("Finding geotiff scenes...")

    conf = kwargs['dag_run'].conf

    bucket = conf.get('bucket')
    prefix = conf.get('prefix')

    execution_date = kwargs['execution_date']

    try:
        tilepaths = find_geotiff_scenes(
            bucket, prefix
        )
    except:
        logger.error('encountered error finding tile paths')
        raise

    dag_id = 'import_geotiff_scenes'

    group_max = int(os.getenv('AIRFLOW_CHUNK_SIZE', 32))
    num_groups = group_max if len(tilepaths) >= group_max else len(tilepaths)
    logger.info('Kicking off %s dags to import scene groups', num_groups)

    tilepath_groups = chunkify(tilepaths, num_groups)
    for idx, path_group in enumerate(tilepath_groups):
        slug_path = '_'.join(path_group[0].split('/'))
        run_id = 'geotiff_import_{year}_{month}_{day}_{idx}_{slug}'.format(
            year=execution_date.year, month=execution_date.month, day=execution_date.day,
            idx=idx, slug=slug_path
        )
        logger.info('Kicking off new scene import: %s', run_id)
        conf['tilepaths'] = path_group
        confjson = json.dumps(conf)
        dag_args = DagArgs(dag_id=dag_id, conf=confjson, run_id=run_id)
        trigger_dag(dag_args)

    logger.info('Finished kicking off new Geotiff scene dags')
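find_geotiff_scenes is another helper that is not shown here; from the call site it takes a bucket and prefix and returns a list of tile paths. A hypothetical sketch, assuming the scenes live on S3 and boto3 is available (this is not the project's actual implementation):

import boto3


def find_geotiff_scenes(bucket, prefix):
    """List object keys under prefix that look like GeoTIFFs.

    Hypothetical stand-in; only the (bucket, prefix) signature comes
    from the example above.
    """
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    tilepaths = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.lower().endswith(('.tif', '.tiff')):
                tilepaths.append(key)
    return tilepaths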
Example #5
def check_for_scenes_to_ingest():
    """Requests uningested scenes, kicks off ingest DAG for each scene

    Notes
      At some point this should batch scene ingests together, but for now
      they are kept separate for debugging and because the ingests themselves
      do not parallelize well
    """
    logger.info("Requesting uningested scenes...")
    scenes = get_uningested_scenes()

    dag_id = 'ingest_project_scenes'

    if len(scenes) == 0:
        return 'No scenes to ingest'

    logger.info('Kicking off ingests for %s scenes', len(scenes))
    for scene in scenes:
        run_id = 'scene_ingest_{}_{}'.format(scene['id'], time())
        logger.info('Kicking off new scene ingest: %s', run_id)
        conf = json.dumps({'scene': scene})
        dag_args = DagArgs(dag_id=dag_id, conf=conf, run_id=run_id)
        trigger_dag(dag_args)
    return "Finished kicking off ingests"
Example #6
    def create(self, validated_data):

        # TODO: Import Jinja 2
        # TODO: Create the dictionary
        execution = Execution.objects.get(pk=validated_data['execution_id'])
        min_long, max_long, min_lat, max_lat = self.get_area(
            validated_data['parameters'])
        params = dict(self.get_kwargs(validated_data['parameters']))
        params['lat'] = (min_lat, max_lat)
        params['lon'] = (min_long, max_long)
        params['products'] = self.get_product(validated_data['parameters'])
        params['time_ranges'] = self.get_time_periods(
            validated_data['parameters'])
        params['execID'] = 'exec_{}'.format(str(
            validated_data['execution_id']))
        params['elimina_resultados_anteriores'] = True
        params['genera_mosaico'] = validated_data['generate_mosaic']

        # params['owner'] = Execution.executed_by.
        params['owner'] = "API-REST"
        # TODO: Load the template

        template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                     slugify(validated_data['algorithm_name']))
        generic_template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                             "generic-template")

        if execution.version is not None and execution.version.publishing_state == Version.PUBLISHED_STATE and os.path.exists(
                template_path):
            file_loader = FileSystemLoader(template_path)
            env = Environment(loader=file_loader)
            algorithm_template_path = '{}_{}.py'.format(
                slugify(validated_data['algorithm_name']),
                validated_data['version_id'])
            template = env.get_template(algorithm_template_path)
        else:
            file_loader = FileSystemLoader(generic_template_path)
            env = Environment(loader=file_loader)
            algorithm_template_path = '{}_{}.py'.format(
                "generic-template", "1.0")
            params['algorithm_name'] = slugify(
                validated_data['algorithm_name'])
            params['algorithm_version'] = validated_data['version_id']
            template = env.get_template(algorithm_template_path)

        # TODO: Render the template
        airflow_dag_path = os.environ['AIRFLOW_DAG_PATH']
        execution_dag_path = '{}/exec_{}.py'.format(
            airflow_dag_path, str(validated_data['execution_id']))
        output = template.render(params=params)
        with open(execution_dag_path, 'w') as dag:
            dag.write("from airflow.operators import CompressFileSensor\n")
            dag.write("from cdcol_utils import other_utils\n")
            dag.write(output)
            dag.write(
                "\nsensor_fin_ejecucion = CompressFileSensor(task_id='sensor_fin_ejecucion',poke_interval=60, soft_fail=True,mode='reschedule', queue='util', dag=dag) \n"
            )
            dag.write(
                "comprimir_resultados = PythonOperator(task_id='comprimir_resultados',provide_context=True,python_callable=other_utils.compress_results,queue='util',op_kwargs={'execID': args['execID']},dag=dag) \n"
            )
            dag.write("sensor_fin_ejecucion >> comprimir_resultados \n")
        execution.dag_id = params['execID']
        execution.save()

        # TODO: Run the workflow
        bash_command1 = '/home/cubo/anaconda/bin/airflow list_dags'
        bash_command2 = '/home/cubo/anaconda/bin/airflow unpause ' + params[
            'execID']

        subprocess.call(bash_command1.split())
        subprocess.call(bash_command2.split())

        dagbag = models.DagBag(settings.DAGS_FOLDER)
        dagbag.collect_dags()
        dagbag.process_file(filepath=execution_dag_path)

        args = argparse.Namespace()
        args.dag_id = params['execID']
        args.run_id = None
        args.exec_id = None
        args.conf = None
        args.exec_date = None
        args.subdir = None
        #cli.set_is_paused(False, args=args)
        cli.trigger_dag(args)

        # TODO: Update the execution in the database

        # time_ranges = self.get_time_periods(validated_data['parameters'])
        #
        # gtask_parameters = {}
        # gtask_parameters['execID'] = str(validated_data['execution_id'])
        # gtask_parameters['algorithm'] = validated_data['algorithm_name']
        # gtask_parameters['version'] = validated_data['version_id']
        # gtask_parameters['output_expression'] = ''
        # gtask_parameters['product'], gtask_parameters['bands'] = self.get_product(validated_data['parameters'])
        # gtask_parameters = dict(self.get_kwargs(validated_data['parameters']), **gtask_parameters)
        #
        # gtask = import_module(os.environ['GEN_TASK_MOD'])
        # # flower = os.environ['FLOWER']

        # for key in gtask_parameters:
        #	print 'param \'' + key + '\': ' + str(gtask_parameters[key])

        # result = gtask.generic_task(min_long=min_long, min_lat=min_lat, **gtask_parameters)

        # if validated_data['is_gif']:
        #     gtask_parameters['min_lat'] = int(min_lat)
        #     gtask_parameters['min_long'] = int(min_long)
        #     result = group(
        #         gtask.generic_task.s(time_ranges=[("01-01-" + str(A), +"31-12-" + str(A))], **gtask_parameters) for A in
        #         xrange(int(time_ranges[0][0].split('-')[2]), int(time_ranges[0][1].split('-')[2]) + 1)).delay()
        #     for each_result in result.results:
        #         new_task = {
        #             'uuid': each_result.id,
        #             'state': '1',
        #             'execution_id': gtask_parameters['execID'],
        #             'state_updated_at': str(datetime.datetime.now()),
        #             'created_at': str(datetime.datetime.now()),
        #             'updated_at': str(datetime.datetime.now()),
        #             'start_date': str(datetime.date.today()),
        #             'end_date': str(datetime.date.today()),
        #
        #         }
        #         Task.objects.create(**new_task)
        # else:
        #     gtask_parameters['time_ranges'] = time_ranges
        #     result = group(gtask.generic_task.s(min_lat=Y, min_long=X, **gtask_parameters) for Y in
        #                    xrange(int(min_lat), int(max_lat)) for X in xrange(int(min_long), int(max_long))).delay()
        #     for each_result in result.results:
        #         # try:
        #         # 	task = json.loads(urlopen(flower + '/api/task/info/'+each_result.id).read())
        #         # except:
        #         # 	task = {'kwargs':''}
        #         new_task = {
        #             'uuid': each_result.id,
        #             'state': '1',
        #             'execution_id': gtask_parameters['execID'],
        #             'state_updated_at': str(datetime.datetime.now()),
        #             'created_at': str(datetime.datetime.now()),
        #             'updated_at': str(datetime.datetime.now()),
        #             'start_date': str(datetime.date.today()),
        #             'end_date': str(datetime.date.today()),
        #             # 'parameters': json.dumps(each_result.__dict__),
        #         }
        #         Task.objects.create(**new_task)

        return validated_data
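The create method above triggers the generated DAG by hand-building an argparse.Namespace for cli.trigger_dag. As a possible simplification, rather than a description of what this project does, the same trigger can be issued through the experimental API and skip the CLI plumbing; a sketch assuming Airflow 1.10:

from airflow.api.common.experimental.trigger_dag import trigger_dag


def trigger_generated_dag(exec_id):
    """Trigger the freshly generated DAG directly (exec_id is params['execID'] above)."""
    return trigger_dag(dag_id=exec_id)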