    def execute(self, context):
        if self.provide_context:
            context.update(self.op_kwargs)
            self.op_kwargs = context

        session = settings.Session()
        created_dr_ids = []
        # Load the target DAG once; it is the same for every DagRunOrder.
        dbag = DagBag(settings.DAGS_FOLDER)
        trigger_dag = dbag.get_dag(self.trigger_dag_id)
        for dro in self.python_callable(*self.op_args, **self.op_kwargs):
            # A falsy item from the callable ends the fan-out early.
            if not dro:
                break
            if not isinstance(dro, DagRunOrder):
                dro = DagRunOrder(payload=dro)

            now = dt.utcnow()
            if dro.run_id is None:
                dro.run_id = 'trig__' + now.isoformat()

            dr = trigger_dag.create_dagrun(
                run_id=dro.run_id,
                execution_date=now,
                state=State.RUNNING,
                conf=dro.payload,
                external_trigger=True,
            )
            created_dr_ids.append(dr.id)
            self.log.info("Created DagRun %s, %s", dr, now)

        if created_dr_ids:
            session.commit()
            context['ti'].xcom_push(self.CREATED_DAGRUN_KEY, created_dr_ids)
        else:
            self.log.info("No DagRun created")
        session.close()
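The loop above treats whatever self.python_callable returns as an iterable of DagRunOrders and creates one DagRun per item; most of the remaining examples are callables written against that contract. As a minimal sketch of the wiring, assuming the airflow-multi-dagrun package's TriggerMultiDagRunOperator and Airflow 1.10-era import paths (the DAG and task ids here are illustrative):

from datetime import datetime

from airflow import DAG
from airflow.operators.dagrun_operator import DagRunOrder
from airflow_multi_dagrun.operators import TriggerMultiDagRunOperator


def gen_orders():
    # One DagRunOrder per item; each payload becomes the triggered run's conf.
    for i in range(3):
        yield DagRunOrder(payload={'index': i})


with DAG('fan_out_example', start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    fan_out = TriggerMultiDagRunOperator(
        task_id='fan_out',
        trigger_dag_id='target_dag',  # illustrative target DAG id
        python_callable=gen_orders,
    )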
Example #2
def trigger_preprocessing(context):
    """ calls the preprocessing dag: pass the filenames of the stuff to process """
    # we have a jpg, xml, small mrc and large mrc, and gainref dm4 file
    # assume the common filename is the same and allow the  preprocessing dag wait for the other files? what happens if two separate calls to the same dag occur?
    found = {}
    if context == None:
        return

    for f in context['ti'].xcom_pull(task_ids='rsync', key='return_value'):
        this = Path(f).resolve().stem
        for pattern in (r'\-\d+$', r'\-gain\-ref$'):
            if re.search(pattern, this):
                this = re.sub(pattern, '', this)
            # LOG.warn("mapped: %s -> %s" % (f, this))
        #LOG.info("this: %s, f: %s" % (this,f))
        # EPU: only care about the xml file for now (let the DAG deal with the other files)
        if f.endswith('.xml') and not f.startswith(
                'Atlas') and not f.startswith('Tile_') and '_Data_' in f:
            #LOG.warn("found EPU metadata %s" % this )
            found[this] = True
        # serialEM: just look for tifs
        elif f.endswith('.tif'):
            m = re.match(r'^(?P<base>.*\_\d\d\d\d\d)(\_.*)?\.tif$', f)
            if m:
                #LOG.info('found %s' % (m.groupdict()['base'],) )
                found[m.groupdict()['base']] = True
        # tomography file
        elif '[' in this and ']' in this:
            t = this.split(']')[0] + ']'
            found[t] = True

    for base_filename, _ in sorted(found.items()):
        sample = context['ti'].xcom_pull(task_ids='config', key='sample')
        inst = context['ti'].xcom_pull(task_ids='config', key='instrument')
        name = context['ti'].xcom_pull(task_ids='config', key='experiment')

        run_id = '%s__%s' % (name, base_filename)
        dro = DagRunOrder(run_id=run_id)

        d = sample['params']

        d['directory'] = context['ti'].xcom_pull(
            task_ids='config',
            key='experiment_directory') + '/' + sample['guid'] + '/'
        d['base'] = base_filename
        d['experiment'] = name
        d['microscope'] = inst['_id']
        d['cs'] = inst['params']['cs']
        d['keV'] = inst['params']['keV']

        # only do single-particle
        if bool(strtobool(str(
                d['preprocess/enable']))) and d['imaging_method'] in (
                    'single-particle', ):
            LOG.info('triggering dag %s with %s', run_id, d)
            dro.payload = d
            yield dro
    return
Example #3
    def execute(self, context):
        if self.execution_date is not None:
            run_id = 'trig__{}'.format(self.execution_date)
            self.execution_date = timezone.parse(self.execution_date)
        else:
            self.execution_date = timezone.utcnow()
            run_id = 'trig__' + self.execution_date.isoformat()
        dro = DagRunOrder(run_id=run_id)
        if self.python_callable is not None:
            dro = self.python_callable(context, dro)
        if dro:
            dbag = DagBag(settings.DAGS_FOLDER)
            trigger_dag = dbag.get_dag(self.trigger_dag_id)

            if not trigger_dag.get_dagrun(execution_date=self.execution_date):
                dr = trigger_dag.create_dagrun(
                    run_id=dro.run_id,
                    state=State.RUNNING,
                    conf=json.dumps(dro.payload),
                    execution_date=self.execution_date,
                    external_trigger=True)
                logging.info("Creating DagRun %s" % dr)
            else:
                trigger_dag.clear(start_date=self.execution_date,
                                  end_date=self.execution_date,
                                  only_failed=False,
                                  only_running=False,
                                  confirm_prompt=False,
                                  reset_dag_runs=True,
                                  include_subdags=False,
                                  dry_run=False)
                logging.info("Cleared DagRun %s" % trigger_dag)
        else:
            self.log.info("Criteria not met, moving on")
Example #4
def generate_dag_run(**context):
    """Callable can depend on the context"""
    for i in range(10):
        yield DagRunOrder(payload={
            'timeout': "%i",
            'ds': context["ds"],
        })
Example #5
def flex_maybe_spawn(**kwargs):
    """
    This is a generator which yields appropriate DagRunOrders
    """
    print('kwargs:')
    pprint(kwargs)
    print('dag_run conf:')
    ctx = kwargs['dag_run'].conf
    pprint(ctx)
    collectiontype = kwargs['ti'].xcom_pull(key='collectiontype',
                                            task_ids="check_uuids")
    assay_type = kwargs['ti'].xcom_pull(key='assay_type',
                                        task_ids="check_uuids")
    lz_paths = kwargs['ti'].xcom_pull(key='lz_paths',
                                      task_ids="check_uuids")
    uuids = kwargs['ti'].xcom_pull(key='uuids', task_ids="check_uuids")
    print('collectiontype: <{}>, assay_type: <{}>'.format(
        collectiontype, assay_type))
    print(f'uuids: {uuids}')
    print('lz_paths:')
    pprint(lz_paths)
    payload = {
        'ingest_id': kwargs['run_id'],
        'crypt_auth_tok': kwargs['crypt_auth_tok'],
        'parent_lz_path': lz_paths,
        'parent_submission_id': uuids,
        'metadata': {},
        'dag_provenance_list': utils.get_git_provenance_list(__file__)
    }
    for next_dag in utils.downstream_workflow_iter(collectiontype,
                                                   assay_type):
        yield next_dag, DagRunOrder(payload=payload)
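Note that flex_maybe_spawn yields (dag_id, DagRunOrder) pairs rather than bare orders, which implies a trigger step that resolves the target DAG per item. A hypothetical consumer, reusing the experimental trigger_dag API the same way dispatch_jobs does later on this page:

import json

from airflow.api.common.experimental.trigger_dag import trigger_dag
from airflow.utils import timezone


def run_orders(order_iter):
    # Trigger each named DAG with its order's payload as the run conf.
    for dag_id, dro in order_iter:
        run_id = dro.run_id or 'trig__' + timezone.utcnow().isoformat()
        trigger_dag(dag_id=dag_id, run_id=run_id,
                    conf=json.dumps(dro.payload), execution_date=None)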
Example #6
def trigger_preprocessing(context):
    """ calls the preprocessing dag: pass the filenames of the stuff to process """
    # we have a jpg, xml, small mrc and large mrc, and gainref dm4 file
    # assume the common filename is the same and allow the  preprocessing dag wait for the other files? what happens if two separate calls to the same dag occur?
    found = {}
    if context == None:
        return

    for f in context['ti'].xcom_pull(task_ids='rsync_data',
                                     key='return_value'):
        this = Path(f).resolve().stem
        for pattern in (r'\-\d+$', r'\-gain\-ref$'):
            if re.search(pattern, this):
                this = re.sub(pattern, '', this)
            # LOG.warn("mapped: %s -> %s" % (f, this))
        # only care about the xml file for now (let the DAG deal with the other files)
        if f.endswith('.xml'):
            LOG.warning("found EPU metadata %s", this)
            found[this] = True

    # now = datetime.utcnow().replace(microsecond=0)
    for base_filename, _ in found.items():
        exp = context['ti'].xcom_pull(task_ids='parse_config',
                                      key='experiment')
        # this = datetime.utcnow().replace(microsecond=0)
        # if now == this:
        #     sleep( 1 )
        #     this = datetime.utcnow().replace(microsecond=0)
        # run_id='%s__%s' % (exp['microscope'], this.isoformat())
        run_id = '%s_%s__%s' % (exp['name'], exp['microscope'], base_filename)
        dro = DagRunOrder(run_id=run_id)
        d = {
            'directory':
            context['ti'].xcom_pull(task_ids='parse_config',
                                    key='experiment_directory'),
            'base':
            base_filename,
            'experiment':
            exp['name'],
        }
        LOG.info('triggering dag %s with %s', run_id, d)
        # now = this
        dro.payload = d
        # implement dry_run somehow
        yield dro
    return
Example #7
def dispatch_jobs(**context):
    logger.debug("context: %s", context)

    pending_dags: List[dict] = get_trigger_dags()
    for pending_dag in pending_dags:
        dro = DagRunOrder(run_id="trig__" + timezone.utcnow().isoformat())
        dro.payload = pending_dag["payload"]

        # trigger airflow dag
        trigger_dag(
            dag_id=pending_dag["target_dag_id"],
            run_id=dro.run_id,
            conf=json.dumps(dro.payload),
            execution_date=None,
            replace_microseconds=False,
        )

        logger.info("Triggered: %s", pending_dag["target_dag_id"])
Example #8
    def execute(self, context, **kwargs):
        """
        Trigger another run of this DAG while the loop count is greater than zero.

        :param context: Airflow task context
        :param kwargs: additional keyword arguments (unused)
        """
        self.loop_id = 'loop_' + context['execution_date'].strftime('%Y_%m_%d_%H_%M_%S')
        self.log.info('Loop id ' + self.loop_id)
        loop_count = self.get_loop_count()

        if loop_count != 0:
            loop_dag_run_id = '__'.join(['loop', self.dag_id, self.loop_id, str(loop_count)])
            self.log.info('DagRun Loop id ' + loop_dag_run_id)
            dro = DagRunOrder(run_id=loop_dag_run_id)
            dro = self.python_callable(context, dro)
            if dro:
                self.log.info('Loop criteria met. Loop count %d' % loop_count)
                self.dag.create_dagrun(
                    run_id=dro.run_id,
                    state=State.RUNNING,
                    conf=dro.payload,
                    execution_date=(context['execution_date'] + dt.timedelta(microseconds=1)
                                    if context['execution_date'] else None),
                    external_trigger=True)

                self.log.info('Decrementing loop from %d' % loop_count)
                self.set_loop_count(loop_count - 1)

                if self.skip_downstream:
                    self.log.info('Skipping the downstream tasks')
                    downstream_tasks = context['task'].get_flat_relatives(upstream=False)
                    self.log.debug("Downstream task_ids %s", downstream_tasks)

                    if downstream_tasks:
                        self.skip(context['dag_run'], context['ti'].execution_date, downstream_tasks)
            else:
                self.log.info("Loop criteria not met. Continuing the downstream tasks")
                self.delete_loop_count()
        else:
            self.log.info('Loop count is 0. Continuing the downstream tasks ')
            self.delete_loop_count()
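Like Example #3, this loop operator delegates the continue-or-stop decision to self.python_callable(context, dro): returning the order triggers another run, while a falsy return ends the loop. A minimal sketch of such a callable; the 'work_remaining' conf key is illustrative:

def keep_looping(context, dro):
    # Return the order to re-trigger the DAG, or None to end the loop.
    conf = context['dag_run'].conf or {}
    if conf.get('work_remaining'):  # illustrative conf key
        dro.payload = {'parent_run_id': context['run_id']}
        return dro
    return None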
Example #9
def maybe_spawn_dags(**kwargs):
    """
    This is a generator which yields appropriate DagRunOrders
    """
    print('kwargs:')
    pprint(kwargs)
    print('dag_run conf:')
    pprint(kwargs['dag_run'].conf)
    metadata = kwargs['dag_run'].conf['metadata']
    auth_tok = kwargs['dag_run'].conf['auth_tok']
    payload = {k: kwargs['dag_run'].conf[k] for k in kwargs['dag_run'].conf}
    payload['apply'] = 'salmon_rnaseq_10x'
    if 'dag_provenance' in payload:
        payload['dag_provenance'].update(utils.get_git_provenance_dict(__file__))
    else:
        new_prov = utils.get_git_provenance_list(__file__)
        if 'dag_provenance_list' in payload:
            new_prov.extend(payload['dag_provenance_list'])
        payload['dag_provenance_list'] = new_prov
    yield DagRunOrder(payload=payload)
Example #10
def flex_maybe_spawn(**kwargs):
    """
    This is a generator which yields appropriate DagRunOrders
    """
    print('kwargs:')
    pprint(kwargs)
    print('dag_run conf:')
    ctx = kwargs['dag_run'].conf
    pprint(ctx)
    md_extract_retcode = int(
        kwargs['ti'].xcom_pull(task_ids="run_md_extract"))
    md_consistency_retcode = int(
        kwargs['ti'].xcom_pull(task_ids="md_consistency_tests"))
    if md_extract_retcode == 0 and md_consistency_retcode == 0:
        collectiontype = kwargs['ti'].xcom_pull(key='collectiontype',
                                                task_ids="send_status_msg")
        assay_type = kwargs['ti'].xcom_pull(key='assay_type',
                                            task_ids="send_status_msg")
        print('collectiontype: <{}>, assay_type: <{}>'.format(
            collectiontype, assay_type))
        md_fname = os.path.join(utils.get_tmp_dir_path(kwargs['run_id']),
                                'rslt.yml')
        with open(md_fname, 'r') as f:
            md = yaml.safe_load(f)
        payload = {
            'ingest_id': ctx['run_id'],
            'crypt_auth_tok': ctx['crypt_auth_tok'],
            'parent_lz_path': ctx['lz_path'],
            'parent_submission_id': ctx['submission_id'],
            'metadata': md,
            'dag_provenance_list': utils.get_git_provenance_list(__file__)
        }
        for next_dag in utils.downstream_workflow_iter(
                collectiontype, assay_type):
            yield next_dag, DagRunOrder(payload=payload)
    else:
        return None
Example #11
def generate_dag_run():
    """Callable can yield explicit DagRunOrders"""
    for i in range(10):
        yield DagRunOrder(payload={'timeout': i})
Example #12
def generate_dag_run():
    for i in range(100):
        yield DagRunOrder(payload={'index': i})
Example #13
def fill_target_dag_payload(context: Dict[str, object],
                            order: DagRunOrder) -> DagRunOrder:
    print(f"Context: '{context}'")
    order.payload = {"message": 'hello from trigger dag'}
    print(f"DagRunOrder payload: '{order.payload}'")
    return order
Example #14
def generate_dag_run():
    return [DagRunOrder(payload={'timeout': i}) for i in range(10)]