def __init__(self, project_id, dataset_id, table_id, field_id, field_type,
             format_string=None, timezone=None,
             bigquery_conn_id='bigquery_default', delegate_to=None,
             *args, **kwargs):
    max_field = bigquery_singlevalue_formatter(
        aggregation_function='MAX',
        field_id=field_id,
        field_type=field_type,
        format_string=format_string,
        timezone=timezone,
    )
    self.sql_template_params = {
        'project_id': project_id,
        'dataset_id': dataset_id,
        'table_id': table_id,
        'max_field': max_field,
    }
    self.bigquery_conn_id = bigquery_conn_id
    self.delegate_to = delegate_to
    BaseOperator.__init__(self, *args, **kwargs)
def __init__(self, ssh_hook, lsf_script, bsub=DEFAULT_BSUB, bjobs=DEFAULT_BJOBS,
             queue_name=DEFAULT_QUEUE_NAME, bsub_args='', bkill=DEFAULT_BKILL,
             poke_interval=10, timeout=60 * 60, soft_fail=False, env=None,
             *args, **kwargs):
    self.bsub = bsub
    self.bsub_args = bsub_args
    self.bjobs = bjobs
    self.queue_name = queue_name
    self.lsf_script = lsf_script
    self.hook = ssh_hook
    self.jobid = None
    self.timeout = timeout
    self.poke_interval = poke_interval
    self.soft_fail = soft_fail
    self.env = env
    self.prevent_returncode = None
    BaseOperator.__init__(self, *args, **kwargs)
def __init__(
    self,
    *,
    job_name: str,
    job_definition: str,
    job_queue: str,
    overrides: dict,
    array_properties: Optional[dict] = None,
    parameters: Optional[dict] = None,
    job_id: Optional[str] = None,
    waiters: Optional[Any] = None,
    max_retries: Optional[int] = None,
    status_retries: Optional[int] = None,
    aws_conn_id: Optional[str] = None,
    region_name: Optional[str] = None,
    tags: Optional[dict] = None,
    **kwargs,
):
    BaseOperator.__init__(self, **kwargs)
    self.job_id = job_id
    self.job_name = job_name
    self.job_definition = job_definition
    self.job_queue = job_queue
    self.overrides = overrides or {}
    self.array_properties = array_properties or {}
    self.parameters = parameters or {}
    self.waiters = waiters
    self.tags = tags or {}
    self.hook = BatchClientHook(
        max_retries=max_retries,
        status_retries=status_retries,
        aws_conn_id=aws_conn_id,
        region_name=region_name,
    )
def __init__(self, project, control, namespace, kind, metadata_converter,
             task_id=None, filters=None, *args, **kwargs):
    BaseOperator.__init__(
        self,
        task_id=task_id if task_id else 'datastore_get_{}_{}'.format(namespace, kind),
        *args, **kwargs
    )
    self.project = project
    self.control = control
    self.namespace = namespace
    self.kind = kind
    # Avoid a shared mutable default argument: fall back to a fresh list per instance.
    self.filters = filters if filters is not None else []
    self.metadata_converter = metadata_converter(self)
    self.entity = None
def run_airflow_task(task: BaseOperator, dag: DAG):
    dag.clear()
    task.run(
        start_date=dag.default_args["start_date"],
        end_date=dag.default_args["start_date"],
        ignore_ti_state=True,
    )
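# Hedged usage sketch for run_airflow_task above. The DAG, task, and import
# paths below are illustrative assumptions (Airflow 2.x style), not taken from
# the original code; actually executing the helper also needs an initialized
# Airflow metadata database, so the final call is left commented out.
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

example_dag = DAG(
    dag_id="example_dag",
    default_args={"start_date": datetime(2021, 1, 1)},
    schedule_interval=None,
)
example_task = BashOperator(
    task_id="say_hello",
    bash_command="echo hello",
    dag=example_dag,
)
# run_airflow_task(example_task, example_dag)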
def __init__(
    self,
    job_name,
    job_definition,
    job_queue,
    overrides,
    array_properties=None,
    parameters=None,
    job_id=None,
    waiters=None,
    max_retries=None,
    status_retries=None,
    aws_conn_id=None,
    region_name=None,
    **kwargs,
):  # pylint: disable=too-many-arguments
    BaseOperator.__init__(self, **kwargs)
    AwsBatchClient.__init__(
        self,
        max_retries=max_retries,
        status_retries=status_retries,
        aws_conn_id=aws_conn_id,
        region_name=region_name,
    )
    self.job_id = job_id
    self.job_name = job_name
    self.job_definition = job_definition
    self.job_queue = job_queue
    self.overrides = overrides
    self.array_properties = array_properties or {}
    self.parameters = parameters
    self.waiters = waiters
def transform(self, input_operator: BaseOperator, parent_fragment: DAGFragment,
              upstream_fragments: List[DAGFragment]) -> DAGFragment:
    # Detach the operator from any previous DAG and clear its task relationships
    # before re-attaching it to this transformer's DAG.
    input_operator._dag = None
    input_operator._upstream_task_ids.clear()
    input_operator._downstream_task_ids.clear()
    input_operator.dag = self.dag
    return DAGFragment([input_operator])
def __init__(self, phase, step, given_now=None, *args, **kwargs):
    BaseOperator.__init__(self, task_id='{}_{}'.format(step, phase), *args, **kwargs)
    self.phase = phase
    self.step = step
    self.given_now = given_now
def __init__(self, *args, **kwargs):
    BaseOperator.__init__(self, *args, **kwargs)
    # task_type is used by UI to display the correct class type, because UI only
    # receives BaseOperator from deserialized DAGs.
    self._task_type = 'BaseOperator'
    # Move class attributes into object attributes.
    self.ui_color = BaseOperator.ui_color
    self.ui_fgcolor = BaseOperator.ui_fgcolor
    self.template_fields = BaseOperator.template_fields
def __init__(self, gcs_file_path, bing_maps_conn_id='bing_maps_default', *args, **kwargs):
    self.gcs_file_path = gcs_file_path
    self.bing_maps_conn_id = bing_maps_conn_id
    BaseOperator.__init__(self, task_id=self.operation, *args, **kwargs)
def persist(
    operator_instance: BaseOperator,
    context: "Context",
    project_id: Optional[str],
):
    operator_instance.xcom_push(
        context,
        key=CloudTasksLink.key,
        value={"project_id": project_id},
    )
def persist( context: "Context", task_instance: BaseOperator, project_id: Optional[str], ): task_instance.xcom_push( context, key=WorkflowsListOfWorkflowsLink.key, value={"project_id": project_id}, )
def persist( context: "Context", task_instance: BaseOperator, project_id: Optional[str], ): task_instance.xcom_push( context, key=MemcachedInstanceListLink.key, value={"project_id": project_id}, )
def persist(context: "Context", task_instance: BaseOperator, uri: str, project_id: Optional[str]): task_instance.xcom_push( context=context, key=FileDetailsLink.key, value={ "uri": uri, "project_id": project_id }, )
def persist( context: "Context", task_instance: BaseOperator, instance_id: str, project_id: Optional[str], ): task_instance.xcom_push( context, key=SpannerInstanceLink.key, value={"instance_id": instance_id, "project_id": project_id}, )
def persist(
    operator_instance: BaseOperator,
    context: "Context",
    project_id: Optional[str],
    region: Optional[str],
    job_id: Optional[str],
):
    operator_instance.xcom_push(
        context,
        key=DataflowJobLink.key,
        value={"project_id": project_id, "location": region, "job_id": job_id},
    )
def __init__(
    self,
    *,
    task_id: str,
    spell_owner: Optional[str] = None,
    spell_conn_id: Optional[str] = None,
    **kwargs,
):
    BaseOperator.__init__(self, task_id=task_id)
    SpellClient.__init__(self, spell_conn_id=spell_conn_id, spell_owner=spell_owner)
    self.kwargs = kwargs
def __init__(self, include_timestamp: bool = True, postgres_conn_id: str = "postgres_ods_dev",
             database: str = "ODS", data_key: str = "entity_snapshots",
             chunksize: int = 1000, *args, **kwargs):
    BaseOperator.__init__(self, *args, **kwargs)
    self.include_timestamp = include_timestamp
    self.postgres_conn_id = postgres_conn_id
    self.database = database
    self.data_key = data_key
    self.chunksize = chunksize
def persist( context: "Context", task_instance: BaseOperator, subscription_id: Optional[str], project_id: Optional[str], ): task_instance.xcom_push( context, key=PubSubSubscriptionLink.key, value={ "subscription_id": subscription_id, "project_id": project_id }, )
def persist( context: "Context", task_instance: BaseOperator, dataset_id: str, project_id: str, ): task_instance.xcom_push( context, key=BigQueryDatasetLink.key, value={ "dataset_id": dataset_id, "project_id": project_id }, )
def persist( context: "Context", task_instance: BaseOperator, cloud_sql_instance: str, project_id: Optional[str], ): task_instance.xcom_push( context, key=CloudSQLInstanceDatabaseLink.key, value={ "instance": cloud_sql_instance, "project_id": project_id }, )
def persist( context: "Context", task_instance: BaseOperator, topic_id: str, project_id: Optional[str], ): task_instance.xcom_push( context, key=PubSubTopicLink.key, value={ "topic_id": topic_id, "project_id": project_id }, )
def __init__(self, project_id, dataset_id, table_id,
             bigquery_conn_id='bigquery_default', *args, **kwargs):
    self.task_id = 'drop-table-{}.{}'.format(dataset_id, table_id)
    self.project_id = project_id
    self.dataset_id = dataset_id
    self.table_id = table_id
    self.bigquery_conn_id = bigquery_conn_id
    BaseOperator.__init__(self, task_id=self.task_id, *args, **kwargs)
def persist( operator_instance: BaseOperator, context: "Context", queue_name: Optional[str], ): project_id, location, queue_id = CloudTasksQueueLink.extract_parts( queue_name) operator_instance.xcom_push( context, key=CloudTasksQueueLink.key, value={ "project_id": project_id, "location": location, "queue_id": queue_id }, )
def persist( context: "Context", task_instance: BaseOperator, location_id: str, workflow_id: str, project_id: Optional[str], ): task_instance.xcom_push( context, key=WorkflowsWorkflowDetailsLink.key, value={ "location_id": location_id, "workflow_id": workflow_id, "project_id": project_id }, )
def make_simple_dag():
    """Make a very simple DAG to verify the serialization result."""
    dag = DAG(dag_id='simple_dag')
    BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019, 8, 1))
    return {'simple_dag': dag}
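# Hedged round-trip sketch for make_simple_dag above: serialize the DAG and
# deserialize it again. SerializedDAG.to_dict/from_dict come from Airflow's
# serialization module, but wiring them up this way is an assumption of this
# sketch, not part of the original helper.
from airflow.serialization.serialized_objects import SerializedDAG

dags = make_simple_dag()
serialized = SerializedDAG.to_dict(dags['simple_dag'])  # JSON-safe dict
roundtripped = SerializedDAG.from_dict(serialized)      # back to a DAG object
assert roundtripped.dag_id == 'simple_dag'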
def test_schedule_dag_start_end_dates(self):
    """
    Tests that an attempt to schedule a task after the Dag's end_date
    does not succeed.
    """
    delta = timedelta(hours=1)
    runs = 3
    start_date = DEFAULT_DATE
    end_date = start_date + (runs - 1) * delta
    dag = DAG(self.TEST_SCHEDULE_START_END_DATES_DAG_ID,
              start_date=start_date,
              end_date=end_date,
              schedule_interval=delta)
    dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

    # Create and schedule the dag runs
    dag_runs = []
    scheduler = jobs.SchedulerJob(**self.default_scheduler_args)
    for _ in range(runs):
        dag_runs.append(scheduler.create_dag_run(dag))

    additional_dag_run = scheduler.create_dag_run(dag)

    for dag_run in dag_runs:
        self.assertIsNotNone(dag_run)

    self.assertIsNone(additional_dag_run)
def test_fractional_seconds(self):
    """
    Tests if fractional seconds are stored in the database
    """
    dag = DAG(TEST_DAG_ID + 'test_fractional_seconds')
    dag.schedule_interval = '@once'
    dag.add_task(BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime(2015, 1, 2, 0, 0)))

    start_date = timezone.utcnow()

    run = dag.create_dagrun(
        run_id='test_' + start_date.isoformat(),
        execution_date=start_date,
        start_date=start_date,
        state=State.RUNNING,
        external_trigger=False,
    )
    run.refresh_from_db()

    self.assertEqual(start_date, run.execution_date,
                     "dag run execution_date loses precision")
    self.assertEqual(start_date, run.start_date,
                     "dag run start_date loses precision")
def test_schedule_dag_fake_scheduled_previous(self):
    """
    Test scheduling a dag where there is a prior DagRun
    which has the same run_id as the next run should have
    """
    delta = timedelta(hours=1)

    dag = DAG(self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
              schedule_interval=delta,
              start_date=DEFAULT_DATE)
    dag.add_task(BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=DEFAULT_DATE))

    scheduler = jobs.SchedulerJob(**self.default_scheduler_args)
    dag.create_dagrun(run_id=DagRun.id_for_date(DEFAULT_DATE),
                      execution_date=DEFAULT_DATE,
                      state=State.SUCCESS,
                      external_trigger=True)
    dag_run = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dag_run)
    self.assertEqual(dag.dag_id, dag_run.dag_id)
    self.assertIsNotNone(dag_run.run_id)
    self.assertNotEqual('', dag_run.run_id)
    self.assertEqual(
        DEFAULT_DATE + delta,
        dag_run.execution_date,
        msg='dag_run.execution_date did not match expectation: {0}'.format(
            dag_run.execution_date)
    )
    self.assertEqual(State.RUNNING, dag_run.state)
    self.assertFalse(dag_run.external_trigger)
def persist( context: "Context", task_instance: BaseOperator, instance_id: str, location_id: str, project_id: Optional[str], ): task_instance.xcom_push( context, key=RedisInstanceDetailsLink.key, value={ "instance_id": instance_id, "location_id": location_id, "project_id": project_id }, )