Example #1
    def __init__(self,
                 project_id,
                 dataset_id,
                 table_id,
                 field_id,
                 field_type,
                 format_string=None,
                 timezone=None,
                 bigquery_conn_id='bigquery_default',
                 delegate_to=None,
                 *args,
                 **kwargs):
        max_field = bigquery_singlevalue_formatter(aggregation_function='MAX',
                                                   field_id=field_id,
                                                   field_type=field_type,
                                                   format_string=format_string,
                                                   timezone=timezone)

        self.sql_template_params = {
            'project_id': project_id,
            'dataset_id': dataset_id,
            'table_id': table_id,
            'max_field': max_field
        }

        self.bigquery_conn_id = bigquery_conn_id
        self.delegate_to = delegate_to

        BaseOperator.__init__(self, *args, **kwargs)
Example #2
 def __init__(self,
              ssh_hook,
              lsf_script,
              bsub=DEFAULT_BSUB,
              bjobs=DEFAULT_BJOBS,
              queue_name=DEFAULT_QUEUE_NAME,
              bsub_args='',
              bkill=DEFAULT_BKILL,
              poke_interval=10,
              timeout=60 * 60,
              soft_fail=False,
              env=None,
              *args,
              **kwargs):
     self.bsub = bsub
     self.bsub_args = bsub_args
     self.bjobs = bjobs
     self.queue_name = queue_name
     self.lsf_script = lsf_script
     self.hook = ssh_hook
     self.jobid = None
     self.timeout = timeout
     self.poke_interval = poke_interval
     self.soft_fail = soft_fail
     self.env = env
     self.prevent_returncode = None
     BaseOperator.__init__(self, *args, **kwargs)
Example #3
    def __init__(
        self,
        *,
        job_name: str,
        job_definition: str,
        job_queue: str,
        overrides: dict,
        array_properties: Optional[dict] = None,
        parameters: Optional[dict] = None,
        job_id: Optional[str] = None,
        waiters: Optional[Any] = None,
        max_retries: Optional[int] = None,
        status_retries: Optional[int] = None,
        aws_conn_id: Optional[str] = None,
        region_name: Optional[str] = None,
        tags: Optional[dict] = None,
        **kwargs,
    ):

        BaseOperator.__init__(self, **kwargs)
        self.job_id = job_id
        self.job_name = job_name
        self.job_definition = job_definition
        self.job_queue = job_queue
        self.overrides = overrides or {}
        self.array_properties = array_properties or {}
        self.parameters = parameters or {}
        self.waiters = waiters
        self.tags = tags or {}
        self.hook = BatchClientHook(
            max_retries=max_retries,
            status_retries=status_retries,
            aws_conn_id=aws_conn_id,
            region_name=region_name,
        )
Example #4
    def __init__(self,
                 project,
                 control,
                 namespace,
                 kind,
                 metadata_converter,
                 task_id=None,
                 filters=None,
                 *args,
                 **kwargs):

        BaseOperator.__init__(self,
                              task_id=task_id if task_id else
                              'datastore_get_{}_{}'.format(namespace, kind),
                              *args,
                              **kwargs)

        self.project = project
        self.control = control
        self.namespace = namespace
        self.kind = kind
        # Guard against the shared-mutable-default pitfall: default to a fresh list.
        self.filters = filters if filters is not None else []
        self.metadata_converter = metadata_converter(self)

        self.entity = None
Example #5
def run_airflow_task(task: BaseOperator, dag: DAG):
    dag.clear()
    task.run(
        start_date=dag.default_args["start_date"],
        end_date=dag.default_args["start_date"],
        ignore_ti_state=True,
    )
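A minimal sketch of how this helper might be exercised, assuming a trivial one-task DAG (the dag_id, task_id, and start date below are hypothetical):

from datetime import datetime

from airflow import DAG
from airflow.models import BaseOperator

# Hypothetical fixture: a one-task DAG whose default_args carry the
# start_date that run_airflow_task reads back.
dag = DAG(dag_id='example_dag',
          default_args={'start_date': datetime(2021, 1, 1)})
task = BaseOperator(task_id='example_task', dag=dag)

# Clears any prior state, then runs the task for the DAG's start_date.
run_airflow_task(task, dag)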
Example #6
    def __init__(
        self,
        job_name,
        job_definition,
        job_queue,
        overrides,
        array_properties=None,
        parameters=None,
        job_id=None,
        waiters=None,
        max_retries=None,
        status_retries=None,
        aws_conn_id=None,
        region_name=None,
        **kwargs,
    ):  # pylint: disable=too-many-arguments

        BaseOperator.__init__(self, **kwargs)
        AwsBatchClient.__init__(
            self,
            max_retries=max_retries,
            status_retries=status_retries,
            aws_conn_id=aws_conn_id,
            region_name=region_name,
        )

        self.job_id = job_id
        self.job_name = job_name
        self.job_definition = job_definition
        self.job_queue = job_queue
        self.overrides = overrides
        self.array_properties = array_properties or {}
        self.parameters = parameters
        self.waiters = waiters
Example #7
 def transform(self, input_operator: BaseOperator,
               parent_fragment: DAGFragment,
               upstream_fragments: List[DAGFragment]) -> DAGFragment:
     # Detach the operator from its current DAG and clear its task
     # relationships, then re-home it on this transformer's DAG.
     input_operator._dag = None
     input_operator._upstream_task_ids.clear()
     input_operator._downstream_task_ids.clear()
     input_operator.dag = self.dag
     return DAGFragment([input_operator])
Example #8
    def __init__(self, phase, step, given_now=None, *args, **kwargs):
        BaseOperator.__init__(self,
                              task_id='{}_{}'.format(step, phase),
                              *args,
                              **kwargs)

        self.phase = phase
        self.step = step
        self.given_now = given_now
Example #9
 def __init__(self, *args, **kwargs):
     BaseOperator.__init__(self, *args, **kwargs)
     # task_type is used by UI to display the correct class type, because UI only
     # receives BaseOperator from deserialized DAGs.
     self._task_type = 'BaseOperator'
     # Move class attributes into object attributes.
     self.ui_color = BaseOperator.ui_color
     self.ui_fgcolor = BaseOperator.ui_fgcolor
     self.template_fields = BaseOperator.template_fields
Example #10
    def __init__(self,
                 gcs_file_path,
                 bing_maps_conn_id='bing_maps_default',
                 *args,
                 **kwargs):
        self.gcs_file_path = gcs_file_path
        self.bing_maps_conn_id = bing_maps_conn_id

        BaseOperator.__init__(self, task_id=self.operation, *args, **kwargs)
Example #11
 def persist(
     operator_instance: BaseOperator,
     context: "Context",
     project_id: Optional[str],
 ):
     operator_instance.xcom_push(
         context,
         key=CloudTasksLink.key,
         value={"project_id": project_id},
     )
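These persist helpers stash link parameters in XCom under the link class's key; a hedged sketch of the matching read side, pulling the stored value back out (the upstream task_id is hypothetical):

# Later, from any TaskInstance `ti` in the same DAG run:
stored = ti.xcom_pull(
    task_ids='create_queue',  # hypothetical task that called persist()
    key=CloudTasksLink.key,
)
project_id = stored['project_id'] if stored else None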
Example #12
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=WorkflowsListOfWorkflowsLink.key,
         value={"project_id": project_id},
     )
Example #13
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=MemcachedInstanceListLink.key,
         value={"project_id": project_id},
     )
Example #14
 def persist(context: "Context", task_instance: BaseOperator, uri: str,
             project_id: Optional[str]):
     task_instance.xcom_push(
         context=context,
         key=FileDetailsLink.key,
         value={
             "uri": uri,
             "project_id": project_id
         },
     )
Example #15
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     instance_id: str,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=SpannerInstanceLink.key,
         value={"instance_id": instance_id, "project_id": project_id},
     )
Example #16
 def persist(
     operator_instance: BaseOperator,
     context: "Context",
     project_id: Optional[str],
     region: Optional[str],
     job_id: Optional[str],
 ):
     operator_instance.xcom_push(
         context,
         key=DataflowJobLink.key,
         value={"project_id": project_id, "location": region, "job_id": job_id},
     )
Example #17
 def __init__(
     self,
     *,
     task_id: str,
     spell_owner: Optional[str] = None,
     spell_conn_id: Optional[str] = None,
     **kwargs,
 ):
     BaseOperator.__init__(self, task_id=task_id)
     SpellClient.__init__(self,
                          spell_conn_id=spell_conn_id,
                          spell_owner=spell_owner)
     self.kwargs = kwargs
Example #18
 def __init__(self,
              include_timestamp: bool = True,
              postgres_conn_id: str = "postgres_ods_dev",
              database: str = "ODS",
              data_key: str = "entity_snapshots",
              chunksize: int = 1000,
              *args,
              **kwargs):
     BaseOperator.__init__(self, *args, **kwargs)
     self.include_timestamp = include_timestamp
     self.postgres_conn_id = postgres_conn_id
     self.database = database
     self.data_key = data_key
     self.chunksize = chunksize
Example #19
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     subscription_id: Optional[str],
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=PubSubSubscriptionLink.key,
         value={
             "subscription_id": subscription_id,
             "project_id": project_id
         },
     )
Example #20
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     dataset_id: str,
     project_id: str,
 ):
     task_instance.xcom_push(
         context,
         key=BigQueryDatasetLink.key,
         value={
             "dataset_id": dataset_id,
             "project_id": project_id
         },
     )
Example #21
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     cloud_sql_instance: str,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=CloudSQLInstanceDatabaseLink.key,
         value={
             "instance": cloud_sql_instance,
             "project_id": project_id
         },
     )
Example #22
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     topic_id: str,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=PubSubTopicLink.key,
         value={
             "topic_id": topic_id,
             "project_id": project_id
         },
     )
Example #23
    def __init__(self,
                 project_id,
                 dataset_id,
                 table_id,
                 bigquery_conn_id='bigquery_default',
                 *args,
                 **kwargs):
        self.task_id = 'drop-table-{}.{}'.format(dataset_id, table_id)

        self.project_id = project_id
        self.dataset_id = dataset_id
        self.table_id = table_id
        self.bigquery_conn_id = bigquery_conn_id

        BaseOperator.__init__(self, task_id=self.task_id, *args, **kwargs)
Example #24
 def persist(
     operator_instance: BaseOperator,
     context: "Context",
     queue_name: Optional[str],
 ):
     project_id, location, queue_id = CloudTasksQueueLink.extract_parts(
         queue_name)
     operator_instance.xcom_push(
         context,
         key=CloudTasksQueueLink.key,
         value={
             "project_id": project_id,
             "location": location,
             "queue_id": queue_id
         },
     )
Example #25
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     location_id: str,
     workflow_id: str,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=WorkflowsWorkflowDetailsLink.key,
         value={
             "location_id": location_id,
             "workflow_id": workflow_id,
             "project_id": project_id
         },
     )
Example #26
def make_simple_dag():
    """Make very simple DAG to verify serialization result."""
    dag = DAG(dag_id='simple_dag')
    BaseOperator(task_id='simple_task',
                 dag=dag,
                 start_date=datetime(2019, 8, 1))
    return {'simple_dag': dag}
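A hedged sketch of how such a fixture is typically consumed in a serialization test: round-trip the DAG through SerializedDAG and compare ids (the import path below matches recent Airflow releases, but treat it as an assumption):

from airflow.serialization.serialized_objects import SerializedDAG

dag = make_simple_dag()['simple_dag']

# Serialize to a JSON-safe dict, rebuild the DAG, and check identity.
serialized = SerializedDAG.to_dict(dag)
roundtripped = SerializedDAG.from_dict(serialized)
assert roundtripped.dag_id == dag.dag_id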
Example #27
    def test_schedule_dag_start_end_dates(self):
        """
        Tests that an attempt to schedule a task after the Dag's end_date
        does not succeed.
        """
        delta = timedelta(hours=1)
        runs = 3
        start_date = DEFAULT_DATE
        end_date = start_date + (runs - 1) * delta

        dag = DAG(self.TEST_SCHEDULE_START_END_DATES_DAG_ID,
                  start_date=start_date,
                  end_date=end_date,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        # Create and schedule the dag runs
        dag_runs = []
        scheduler = jobs.SchedulerJob(**self.default_scheduler_args)
        for _ in range(runs):
            dag_runs.append(scheduler.create_dag_run(dag))

        additional_dag_run = scheduler.create_dag_run(dag)

        for dag_run in dag_runs:
            self.assertIsNotNone(dag_run)

        self.assertIsNone(additional_dag_run)
Example #28
    def test_fractional_seconds(self):
        """
        Tests if fractional seconds are stored in the database
        """
        dag = DAG(TEST_DAG_ID + 'test_fractional_seconds')
        dag.schedule_interval = '@once'
        dag.add_task(BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime(2015, 1, 2, 0, 0)))

        start_date = timezone.utcnow()

        run = dag.create_dagrun(
            run_id='test_' + start_date.isoformat(),
            execution_date=start_date,
            start_date=start_date,
            state=State.RUNNING,
            external_trigger=False
        )

        run.refresh_from_db()

        self.assertEqual(start_date, run.execution_date,
                         "dag run execution_date loses precision")
        self.assertEqual(start_date, run.start_date,
                         "dag run start_date loses precision")
Example #29
    def test_schedule_dag_fake_scheduled_previous(self):
        """
        Test scheduling a dag where there is a prior DagRun
        which has the same run_id as the next run should have
        """
        delta = timedelta(hours=1)

        dag = DAG(self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
                  schedule_interval=delta,
                  start_date=DEFAULT_DATE)
        dag.add_task(BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=DEFAULT_DATE))

        scheduler = jobs.SchedulerJob(**self.default_scheduler_args)
        dag.create_dagrun(run_id=DagRun.id_for_date(DEFAULT_DATE),
                          execution_date=DEFAULT_DATE,
                          state=State.SUCCESS,
                          external_trigger=True)
        dag_run = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dag_run)
        self.assertEqual(dag.dag_id, dag_run.dag_id)
        self.assertIsNotNone(dag_run.run_id)
        self.assertNotEqual('', dag_run.run_id)
        self.assertEqual(
            DEFAULT_DATE + delta,
            dag_run.execution_date,
            msg='dag_run.execution_date did not match expectation: {0}'
            .format(dag_run.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run.state)
        self.assertFalse(dag_run.external_trigger)
Example #30
 def persist(
     context: "Context",
     task_instance: BaseOperator,
     instance_id: str,
     location_id: str,
     project_id: Optional[str],
 ):
     task_instance.xcom_push(
         context,
         key=RedisInstanceDetailsLink.key,
         value={
             "instance_id": instance_id,
             "location_id": location_id,
             "project_id": project_id
         },
     )