def execute(self, context):
        """Fetch cluster credentials with gcloud, then run the parent ``execute``.

        ``gcloud container clusters get-credentials`` is invoked with the
        kubeconfig environment variable pointing at a temporary file, and
        ``self.config_file`` is set to that file before delegating to the
        parent class.

        :param context: passed through unchanged to the parent ``execute``
        :return: whatever the parent ``execute`` returns
        :raises subprocess.CalledProcessError: if the gcloud command exits
            with a non-zero status
        """
        # Specifying a service account file allows the user to use non-default
        # authentication for creating a Kubernetes Pod. This is done by setting the
        # environment variable `GOOGLE_APPLICATION_CREDENTIALS` that gcloud looks at.
        key_file = None

        # If gcp_conn_id is not specified gcloud will use the default
        # service account credentials.
        if self.gcp_conn_id:
            from airflow.hooks.base_hook import BaseHook
            # extras is a deserialized json object
            extras = BaseHook.get_connection(self.gcp_conn_id).extra_dejson
            # key_file only gets set if a json file is created from a JSON string in
            # the web ui, else none
            key_file = self._set_env_from_extras(extras=extras)

        # Write config to a temp file and set the environment variable to point to it.
        # This is to avoid race conditions of reading/writing a single file
        with tempfile.NamedTemporaryFile() as conf_file:
            try:
                os.environ[KUBE_CONFIG_ENV_VAR] = conf_file.name
                # Attempt to get/update credentials
                # We call gcloud directly instead of using google-cloud-python api
                # because there is no way to write kubernetes config to a file, which is
                # required by KubernetesPodOperator.
                # The gcloud command looks at the env variable `KUBECONFIG` for where to
                # save the kubernetes config file.
                subprocess.check_call(
                    ["gcloud", "container", "clusters", "get-credentials",
                     self.cluster_name,
                     "--zone", self.location,
                     "--project", self.project_id])
            finally:
                # Close the key file even when gcloud fails, so the credential file
                # is not left behind. Per the original author's note the key file
                # is removed from the file system when closed.
                if key_file:
                    key_file.close()

            # Tell `KubernetesPodOperator` where the config file is located
            self.config_file = os.environ[KUBE_CONFIG_ENV_VAR]
            return super().execute(context)
    def execute(self, context):
        """Copy the rows produced by ``self.sql`` into the destination table.

        Records are read through the hook for ``self.source_conn_id``; if
        ``self.preoperator`` is set it is run on the destination hook before
        the rows are inserted into ``self.destination_table``.

        :param context: task context (not used by this method)
        """
        reader = BaseHook.get_hook(self.source_conn_id)
        self.log.info("Extracting data from %s", self.source_conn_id)
        self.log.info("Executing: \n %s", self.sql)
        rows = reader.get_records(self.sql)

        writer = BaseHook.get_hook(self.destination_conn_id)
        pre_statement = self.preoperator
        if pre_statement:
            # Optional statement(s) to run on the destination before loading.
            self.log.info("Running preoperator")
            self.log.info(pre_statement)
            writer.run(pre_statement)

        self.log.info("Inserting rows into %s", self.destination_conn_id)
        writer.insert_rows(table=self.destination_table, rows=rows)
Example #3
0
    def execute(self, context):
        """Copy the rows produced by ``self.sql`` into the destination table.

        Records are read through the hook for ``self.source_conn_id``; if
        ``self.preoperator`` is set it is run on the destination hook before
        the rows are inserted into ``self.destination_table``.

        :param context: task context (not used by this method)
        """
        source_hook = BaseHook.get_hook(self.source_conn_id)

        # Lazy %-style arguments: the rendered text is unchanged, but a
        # non-string ``sql`` no longer raises TypeError on concatenation.
        logging.info("Extracting data from %s", self.source_conn_id)
        logging.info("Executing: \n%s", self.sql)
        results = source_hook.get_records(self.sql)

        destination_hook = BaseHook.get_hook(self.destination_conn_id)
        if self.preoperator:
            logging.info("Running preoperator")
            logging.info(self.preoperator)
            destination_hook.run(self.preoperator)

        logging.info("Inserting rows into %s", self.destination_conn_id)
        destination_hook.insert_rows(table=self.destination_table, rows=results)
    def execute(self, context):
        """Fetch cluster credentials with gcloud, then run the parent ``execute``.

        ``gcloud container clusters get-credentials`` is invoked with the
        kubeconfig environment variable pointing at a temporary file, and
        ``self.config_file`` is set to that file before delegating to the
        parent class.

        :param context: passed through unchanged to the parent ``execute``
        :return: whatever the parent ``execute`` returns
        :raises subprocess.CalledProcessError: if the gcloud command exits
            with a non-zero status
        """
        # If gcp_conn_id is not specified gcloud will use the default
        # service account credentials.
        if self.gcp_conn_id:
            from airflow.hooks.base_hook import BaseHook
            # extras is a deserialized json object
            extras = BaseHook.get_connection(self.gcp_conn_id).extra_dejson
            self._set_env_from_extras(extras=extras)

        # Write config to a temp file and set the environment variable to point to it.
        # This is to avoid race conditions of reading/writing a single file
        with tempfile.NamedTemporaryFile() as conf_file:
            os.environ[KUBE_CONFIG_ENV_VAR] = conf_file.name
            # Attempt to get/update credentials
            # We call gcloud directly instead of using google-cloud-python api
            # because there is no way to write kubernetes config to a file, which is
            # required by KubernetesPodOperator.
            # The gcloud command looks at the env variable `KUBECONFIG` for where to save
            # the kubernetes config file.
            subprocess.check_call(
                ["gcloud", "container", "clusters", "get-credentials",
                 self.cluster_name,
                 "--zone", self.location,
                 "--project", self.project_id])

            # Tell `KubernetesPodOperator` where the config file is located
            self.config_file = os.environ[KUBE_CONFIG_ENV_VAR]
            # Propagate the parent's result instead of silently dropping it.
            return super(GKEPodOperator, self).execute(context)
    def poke(self, context):
        """Run ``self.sql`` and report whether its first cell signals success.

        :param context: task context (not used by this method)
        :return: ``False`` when the query returns no records or when the first
            cell of the first record renders as ``'0'`` or ``''``; ``True``
            otherwise
        """
        connection = BaseHook.get_connection(self.conn_id)
        db_hook = connection.get_hook()

        self.log.info('Poking: %s', self.sql)
        rows = db_hook.get_records(self.sql)
        if rows:
            first_cell = str(rows[0][0])
            return first_cell not in ('0', '')
        return False
def _get_project_id():
  """Get project ID from default GCP connection.

  Returns:
    The value stored under 'extra__google_cloud_platform__project' in the
    extras of the 'google_cloud_default' connection.

  Raises:
    ValueError: if that key is missing from the connection extras.
  """
  extras = BaseHook.get_connection('google_cloud_default').extra_dejson
  key = 'extra__google_cloud_platform__project'
  if key not in extras:
    # Raising a bare string is itself a TypeError in Python 3 and hides the
    # message; raise a real exception that carries it instead.
    raise ValueError('Must configure project_id in google_cloud_default '
                     'connection from Airflow Console')
  return extras[key]
    def poke(self, context):
        """Run ``self.sql`` and report whether its first cell signals success.

        :param context: task context (not used by this method)
        :return: ``False`` when the query returns no records or when the first
            cell of the first record renders as ``'0'`` or ``''``; ``True``
            otherwise
        """
        hook = BaseHook.get_connection(self.conn_id).get_hook()

        # Lazy %-style argument: same rendered text, no TypeError if ``sql``
        # is not a string.
        logging.info('Poking: %s', self.sql)
        records = hook.get_records(self.sql)
        if not records:
            return False
        return str(records[0][0]) not in ('0', '')
    def poke(self, context):
        """Run ``self.sql`` on a supported connection and test its first cell.

        :param context: task context (not used by this method)
        :return: ``False`` when the query returns no records or when the first
            cell of the first record renders as ``'0'`` or ``''``; ``True``
            otherwise
        :raises AirflowException: if the connection's ``conn_type`` is not one
            of the supported types listed below
        """
        conn = BaseHook.get_connection(self.conn_id)

        supported_types = {'google_cloud_platform', 'jdbc', 'mssql',
                           'mysql', 'oracle', 'postgres',
                           'presto', 'sqlite', 'vertica'}
        if conn.conn_type not in supported_types:
            message = ("The connection type is not supported by SqlSensor. " +
                       "Supported connection types: {}".format(list(supported_types)))
            raise AirflowException(message)

        db_hook = conn.get_hook()
        self.log.info('Poking: %s (with parameters %s)', self.sql, self.parameters)
        rows = db_hook.get_records(self.sql, self.parameters)
        # An empty result counts as "not yet"; otherwise inspect the first cell.
        return bool(rows) and str(rows[0][0]) not in ('0', '')
    def poke(self, context):
        """Configure the Qubole client and run one check of ``self.data``.

        :param context: task context (not used by this method)
        :return: the result of ``self.sensor_class.check(self.data)``, or
            ``False`` if that call raises
        """
        conn = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=conn.password, api_url=conn.host)

        # ``this`` is not a Python name; the instance logger is ``self.log``.
        self.log.info('Poking: %s', self.data)

        try:
            status = self.sensor_class.check(self.data)
        except Exception as e:
            # Deliberately broad: a failed check is logged and reported as
            # "not ready" rather than failing the poke.
            self.log.exception(e)
            status = False

        self.log.info('Status of this Poke: %s', status)

        return status
    def get_extra_links(self, operator, dttm):
        """
        Build the link to the qubole command result page.

        :param operator: operator whose ``kwargs['qubole_conn_id']`` names the
            connection and whose task pushed the ``qbol_cmd_id`` XCom
        :param dttm: datetime used as the task instance's execution date
        :return: url link, or an empty string when no command id was pulled
        """
        conn = BaseHook.get_connection(operator.kwargs['qubole_conn_id'])

        # Default endpoint, replaced when the connection carries its own host.
        base_url = 'https://api.qubole.com/v2/analyze?command_id='
        if conn and conn.host:
            base_url = re.sub(r'api$', 'v2/analyze?command_id=', conn.host)

        task_instance = TaskInstance(task=operator, execution_date=dttm)
        command_id = task_instance.xcom_pull(task_ids=operator.task_id,
                                             key='qbol_cmd_id')
        if not command_id:
            return ''
        return base_url + str(command_id)
    def execute(self, context):
        """Copy the rows produced by ``self.sql`` into a Teradata table.

        Records are read through the hook for ``self.source_conn_id``. If
        ``self.preoperator`` is set it is run on the Teradata hook first. Rows
        go through ``bulk_insert_rows`` (committing every ``self.batch_size``
        rows) when ``self.batch`` is truthy, otherwise through ``insert_rows``
        committing every 1000 rows.

        :param context: task context (not used by this method)
        """
        source_hook = BaseHook.get_hook(self.source_conn_id)

        # Lazy %-style arguments: the rendered text is unchanged, but a
        # non-string ``sql`` no longer raises TypeError on concatenation.
        logging.info("Extracting data from %s", self.source_conn_id)
        logging.info("Executing: \n%s", self.sql)
        results = source_hook.get_records(self.sql)

        destination_hook = TeradataHook(teradata_conn_id=self.destination_conn_id)
        if self.preoperator:
            logging.info("Running preoperator")
            logging.info(self.preoperator)
            destination_hook.run(self.preoperator)

        if self.batch:
            logging.info(
                "Inserting %s rows into %s with a batch size of %s rows",
                len(results), self.destination_conn_id, self.batch_size)
            destination_hook.bulk_insert_rows(
                table=self.destination_table,
                rows=iter(results),
                commit_every=self.batch_size,
                unicode_source=self.unicode_source)
        else:
            logging.info("Inserting %s rows into %s",
                         len(results), self.destination_conn_id)
            destination_hook.insert_rows(
                table=self.destination_table,
                rows=iter(results),
                commit_every=1000,
                unicode_source=self.unicode_source)
 def __init__(self,
              sql,
              autocommit=False,
              parameters=None,
              gcp_conn_id='google_cloud_default',
              gcp_cloudsql_conn_id='google_cloud_sql_default',
              *args, **kwargs):
     """Store the query settings and build the Cloud SQL database hook.

     :param sql: stored on the instance as ``self.sql``
     :param autocommit: stored on the instance as ``self.autocommit``
     :param parameters: stored on the instance as ``self.parameters``
     :param gcp_conn_id: id of the connection looked up through ``BaseHook``;
         its extras supply the default project id for the database hook
     :param gcp_cloudsql_conn_id: connection id handed to
         ``CloudSqlDatabaseHook``
     """
     super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)

     # Plain configuration copied from the arguments.
     self.sql = sql
     self.autocommit = autocommit
     self.parameters = parameters
     self.gcp_conn_id = gcp_conn_id
     self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id

     # NOTE: the connection lookup happens at construction time.
     self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
     default_project_id = self.gcp_connection.extra_dejson.get(
         'extra__google_cloud_platform__project')
     self.cloudsql_db_hook = CloudSqlDatabaseHook(
         gcp_cloudsql_conn_id=gcp_cloudsql_conn_id,
         default_gcp_project_id=default_project_id)

     # Start out unset; nothing in this method populates them.
     self.cloud_sql_proxy_runner = None
     self.database_hook = None
 def get_db_hook(self):
     """Return the hook that ``BaseHook.get_hook`` resolves for ``self.conn_id``."""
     db_hook = BaseHook.get_hook(conn_id=self.conn_id)
     return db_hook