Exemple #1
0
def send_mime_email(e_from, e_to, mime_msg, dryrun=False):
    """
    Send a MIME email through the server configured in the ``smtp`` section.

    :param e_from: envelope sender passed to ``sendmail``
    :param e_to: envelope recipient(s) passed to ``sendmail``
    :param mime_msg: message object; its ``as_string()`` output is sent
    :param dryrun: when true, the configuration is read but no connection
        is opened and nothing is sent
    """
    log = LoggingMixin().log

    smtp_host = conf.get('smtp', 'SMTP_HOST')
    smtp_port = conf.getint('smtp', 'SMTP_PORT')
    smtp_starttls = conf.getboolean('smtp', 'SMTP_STARTTLS')
    smtp_ssl = conf.getboolean('smtp', 'SMTP_SSL')
    smtp_user = None
    smtp_password = None

    # Credentials are optional; without them the session stays unauthenticated.
    try:
        smtp_user = conf.get('smtp', 'SMTP_USER')
        smtp_password = conf.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug("No user/password found for SMTP, so logging in with no authentication.")

    if dryrun:
        return

    conn = smtplib.SMTP_SSL(smtp_host, smtp_port) if smtp_ssl else smtplib.SMTP(smtp_host, smtp_port)
    try:
        if smtp_starttls:
            conn.starttls()
        if smtp_user and smtp_password:
            conn.login(smtp_user, smtp_password)
        conn.sendmail(e_from, e_to, mime_msg.as_string())
        # Only report success once the server has accepted the message.
        log.info("Sent an alert email to %s", e_to)
    finally:
        # Always release the connection, even when the send fails part-way.
        try:
            conn.quit()
        except smtplib.SMTPServerDisconnected:
            # The server is already gone; just drop the local socket.
            conn.close()
def _split_tablename(table_input, default_project_id, var_name=None):
    assert default_project_id is not None, "INTERNAL: No default project is specified"

    def var_print(var_name):
        if var_name is None:
            return ""
        else:
            return "Format exception for {var}: ".format(var=var_name)

    if table_input.count('.') + table_input.count(':') > 3:
        raise Exception((
            '{var}Use either : or . to specify project '
            'got {input}'
        ).format(var=var_print(var_name), input=table_input))

    cmpt = table_input.rsplit(':', 1)
    project_id = None
    rest = table_input
    if len(cmpt) == 1:
        project_id = None
        rest = cmpt[0]
    elif len(cmpt) == 2 and cmpt[0].count(':') <= 1:
        if cmpt[-1].count('.') != 2:
            project_id = cmpt[0]
            rest = cmpt[1]
    else:
        raise Exception((
            '{var}Expect format of (<project:)<dataset>.<table>, '
            'got {input}'
        ).format(var=var_print(var_name), input=table_input))

    cmpt = rest.split('.')
    if len(cmpt) == 3:
        assert project_id is None, (
            "{var}Use either : or . to specify project"
        ).format(var=var_print(var_name))
        project_id = cmpt[0]
        dataset_id = cmpt[1]
        table_id = cmpt[2]

    elif len(cmpt) == 2:
        dataset_id = cmpt[0]
        table_id = cmpt[1]
    else:
        raise Exception((
            '{var}Expect format of (<project.|<project:)<dataset>.<table>, '
            'got {input}'
        ).format(var=var_print(var_name), input=table_input))

    if project_id is None:
        if var_name is not None:
            log = LoggingMixin().log
            log.info(
                'Project not included in {var}: {input}; using project "{project}"'.format(
                    var=var_name, input=table_input, project=default_project_id
                )
            )
        project_id = default_project_id

    return project_id, dataset_id, table_id
Exemple #3
0
def send_notification(receiver, subject, html_content, **kwargs):
    """
    Send a Slack notification using the ``slack`` section of the config file.

    When the token, channel or username is missing or empty, an info message
    is logged and nothing is sent.

    :param receiver: accepted to match the expected signature; not used
    :param subject: accepted to match the expected signature; not used
    :param html_content: message content, passed through ``format_message``
    :return: None
    """
    _ = (receiver, subject, kwargs)

    log = LoggingMixin().log
    settings = configuration.conf

    try:
        token = settings.get('slack', 'TOKEN')
        channel = settings.get('slack', 'CHANNEL')
        username = settings.get('slack', 'USERNAME')
        # An empty setting is handled exactly like a missing one.
        if not (token and channel and username):
            raise AirflowConfigException("error: empty value")
    except AirflowConfigException:
        log.info("No token/channel/username found for slack.")
        return

    message = format_message(html_content)
    send_notification_to_channel(message, channel, token, username)
Exemple #4
0
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    """
    Send a MIME email through the server configured in the ``smtp`` section.

    :param e_from: envelope sender passed to ``sendmail``
    :param e_to: envelope recipient(s) passed to ``sendmail``
    :param mime_msg: message object; its ``as_string()`` output is sent
    :param dryrun: when true, the configuration is read but no connection
        is opened and nothing is sent
    """
    log = LoggingMixin().log

    smtp_host = configuration.conf.get('smtp', 'SMTP_HOST')
    smtp_port = configuration.conf.getint('smtp', 'SMTP_PORT')
    smtp_starttls = configuration.conf.getboolean('smtp', 'SMTP_STARTTLS')
    smtp_ssl = configuration.conf.getboolean('smtp', 'SMTP_SSL')
    smtp_user = None
    smtp_password = None

    # Credentials are optional; without them the session stays unauthenticated.
    try:
        smtp_user = configuration.conf.get('smtp', 'SMTP_USER')
        smtp_password = configuration.conf.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug("No user/password found for SMTP, so logging in with no authentication.")

    if dryrun:
        return

    conn = smtplib.SMTP_SSL(smtp_host, smtp_port) if smtp_ssl else smtplib.SMTP(smtp_host, smtp_port)
    try:
        if smtp_starttls:
            conn.starttls()
        if smtp_user and smtp_password:
            conn.login(smtp_user, smtp_password)
        conn.sendmail(e_from, e_to, mime_msg.as_string())
        # Only report success once the server has accepted the message.
        log.info("Sent an alert email to %s", e_to)
    finally:
        # Always release the connection, even when the send fails part-way.
        try:
            conn.quit()
        except smtplib.SMTPServerDisconnected:
            # The server is already gone; just drop the local socket.
            conn.close()
Exemple #5
0
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    """
    Send a MIME email through the server configured in the ``smtp`` section.

    :param e_from: envelope sender passed to ``sendmail``
    :param e_to: envelope recipient(s) passed to ``sendmail``
    :param mime_msg: message object; its ``as_string()`` output is sent
    :param dryrun: when true, the configuration is read but no connection
        is opened and nothing is sent
    """
    log = LoggingMixin().log

    smtp_host = configuration.conf.get('smtp', 'SMTP_HOST')
    smtp_port = configuration.conf.getint('smtp', 'SMTP_PORT')
    smtp_starttls = configuration.conf.getboolean('smtp', 'SMTP_STARTTLS')
    smtp_ssl = configuration.conf.getboolean('smtp', 'SMTP_SSL')
    smtp_user = None
    smtp_password = None

    # Credentials are optional; without them the session stays unauthenticated.
    try:
        smtp_user = configuration.conf.get('smtp', 'SMTP_USER')
        smtp_password = configuration.conf.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug(
            "No user/password found for SMTP, so logging in with no authentication."
        )

    if dryrun:
        return

    if smtp_ssl:
        conn = smtplib.SMTP_SSL(smtp_host, smtp_port)
    else:
        conn = smtplib.SMTP(smtp_host, smtp_port)
    try:
        if smtp_starttls:
            conn.starttls()
        if smtp_user and smtp_password:
            conn.login(smtp_user, smtp_password)
        conn.sendmail(e_from, e_to, mime_msg.as_string())
        # Only report success once the server has accepted the message.
        log.info("Sent an alert email to %s", e_to)
    finally:
        # Always release the connection, even when the send fails part-way.
        try:
            conn.quit()
        except smtplib.SMTPServerDisconnected:
            # The server is already gone; just drop the local socket.
            conn.close()
Exemple #6
0
    def fetchAccessToken(
        self,
        client_id=None,
        client_secret=None,
        login=None,
        password=None,
        refresh_token=None,
    ):
        """
        Fetch an access token and store it as ``self.snow_cred.password``.

        A ``ServiceNowClient`` with ``auth_type=1`` is created for the host in
        ``self.snow_cred`` and all arguments are forwarded unchanged to its
        ``fetchAccessToken``.

        :return: the value stored in ``self.snow_cred.password``
        """
        client = ServiceNowClient(auth_type=1, host=self.snow_cred.host)

        token = client.fetchAccessToken(
            client_id=client_id,
            client_secret=client_secret,
            login=login,
            password=password,
            refresh_token=refresh_token,
        )
        self.snow_cred.password = token

        LoggingMixin().log.info(
            "Using connection to fetch access_token : %s",
            self.snow_cred.debug_info(),
        )

        return self.snow_cred.password
def execute_command(command):
    """
    Run ``command`` through the shell, failing on a non-zero exit status.

    NOTE: the command is executed with ``shell=True``, so it must come from a
    trusted source.

    :param command: the command line to execute
    :raises AirflowException: when the command exits with a non-zero status
    """
    logger = LoggingMixin().log
    logger.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as failure:
        logger.error(failure)
        raise AirflowException('Celery command failed')
def execute_command(command):
    """
    Run ``command`` through the shell, failing on a non-zero exit status.

    NOTE: the command is executed with ``shell=True``, so it must come from a
    trusted source.

    :param command: the command line to execute
    :raises AirflowException: when the command exits with a non-zero status
    """
    logger = LoggingMixin().log
    logger.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as failure:
        logger.error(failure)
        raise AirflowException('Celery command failed')
def _split_tablename(table_input, default_project_id, var_name=None):
    assert default_project_id is not None, "INTERNAL: No default project is specified"

    def var_print(var_name):
        if var_name is None:
            return ""
        else:
            return "Format exception for {var}: ".format(var=var_name)

    if table_input.count('.') + table_input.count(':') > 3:
        raise Exception(('{var}Use either : or . to specify project '
                         'got {input}').format(var=var_print(var_name),
                                               input=table_input))

    cmpt = table_input.rsplit(':', 1)
    project_id = None
    rest = table_input
    if len(cmpt) == 1:
        project_id = None
        rest = cmpt[0]
    elif len(cmpt) == 2 and cmpt[0].count(':') <= 1:
        if cmpt[-1].count('.') != 2:
            project_id = cmpt[0]
            rest = cmpt[1]
    else:
        raise Exception(('{var}Expect format of (<project:)<dataset>.<table>, '
                         'got {input}').format(var=var_print(var_name),
                                               input=table_input))

    cmpt = rest.split('.')
    if len(cmpt) == 3:
        assert project_id is None, (
            "{var}Use either : or . to specify project").format(
                var=var_print(var_name))
        project_id = cmpt[0]
        dataset_id = cmpt[1]
        table_id = cmpt[2]

    elif len(cmpt) == 2:
        dataset_id = cmpt[0]
        table_id = cmpt[1]
    else:
        raise Exception(
            ('{var}Expect format of (<project.|<project:)<dataset>.<table>, '
             'got {input}').format(var=var_print(var_name), input=table_input))

    if project_id is None:
        if var_name is not None:
            log = LoggingMixin().log
            log.info('Project not included in {var}: {input}; '
                     'using project "{project}"'.format(
                         var=var_name,
                         input=table_input,
                         project=default_project_id))
        project_id = default_project_id

    return project_id, dataset_id, table_id
Exemple #10
0
    def handle_failure_retry(context):
        """
        Cancel the Qubole command of the failing task if it is still running.

        The command id is read from the task instance's XCom under the key
        ``qbol_cmd_id``. Nothing happens when no id was pushed, the command
        cannot be found, or its status is not ``'running'``.

        :param context: task context; ``context['ti']`` is the task instance
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        command = Command.find(command_id)
        if command is not None and command.status == 'running':
            LoggingMixin().log.info(
                'Cancelling the Qubole Command Id: %s', command_id)
            command.cancel()
Exemple #11
0
def _post_sendgrid_mail(mail_data):
    """
    Post ``mail_data`` to SendGrid and log the outcome.

    The API key is read from the ``SENDGRID_API_KEY`` environment variable.
    A 2xx response is logged at info level, anything else as a warning; no
    exception is raised for a non-2xx status.

    :param mail_data: request body; ``'subject'`` and ``'personalizations'``
        are read for the log messages
    """
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if 200 <= response.status_code < 300:
        # Pass values as logging arguments so formatting is deferred to the logger.
        log.info('Email with subject %s is successfully sent to recipients: %s',
                 mail_data['subject'], mail_data['personalizations'])
    else:
        log.warning('Failed to send out email with subject %s, status code: %s',
                    mail_data['subject'], response.status_code)
Exemple #12
0
def _post_sendgrid_mail(mail_data):
    """
    Post ``mail_data`` to SendGrid and log the outcome.

    The API key is read from the ``SENDGRID_API_KEY`` environment variable.
    A 2xx response is logged at info level, anything else as a warning.

    :param mail_data: request body; ``'subject'`` and ``'personalizations'``
        are read for the log messages
    """
    logger = LoggingMixin().log
    api_key = os.environ.get('SENDGRID_API_KEY')
    client = sendgrid.SendGridAPIClient(apikey=api_key)
    response = client.client.mail.send.post(request_body=mail_data)

    delivered = 200 <= response.status_code < 300  # any 2xx status code
    if not delivered:
        logger.warning('Failed to send out email with subject %s, status code: %s',
                       mail_data['subject'], response.status_code)
    else:
        logger.info('Email with subject %s is successfully sent to recipients: %s',
                    mail_data['subject'], mail_data['personalizations'])
Exemple #13
0
 def get_query_results(self):
     """
     Return the results of ``self.cmd`` as a single string.

     The results are fetched inline into an in-memory buffer using
     ``COL_DELIM`` as the delimiter. When ``self.cmd`` is None, a message is
     logged and None is returned.
     """
     log = LoggingMixin().log
     if self.cmd is None:
         log.info("Qubole command not found")
         return None

     log.info("command id: " + str(self.cmd.id))
     buffer = StringIO()
     self.cmd.get_results(fp=buffer, inline=True, delim=COL_DELIM)
     results = buffer.getvalue()
     buffer.close()
     return results
Exemple #14
0
    def get_connection(cls, conn_id: str) -> Connection:
        """
        Pick one connection at random among those configured for ``conn_id``.

        The choice is logged only when the selected connection has a host.

        :param conn_id: connection id
        :return: the selected connection
        """
        candidates = list(cls.get_connections(conn_id))
        chosen = random.choice(candidates)
        if chosen.host:
            LoggingMixin().log.info("Using connection to: %s", chosen.debug_info())
        return chosen
Exemple #15
0
def execute_command(command_to_exec):
    """
    Run ``command_to_exec`` as a subprocess with a copy of the environment.

    stderr is redirected to stdout and inherited file descriptors are closed.

    NOTE(review): ``check_call`` does not capture output, so ``output`` on the
    caught error is presumably always None here; confirm before relying on
    that log line.

    :param command_to_exec: the command to execute
    :raises AirflowException: when the command exits with a non-zero status
    """
    logger = LoggingMixin().log
    logger.info("Executing command in Celery: %s", command_to_exec)
    child_env = os.environ.copy()
    try:
        subprocess.check_call(
            command_to_exec,
            stderr=subprocess.STDOUT,
            close_fds=True,
            env=child_env,
        )
    except subprocess.CalledProcessError as failure:
        logger.exception('execute_command encountered a CalledProcessError')
        logger.error(failure.output)

        raise AirflowException('Celery command failed')
def execute_command(command):
    """
    Run ``command`` as a subprocess with a copy of the current environment.

    stderr is redirected to stdout and inherited file descriptors are closed.

    NOTE(review): ``check_call`` does not capture output, so ``output`` on the
    caught error is presumably always None here; confirm before relying on
    that log line.

    :param command: the command to execute
    :raises AirflowException: when the command exits with a non-zero status
    """
    logger = LoggingMixin().log
    logger.info("Executing command in Celery: %s", command)
    child_env = os.environ.copy()
    try:
        subprocess.check_call(
            command,
            stderr=subprocess.STDOUT,
            close_fds=True,
            env=child_env,
        )
    except subprocess.CalledProcessError as failure:
        logger.exception('execute_command encountered a CalledProcessError')
        logger.error(failure.output)

        raise AirflowException('Celery command failed')
Exemple #17
0
    def api_call(self,
                 method='GET',
                 route=None,
                 query_params=None,
                 accept=None):
        """
        Validate the arguments and forward the call to a ``ServiceNowClient``.

        The client is built with basic credentials when ``self.auth_type`` is
        0 and with a bearer token (stored in ``self.snow_cred.password``) when
        it is 1.

        :param method: HTTP method, ``'GET'`` or ``'POST'``
        :param route: API route to call; required
        :param query_params: query parameters; must be a dict
        :param accept: optional response format, ``'application/json'`` or
            ``'application/xml'``
        :return: the result of ``ServiceNowClient.api_call``, or None when
            ``self.auth_type`` is neither 0 nor 1
        :raises AirflowException: on a missing route, an unsupported method
            or an unsupported ``accept`` value
        :raises TypeError: when ``query_params`` is not a dict
        """
        if route is None:
            raise AirflowException('provide valid value to argument route ')
        if not isinstance(query_params, dict):
            raise TypeError('query_param must be of type dict')
        if method not in ['GET', 'POST']:
            raise AirflowException('Method not implemented')

        # Both documented formats are accepted; the previous condition was
        # inverted and rejected 'application/xml'.
        if accept is not None and str(accept) not in ('application/json',
                                                      'application/xml'):
            raise AirflowException(
                "ServiceNowHook : accept can have only two value application/json or application/xml"
            )

        if self.auth_type == 0:
            # BASIC authentication
            sc = ServiceNowClient(auth_type=0,
                                  host=self.snow_cred.host,
                                  login=self.snow_cred.login,
                                  password=self.snow_cred.password)
            connection_kind = 'basic'
        elif self.auth_type == 1:
            # BEARER authentication
            sc = ServiceNowClient(auth_type=1,
                                  host=self.snow_cred.host,
                                  token=self.snow_cred.password)
            connection_kind = 'oauth'
        else:
            # Unknown auth types are not handled; as before, nothing is called.
            return None

        log = LoggingMixin().log
        log.info("Using %s connection to: %s", connection_kind,
                 self.snow_cred.debug_info())

        return sc.api_call(method=method,
                           route=route,
                           query_params=query_params,
                           accept=accept)
Exemple #18
0
def _post_sendgrid_mail(mail_data):
    """
    Post ``mail_data`` to SendGrid and log the outcome.

    The API key is read from the ``sendgrid`` / ``SENDGRID_API_KEY``
    configuration entry. A 2xx response is logged at info level, anything
    else as a warning; no exception is raised for a non-2xx status.

    :param mail_data: request body; ``'subject'`` is read for the log messages
    """
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(
        apikey=configuration.get('sendgrid', 'SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # The subject comes from the request body; no ``subject`` name exists in
    # this function's scope.
    subject = mail_data['subject']
    # 2xx status code.
    if 200 <= response.status_code < 300:
        log.info(
            'The following email with subject %s is successfully sent to sendgrid.',
            subject)
    else:
        log.warning(
            'Failed to send out email with subject %s, status code: %s',
            subject, response.status_code)
Exemple #19
0
def get_default_executor():
    """
    Return the shared executor, creating it on first use.

    The executor name is read from ``core`` / ``EXECUTOR`` and the instance is
    cached in the module-level ``DEFAULT_EXECUTOR``.
    """
    global DEFAULT_EXECUTOR

    if DEFAULT_EXECUTOR is None:
        name = conf.get('core', 'EXECUTOR')
        DEFAULT_EXECUTOR = _get_executor(name)
        LoggingMixin().log.info("Using executor %s", name)

    return DEFAULT_EXECUTOR
def GetDefaultExecutor():
    """
    Return the shared executor, creating it on first use.

    The executor name is read from ``core`` / ``EXECUTOR`` and the instance is
    cached in the module-level ``DEFAULT_EXECUTOR``.
    """
    global DEFAULT_EXECUTOR

    if DEFAULT_EXECUTOR is None:
        name = configuration.get('core', 'EXECUTOR')
        DEFAULT_EXECUTOR = _get_executor(name)
        LoggingMixin().log.info("Using executor %s", name)

    return DEFAULT_EXECUTOR
    def handle_failure_retry(context):
        """
        Reconcile the task instance with the state of its Qubole command.

        The command id is read from the task instance's XCom under the key
        ``qbol_cmd_id``. A command whose status is ``'done'`` marks the task
        instance as successful; a ``'running'`` command is cancelled. Nothing
        happens when no id was pushed or the command cannot be found.

        :param context: task context; ``context['ti']`` is the task instance
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        command = Command.find(command_id)
        if command is None:
            return

        logger = LoggingMixin().log
        if command.status == 'done':
            logger.info('Command ID: %s has been succeeded, hence marking this '
                        'TI as Success.', command_id)
            task_instance.state = State.SUCCESS
        elif command.status == 'running':
            logger.info('Cancelling the Qubole Command Id: %s', command_id)
            command.cancel()
Exemple #22
0
 def get_tweepy_api(self,
                    wait_on_rate_limit=True,
                    wait_on_rate_limit_notify=True):
     """
     Build a ``tweepy.API`` client using application-only authentication.

     :param wait_on_rate_limit: forwarded to ``tweepy.API``
     :param wait_on_rate_limit_notify: forwarded to ``tweepy.API``
     :return: the ``tweepy.API`` instance
     :raises AirflowException: when any error occurs while building the client
     """
     logger = LoggingMixin().log
     rate_limit_options = {
         'wait_on_rate_limit': wait_on_rate_limit,
         'wait_on_rate_limit_notify': wait_on_rate_limit_notify,
     }
     try:
         handler = tweepy.AppAuthHandler(self.consumer_key,
                                         self.consumer_secret)
         client = tweepy.API(handler, **rate_limit_options)
         logger.info("Using connection to twitter API ")
         return client
     except Exception:
         raise AirflowException("Twitter credentials not valid")
Exemple #23
0
def qyweixin_msg_sender(msg):
    """
    Post ``msg`` as JSON to the bot URL configured under
    ``qyweixin`` / ``QYWEIXIN_BOT_URL``.

    The message and the HTTP response object are also printed to stdout.

    :param msg: JSON-serialisable payload sent as the request body
    """
    logger = LoggingMixin().log
    endpoint = configuration.get('qyweixin', 'QYWEIXIN_BOT_URL')
    request_headers = {'Content-Type': 'application/json'}

    print(msg)
    payload = json.dumps(msg)
    response = requests.post(endpoint, headers=request_headers, data=payload)
    print(response)
    logger.info("Sent an alert message to qyweixin.....")
def _poll_with_exponential_delay(request, max_n, is_done_func, is_error_func):
    """
    Execute ``request`` repeatedly, sleeping longer after each attempt.

    :param request: object whose ``execute()`` performs the call
    :param max_n: maximum number of attempts
    :param is_done_func: predicate on the response; true ends the polling
    :param is_error_func: predicate on the response; true raises ValueError
    :return: the first response for which ``is_done_func`` is true, or None
        when ``max_n`` attempts pass without one
    :raises ValueError: when ``is_error_func`` flags the response
    """
    logger = LoggingMixin().log

    def _pause(attempt):
        # 2**attempt seconds plus a random extra of up to one second.
        time.sleep((2**attempt) + (random.randint(0, 1000) / 1000))

    for attempt in range(max_n):
        try:
            response = request.execute()
            if is_error_func(response):
                raise ValueError(
                    'The response contained an error: {}'.format(response))
            if is_done_func(response):
                logger.info('Operation is done: %s', response)
                return response
            _pause(attempt)
        except errors.HttpError as http_error:
            # Only HTTP 429 is retried; any other status is re-raised.
            if http_error.resp.status != 429:
                logger.info('Something went wrong. Not retrying: %s',
                            format(http_error))
                raise
            _pause(attempt)
def dingbot_msg_sender(msg):
    """
    Post ``msg`` as JSON to the bot URL configured under
    ``dingding`` / ``DING_BOT_URL``.

    The message and the HTTP response object are printed to stdout, and a
    line containing ``1`` is appended to
    ``/usr/local/airflow/logs/ali_phone_call.log`` after the request.

    :param msg: JSON-serialisable payload sent as the request body
    """
    logger = LoggingMixin().log
    endpoint = configuration.get('dingding', 'DING_BOT_URL')
    request_headers = {'Content-Type': 'application/json'}

    print(msg)
    payload = json.dumps(msg)
    response = requests.post(endpoint, headers=request_headers, data=payload)
    print(response)

    with open('/usr/local/airflow/logs/ali_phone_call.log', 'a') as marker_file:
        marker_file.write('1\n')
    logger.info("Sent an alert message to dingding.....")
def _poll_with_exponential_delay(request, max_n, is_done_func, is_error_func):
    """
    Execute ``request`` repeatedly, sleeping longer after each attempt.

    :param request: object whose ``execute()`` performs the call
    :param max_n: maximum number of attempts
    :param is_done_func: predicate on the response; true ends the polling
    :param is_error_func: predicate on the response; true raises ValueError
    :return: the first response for which ``is_done_func`` is true, or None
        when ``max_n`` attempts pass without one
    :raises ValueError: when ``is_error_func`` flags the response
    """
    logger = LoggingMixin().log

    def _pause(attempt):
        # 2**attempt seconds plus a random extra of up to one second.
        time.sleep((2**attempt) + (random.randint(0, 1000) / 1000))

    for attempt in range(max_n):
        try:
            response = request.execute()
            if is_error_func(response):
                raise ValueError(
                    'The response contained an error: {}'.format(response)
                )
            if is_done_func(response):
                logger.info('Operation is done: %s', response)
                return response
            _pause(attempt)
        except HttpError as http_error:
            # Only HTTP 429 is retried; any other status is re-raised.
            if http_error.resp.status != 429:
                logger.info('Something went wrong. Not retrying: %s',
                            format(http_error))
                raise
            _pause(attempt)
Exemple #27
0
    return template.format(**all_vars)


# Location of the configuration file used by unit tests.
TEST_CONFIG_FILE = AIRFLOW_HOME + '/unittests.cfg'

# only generate a Fernet key if we need to create a new config file
if not os.path.isfile(TEST_CONFIG_FILE) or not os.path.isfile(AIRFLOW_CONFIG):
    FERNET_KEY = generate_fernet_key()
else:
    FERNET_KEY = ''

# Marker line inside the templates; only the text after its last occurrence
# is written to the generated files.
TEMPLATE_START = (
    '# ----------------------- TEMPLATE BEGINS HERE -----------------------')
# Create the unit-test config on first run.
if not os.path.isfile(TEST_CONFIG_FILE):
    log.info(
        'Creating new Airflow config file for unit tests in: %s', TEST_CONFIG_FILE
    )
    with open(TEST_CONFIG_FILE, 'w') as f:
        cfg = parameterized_config(TEST_CONFIG)
        f.write(cfg.split(TEMPLATE_START)[-1].strip())
# Create the main config on first run.
if not os.path.isfile(AIRFLOW_CONFIG):
    log.info(
        'Creating new Airflow config file in: %s',
        AIRFLOW_CONFIG
    )
    with open(AIRFLOW_CONFIG, 'w') as f:
        cfg = parameterized_config(DEFAULT_CONFIG)
        f.write(cfg.split(TEMPLATE_START)[-1].strip())

log.info("Reading the config from %s", AIRFLOW_CONFIG)
Exemple #28
0
# under the License.
"""Default celery configuration."""
import ssl

from airflow.configuration import conf
from airflow.exceptions import AirflowConfigException, AirflowException
from airflow.utils.log.logging_mixin import LoggingMixin

def _broker_supports_visibility_timeout(url):
    return url.startswith("redis://") or url.startswith("sqs://")

log = LoggingMixin().log

# NOTE(review): the broker URL is hard-coded, including a username and
# password, instead of being read from the configuration (the original lookup
# is kept below as a comment). The log call also writes the credentials to the
# log. Move the secret back into configuration and avoid logging it.
#broker_url = conf.get('celery', 'BROKER_URL')
broker_url = 'sqla+mysql://root:3point142@maria01:3306/airflow'
log.info('Using broker_url ' + broker_url)

# NOTE(review): same concern as above; hard-coded credentials that are logged.
#result_backend = conf.get('celery', 'RESULT_BACKEND')
result_backend = "db+mysql://root:3point142@maria01:3306/airflow"
log.info('Using result_backend ' + result_backend)

# Transport options come from the configuration; a default visibility timeout
# of 21600 is added only for redis:// and sqs:// brokers. With the hard-coded
# sqla+mysql URL above, that default is never applied.
broker_transport_options = conf.getsection('celery_broker_transport_options') or {}
if 'visibility_timeout' not in broker_transport_options:
    if _broker_supports_visibility_timeout(broker_url):
        broker_transport_options['visibility_timeout'] = 21600

DEFAULT_CELERY_CONFIG = {
    'accept_content': ['json', 'pickle'],
    'event_serializer': 'json',
    'worker_prefetch_multiplier': 1,
    'task_acks_late': True,
Exemple #29
0
    all_vars = {k: v for d in [globals(), locals()] for k, v in d.items()}
    return template.format(**all_vars)


# Location of the configuration file used by unit tests.
TEST_CONFIG_FILE = AIRFLOW_HOME + '/unittests.cfg'

# only generate a Fernet key if we need to create a new config file
if not os.path.isfile(TEST_CONFIG_FILE) or not os.path.isfile(AIRFLOW_CONFIG):
    FERNET_KEY = generate_fernet_key()
else:
    FERNET_KEY = ''

# Marker line inside the templates; only the text after its last occurrence
# is written to the generated files.
TEMPLATE_START = (
    '# ----------------------- TEMPLATE BEGINS HERE -----------------------')
# Create the unit-test config on first run.
if not os.path.isfile(TEST_CONFIG_FILE):
    log.info('Creating new Airflow config file for unit tests in: %s',
             TEST_CONFIG_FILE)
    with open(TEST_CONFIG_FILE, 'w') as f:
        cfg = parameterized_config(TEST_CONFIG)
        f.write(cfg.split(TEMPLATE_START)[-1].strip())
# Create the main config on first run.
if not os.path.isfile(AIRFLOW_CONFIG):
    log.info('Creating new Airflow config file in: %s', AIRFLOW_CONFIG)
    with open(AIRFLOW_CONFIG, 'w') as f:
        cfg = parameterized_config(DEFAULT_CONFIG)
        cfg = cfg.split(TEMPLATE_START)[-1].strip()
        # On Python 2 the text is encoded to UTF-8 bytes before writing.
        if six.PY2:
            cfg = cfg.encode('utf8')
        f.write(cfg)

log.info("Reading the config from %s", AIRFLOW_CONFIG)

# Parser seeded with the rendered default configuration.
conf = AirflowConfigParser(default_config=parameterized_config(DEFAULT_CONFIG))
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Default celery configuration."""
import ssl
from airflow.utils.log.logging_mixin import LoggingMixin

log = LoggingMixin().log

# NOTE(review): the broker URL is hard-coded instead of being read from the
# configuration (the original lookup is kept below as a comment), and the full
# URL is written to the log.
#broker_url = conf.get('celery', 'BROKER_URL')
broker_url = 'pyamqp://*****:*****@rabbit01:5672/airflow'
log.info('Using broker_url ' + broker_url)

# NOTE(review): this URL embeds a username and password and is logged as-is;
# move the secret into configuration and avoid logging it.
#result_backend = conf.get('celery', 'RESULT_BACKEND')
result_backend = "db+mysql://root:3point142@maria01:3306/airflow"
log.info('Using result_backend ' + result_backend)

# Hard-coded queue name.
default_queue = "celery.inbound"
log.info('Using default_queue ' + default_queue)

# Kept as a string so it can be concatenated into the log message below.
worker_concurrency = "16"
log.info('Using worker_concurrency ' + worker_concurrency)

DEFAULT_CELERY_CONFIG = {
    'accept_content': ['json', 'pickle'],
    'event_serializer': 'json',
    'worker_prefetch_multiplier': 1,
Exemple #31
0
def fetch_servicenow_record_count(table_name, execution_date, **kwargs):
    """
    Count the ServiceNow records of a table updated in the period that ends
    at ``execution_date`` and classify the count against a threshold.

    The period length comes from ``frequency`` in the ``config`` Variable
    (``hourly``, ``daily``, ``monthly``, ``half-hourly``; anything else is
    treated as one hour). On any failure the error is pushed to XCom under
    the key ``exception``, stored in a Variable keyed by
    ``<execution_date>$<dag_id>``, and then re-raised.

    :param: table_name : table for which the count of records is fetched
    :param: execution_date : airflow execution date of the dag; its first
        19 characters are parsed as ``%Y-%m-%dT%H:%M:%S``
    :param: kwargs : must provide ``ti``, ``task_instance`` and ``run_id``
        for the error path
    :return: ``'count_is_zero'``, ``'count_exceeds_threshold'`` or
        ``'count_within_threshold'`` (presumably downstream task ids; verify
        against the DAG)
    """

    # check for empty
    # NOTE(review): this runs before the None check below; whether is_empty
    # accepts None is not visible here.
    if is_empty(table_name) or is_empty(execution_date):
        raise InvalidArguments("table_name, execution_date can't be empty")

    # check for none
    if table_name is None or execution_date is None:
        raise InvalidArguments("table_name, execution_date can't be None")

    try:

        try:
            # Load Configuration Data
            config = json.loads(Variable.get("config"))
            frequency = config['frequency']
            execution_datetime = datetime.strptime(execution_date[:19],
                                                   "%Y-%m-%dT%H:%M:%S")

            # Negative offset: the window reaches back from the execution time.
            if frequency == 'hourly':
                freq_param = timedelta(hours=-1)

            elif frequency == 'daily':
                freq_param = timedelta(days=-1)
            elif frequency == 'monthly':
                freq_param = timedelta(days=-1 * one_month_ago(execution_date))

            elif frequency == 'half-hourly':
                freq_param = timedelta(minutes=-30)
            else:
                # Unknown frequencies fall back to one hour.
                freq_param = timedelta(hours=-1)

            # Rebuild the execution time as a UTC-aware datetime.
            to_time = datetime(year=execution_datetime.year,
                               month=execution_datetime.month,
                               day=execution_datetime.day,
                               hour=execution_datetime.hour,
                               minute=execution_datetime.minute,
                               second=execution_datetime.second,
                               tzinfo=pendulum.timezone("UTC"))
            from_time = to_time + freq_param

        except KeyError as e:
            # A missing key in the config is reported as a missing variable.
            raise ConfigVariableNotFoundException()
        try:
            credentials_snow = BaseHook.get_connection("servicenow_default")
            login = credentials_snow.login
            password = credentials_snow.password
            host = credentials_snow.host
        except AirflowException as e:
            raise ServiceNowConnectionNotFoundException()

        service_now_hook = ServiceNowHook(host=host,
                                          login=login,
                                          password=password)
        # Ask the stats endpoint for the number of rows whose sys_updated_on
        # lies between from_time and to_time.
        response = service_now_hook.api_call(
            route='/api/now/stats/{}'.format(table_name),
            accept='application/json',
            query_params={
                'sysparm_count':
                'true',
                'sysparm_query':
                "sys_updated_onBETWEENjavascript:gs.dateGenerate('{}','{}')"
                "@javascript:gs.dateGenerate('{}','{}')".format(
                    str(from_time.date()), str(from_time.time()),
                    str(to_time.date()), str(to_time.time()))
            })
        # NOTE(review): the concatenation assumes response is a str - confirm.
        print('response :' + response)
        count_of_records = int(
            json.loads(response)['result']['stats']['count'])

        log = LoggingMixin().log
        log.info("Getting count from: {}  to : {} ".format(from_time, to_time))
        log.info("totals number of records %s ", str(count_of_records))

        # Classify the count against the threshold from the config.
        if int(count_of_records) == 0:
            return 'count_is_zero'
        elif int(count_of_records) > config['threshold']:
            return 'count_exceeds_threshold'
        else:
            return 'count_within_threshold'

    except Exception as e:

        # Record the failure in XCom and in a Variable, then re-raise.
        kwargs['ti'].xcom_push(key='exception', value=str(e))

        instance = kwargs['task_instance']
        dag_id = str(instance.dag_id)
        task_id = str(instance.task_id)
        msg = str(e)
        execution_date = str(instance.execution_date)
        run_id = str(kwargs['run_id'])

        # Normalise to "YYYY-MM-DD HH:MM:SS" for the Variable key.
        execution_date = execution_date.replace('T', ' ')[0:19]
        key = '{}${}'.format(execution_date, dag_id)

        value = {
            'dag_id': dag_id,
            'execution_date': execution_date,
            'task_id': task_id,
            'run_id': run_id,
            'error_msg': msg
        }

        Variable.set(key=key, value=json.dumps(value))

        raise
Exemple #32
0
    engine_args = {}
    if disable_connection_pool:
        engine_args['poolclass'] = NullPool
    elif 'sqlite' not in SQL_ALCHEMY_CONN:
        # Engine args not supported by sqlite
        engine_args['pool_size'] = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
        engine_args['pool_recycle'] = conf.getint('core',
                                                  'SQL_ALCHEMY_POOL_RECYCLE')

    engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
    Session = scoped_session(
        sessionmaker(autocommit=False, autoflush=False, bind=engine))

# Optional user overrides: when ``airflow_local_settings`` is importable, every
# public name it defines is pulled into this module.
try:
    from airflow_local_settings import *
    log.info("Loaded airflow_local_settings.")
except Exception:
    # Still best effort, but SystemExit/KeyboardInterrupt are no longer
    # swallowed, and the reason for the failure is available at debug level.
    log.debug("Failed to load airflow_local_settings.", exc_info=True)

configure_logging()
configure_vars()
configure_orm()

# TODO: Unify airflow logging setups. Please see AIRFLOW-1457.
logging_config_path = conf.get('core', 'logging_config_path')
try:
    from logging_config_path import LOGGING_CONFIG
    log.debug("Successfully imported user-defined logging config.")
except Exception as e:
    # Import default logging configurations.
    log.debug(
Exemple #33
0
 def get_connection(cls, conn_id):  # type: (str) -> Connection
     """
     Pick one connection at random among those configured for ``conn_id``.

     The choice is logged only when the selected connection has a host.
     """
     candidates = list(cls.get_connections(conn_id))
     chosen = random.choice(candidates)
     if chosen.host:
         LoggingMixin().log.info("Using connection to: %s", chosen.debug_info())
     return chosen
Exemple #34
0
 def get_connection(cls, conn_id):
     """
     Pick one connection at random among those configured for ``conn_id``.

     The choice is logged only when the selected connection has a host.
     """
     candidates = cls.get_connections(conn_id)
     chosen = random.choice(candidates)
     if chosen.host:
         LoggingMixin().log.info("Using connection to: %s", chosen.debug_info())
     return chosen
Exemple #35
0
 def get_connection(cls, conn_id):  # type: (str) -> Connection
     """
     Pick one connection at random among those configured for ``conn_id``.

     The choice is logged, using the connection's ``log_info()``, only when
     the selected connection has a host.
     """
     candidates = list(cls.get_connections(conn_id))
     chosen = random.choice(candidates)
     if chosen.host:
         LoggingMixin().log.info("Using connection to: %s", chosen.log_info())
     return chosen
Exemple #36
0
 def get_connection(cls, conn_id):
     """
     Pick one connection at random among those configured for ``conn_id``.

     The host is logged only when the selected connection has one.
     """
     candidates = cls.get_connections(conn_id)
     chosen = random.choice(candidates)
     if chosen.host:
         LoggingMixin().log.info("Using connection to: %s", chosen.host)
     return chosen
        try:
            conn = get_ldap_connection(entry['dn'], password)
        except KeyError:
            log.error("""
            Unable to parse LDAP structure. If you're using Active Directory
            and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
            %s
            """, traceback.format_exc())
            raise LdapException(
                "Could not parse LDAP structure. "
                "Try setting search_scope in airflow.cfg, or check logs"
            )

        if not conn:
            log.info("Password incorrect for user %s", username)
            raise AuthenticationError("Invalid username or password")

    @property
    def is_active(self):
        """Required by flask_login; always reports True."""
        return True

    @property
    def is_authenticated(self):
        """Required by flask_login; always reports True."""
        return True

    @property
    def is_anonymous(self):
        """Required by flask_login"""
Exemple #38
0
def check_if_tweet_is_avalaible(twitter_account_id=None,
                                since_id=None,
                                find_param=None,
                                **kwargs):
    """
    Check whether the configured Twitter account posted, today, a tweet whose
    text contains the configured search string.

    Settings are read from the JSON stored in the ``config`` Variable and
    replace the corresponding arguments: ``twitter_account_id`` and
    ``find_param`` are mandatory there, ``since_id`` is optional (the argument
    value is kept when the key is absent). Twitter credentials come from the
    ``extra`` JSON of the ``twitter_default`` connection.

    On a match, the tweet id, its date and the URL of its third media item
    are stored as JSON in the ``bulliten_tweet`` Variable.

    :param twitter_account_id: account whose timeline is fetched
        (overridden by the config value)
    :param since_id: only tweets newer than this id are fetched
        (overridden by the config value when present)
    :param find_param: text to look for, compared case-insensitively
        (overridden by the config value)
    :param kwargs: accepted and ignored
    :return: True when a matching tweet with a detail image was found and
        stored; False otherwise
    :raises ConfigVariableNotFoundException: when reading the ``config``
        Variable raises AirflowException
    :raises AirflowException: when ``twitter_account_id`` or ``find_param``
        is missing from the config
    :raises TwitterConnectionNotFoundException: when looking up the Twitter
        connection raises AirflowException
    """
    log = LoggingMixin().log
    try:
        # Load Configuration Data
        config = json.loads(Variable.get("config"))
        log.info("Config found")
    except AirflowException:
        log.error("Config missing")
        raise ConfigVariableNotFoundException()

    try:
        twitter_account_id = config['twitter_account_id']
    except KeyError:
        raise AirflowException('Missing Twitter Account Id in config variable')

    try:
        since_id = config['since_id']
    except KeyError:
        # Optional setting: fall back to the value passed as argument.
        # Logger.warn is a deprecated alias, hence warning().
        log.warning("Since id missing")

    try:
        find_param = config['find_param'].lower()
    except KeyError:
        raise AirflowException('Missing Find Param in config variable')

    try:
        twitter_credentials = BaseHook.get_connection("twitter_default")
        twitter_credentials = json.loads(twitter_credentials.extra)
        consumer_key = twitter_credentials['consumer_key']
        consumer_secret = twitter_credentials['consumer_secret']
        access_token = twitter_credentials['access_token']
        access_token_secret = twitter_credentials['access_token_secret']
    except AirflowException:
        raise TwitterConnectionNotFoundException()

    twitter_hook = TwitterHook(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)

    tweepy_api = twitter_hook.get_tweepy_api()
    curr_date = date.today().strftime("%d-%m-%Y")

    tweets = tweepy_api.user_timeline(id=twitter_account_id,
                                      since_id=since_id,
                                      count=1000,
                                      exclude_replies=True,
                                      include_rts=False,
                                      tweet_mode="extended")
    if not tweets:
        log.info("No tweets found!")
        return False

    log.info("Found : %s  tweets", len(tweets))

    # The third attached media item is the one used as the detail image.
    detail_image_index = 2

    # Only the first tweet that matches both the text and today's date is
    # considered; every branch below returns.
    for tweet in tweets:
        tweet_date = tweet.created_at.strftime("%d-%m-%Y")
        if find_param not in tweet.full_text.lower() or tweet_date != curr_date:
            continue

        bulletin_tweet_id = tweet.id
        log.info("Tweet found")
        log.info("Tweet ID: %s  TEXT : %s ", bulletin_tweet_id, tweet.full_text)

        if 'media' not in tweet.entities:
            log.info("No media found")
            # skip the processing and end dag
            return False

        image_urls = [media['media_url']
                      for media in tweet.extended_entities['media']]
        if len(image_urls) <= detail_image_index:
            # Too few images to pick the detail image from: treat it like
            # the "no media" case instead of failing with IndexError.
            log.warning("Expected at least %s media items, found %s",
                        detail_image_index + 1, len(image_urls))
            return False

        detail_image_url = image_urls[detail_image_index]
        log.info("Tweet Image Url: %s ", detail_image_url)

        data = {
            "tweet_id": bulletin_tweet_id,
            "tweet_date": tweet_date,
            "media_url": detail_image_url
        }
        Variable.set("bulliten_tweet", json.dumps(data))
        return True

    log.info("No tweets related to %s found", find_param)
    return False