def send_mime_email(e_from, e_to, mime_msg, dryrun=False):
    """
    Send MIME email.
    """
    log = LoggingMixin().log

    smtp_host = conf.get('smtp', 'SMTP_HOST')
    smtp_port = conf.getint('smtp', 'SMTP_PORT')
    smtp_starttls = conf.getboolean('smtp', 'SMTP_STARTTLS')
    smtp_ssl = conf.getboolean('smtp', 'SMTP_SSL')
    smtp_user = None
    smtp_password = None

    try:
        smtp_user = conf.get('smtp', 'SMTP_USER')
        smtp_password = conf.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug("No user/password found for SMTP, so logging in with no authentication.")

    if not dryrun:
        conn = smtplib.SMTP_SSL(smtp_host, smtp_port) if smtp_ssl \
            else smtplib.SMTP(smtp_host, smtp_port)
        if smtp_starttls:
            conn.starttls()
        if smtp_user and smtp_password:
            conn.login(smtp_user, smtp_password)
        log.info("Sent an alert email to %s", e_to)
        conn.sendmail(e_from, e_to, mime_msg.as_string())
        conn.quit()
def _split_tablename(table_input, default_project_id, var_name=None):
    assert default_project_id is not None, "INTERNAL: No default project is specified"

    def var_print(var_name):
        if var_name is None:
            return ""
        else:
            return "Format exception for {var}: ".format(var=var_name)

    if table_input.count('.') + table_input.count(':') > 3:
        raise Exception((
            '{var}Use either : or . to specify project '
            'got {input}'
        ).format(var=var_print(var_name), input=table_input))

    cmpt = table_input.rsplit(':', 1)
    project_id = None
    rest = table_input
    if len(cmpt) == 1:
        project_id = None
        rest = cmpt[0]
    elif len(cmpt) == 2 and cmpt[0].count(':') <= 1:
        if cmpt[-1].count('.') != 2:
            project_id = cmpt[0]
            rest = cmpt[1]
    else:
        raise Exception((
            '{var}Expect format of (<project:)<dataset>.<table>, '
            'got {input}'
        ).format(var=var_print(var_name), input=table_input))

    cmpt = rest.split('.')
    if len(cmpt) == 3:
        assert project_id is None, (
            "{var}Use either : or . to specify project"
        ).format(var=var_print(var_name))
        project_id = cmpt[0]
        dataset_id = cmpt[1]
        table_id = cmpt[2]
    elif len(cmpt) == 2:
        dataset_id = cmpt[0]
        table_id = cmpt[1]
    else:
        raise Exception((
            '{var}Expect format of (<project.|<project:)<dataset>.<table>, '
            'got {input}'
        ).format(var=var_print(var_name), input=table_input))

    if project_id is None:
        if var_name is not None:
            log = LoggingMixin().log
            log.info(
                'Project not included in {var}: {input}; using project "{project}"'.format(
                    var=var_name, input=table_input, project=default_project_id
                )
            )
        project_id = default_project_id

    return project_id, dataset_id, table_id
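# Illustrative usage of _split_tablename above (not part of the original source):
# a hedged sketch assuming BigQuery-style "project:dataset.table" or
# "project.dataset.table" identifiers; all values below are placeholders.
project, dataset, table = _split_tablename(
    'my-project:my_dataset.my_table', default_project_id='fallback-project')
# -> ('my-project', 'my_dataset', 'my_table')

project, dataset, table = _split_tablename(
    'my_dataset.my_table', default_project_id='fallback-project')
# -> ('fallback-project', 'my_dataset', 'my_table'), falling back to the default project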
def send_notification(receiver, subject, html_content, **kwargs):
    """
    Send Slack notification using configuration from config file.

    :param receiver: email receiver, kept only to match the email backend signature
    :param subject: email subject, kept only to match the email backend signature
    :param html_content: message content, typically in html format
    :return:
    """
    _ = receiver
    _ = subject
    _ = kwargs
    log = LoggingMixin().log
    try:
        token = configuration.conf.get('slack', 'TOKEN')
        channel = configuration.conf.get('slack', 'CHANNEL')
        username = configuration.conf.get('slack', 'USERNAME')
        for value in (token, channel, username):
            if not value:
                raise AirflowConfigException("error: empty value")
    except AirflowConfigException:
        log.info("No token/channel/username found for slack.")
        return

    send_notification_to_channel(format_message(html_content), channel, token, username)
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    log = LoggingMixin().log

    SMTP_HOST = configuration.conf.get('smtp', 'SMTP_HOST')
    SMTP_PORT = configuration.conf.getint('smtp', 'SMTP_PORT')
    SMTP_STARTTLS = configuration.conf.getboolean('smtp', 'SMTP_STARTTLS')
    SMTP_SSL = configuration.conf.getboolean('smtp', 'SMTP_SSL')
    SMTP_USER = None
    SMTP_PASSWORD = None

    try:
        SMTP_USER = configuration.conf.get('smtp', 'SMTP_USER')
        SMTP_PASSWORD = configuration.conf.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug("No user/password found for SMTP, so logging in with no authentication.")

    if not dryrun:
        s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL \
            else smtplib.SMTP(SMTP_HOST, SMTP_PORT)
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        log.info("Sent an alert email to %s", e_to)
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    log = LoggingMixin().log

    SMTP_HOST = configuration.conf.get('smtp', 'SMTP_HOST')
    SMTP_PORT = configuration.conf.getint('smtp', 'SMTP_PORT')
    SMTP_STARTTLS = configuration.conf.getboolean('smtp', 'SMTP_STARTTLS')
    SMTP_SSL = configuration.conf.getboolean('smtp', 'SMTP_SSL')
    SMTP_USER = None
    SMTP_PASSWORD = None

    try:
        SMTP_USER = configuration.conf.get('smtp', 'SMTP_USER')
        SMTP_PASSWORD = configuration.conf.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug(
            "No user/password found for SMTP, so logging in with no authentication."
        )

    if not dryrun:
        s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL else smtplib.SMTP(
            SMTP_HOST, SMTP_PORT)
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        log.info("Sent an alert email to %s", e_to)
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
def fetchAccessToken(
        self,
        client_id=None,
        client_secret=None,
        login=None,
        password=None,
        refresh_token=None,
):
    sc = ServiceNowClient(
        auth_type=1,
        host=self.snow_cred.host,
    )
    self.snow_cred.password = sc.fetchAccessToken(
        client_id=client_id,
        client_secret=client_secret,
        login=login,
        password=password,
        refresh_token=refresh_token)
    log = LoggingMixin().log
    log.info("Using connection to fetch access_token : %s", self.snow_cred.debug_info())
    return self.snow_cred.password
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as e:
        log.error(e)
        raise AirflowException('Celery command failed')
def _split_tablename(table_input, default_project_id, var_name=None):
    assert default_project_id is not None, "INTERNAL: No default project is specified"

    def var_print(var_name):
        if var_name is None:
            return ""
        else:
            return "Format exception for {var}: ".format(var=var_name)

    if table_input.count('.') + table_input.count(':') > 3:
        raise Exception(('{var}Use either : or . to specify project '
                         'got {input}').format(var=var_print(var_name), input=table_input))

    cmpt = table_input.rsplit(':', 1)
    project_id = None
    rest = table_input
    if len(cmpt) == 1:
        project_id = None
        rest = cmpt[0]
    elif len(cmpt) == 2 and cmpt[0].count(':') <= 1:
        if cmpt[-1].count('.') != 2:
            project_id = cmpt[0]
            rest = cmpt[1]
    else:
        raise Exception(('{var}Expect format of (<project:)<dataset>.<table>, '
                         'got {input}').format(var=var_print(var_name), input=table_input))

    cmpt = rest.split('.')
    if len(cmpt) == 3:
        assert project_id is None, (
            "{var}Use either : or . to specify project").format(
            var=var_print(var_name))
        project_id = cmpt[0]
        dataset_id = cmpt[1]
        table_id = cmpt[2]
    elif len(cmpt) == 2:
        dataset_id = cmpt[0]
        table_id = cmpt[1]
    else:
        raise Exception(
            ('{var}Expect format of (<project.|<project:)<dataset>.<table>, '
             'got {input}').format(var=var_print(var_name), input=table_input))

    if project_id is None:
        if var_name is not None:
            log = LoggingMixin().log
            log.info('Project not included in {var}: {input}; '
                     'using project "{project}"'.format(
                         var=var_name, input=table_input, project=default_project_id))
        project_id = default_project_id

    return project_id, dataset_id, table_id
def handle_failure_retry(context):
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        cmd = Command.find(cmd_id)
        if cmd is not None:
            if cmd.status == 'running':
                log = LoggingMixin().log
                log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if response.status_code >= 200 and response.status_code < 300:
        log.info('Email with subject %s is successfully sent to recipients: %s' %
                 (mail_data['subject'], mail_data['personalizations']))
    else:
        log.warning('Failed to send out email with subject %s, status code: %s' %
                    (mail_data['subject'], response.status_code))
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if 200 <= response.status_code < 300:
        log.info('Email with subject %s is successfully sent to recipients: %s',
                 mail_data['subject'], mail_data['personalizations'])
    else:
        log.warning('Failed to send out email with subject %s, status code: %s',
                    mail_data['subject'], response.status_code)
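# Illustrative shape of the mail_data payload posted by the _post_sendgrid_mail
# variants above (not part of the original source): a hedged sketch assuming the
# SendGrid v3 Mail Send request schema; addresses and subject are placeholders.
example_mail_data = {
    'personalizations': [{'to': [{'email': 'recipient@example.com'}]}],
    'from': {'email': 'sender@example.com'},
    'subject': 'Airflow alert',
    'content': [{'type': 'text/html', 'value': '<p>Task failed.</p>'}],
}
# _post_sendgrid_mail(example_mail_data)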
def get_query_results(self):
    log = LoggingMixin().log
    if self.cmd is not None:
        cmd_id = self.cmd.id
        log.info("command id: " + str(cmd_id))
        query_result_buffer = StringIO()
        self.cmd.get_results(fp=query_result_buffer, inline=True, delim=COL_DELIM)
        query_result = query_result_buffer.getvalue()
        query_result_buffer.close()
        return query_result
    else:
        log.info("Qubole command not found")
def get_connection(cls, conn_id: str) -> Connection:
    """
    Get random connection selected from all connections configured with this connection id.

    :param conn_id: connection id
    :return: connection
    """
    conn = random.choice(list(cls.get_connections(conn_id)))
    if conn.host:
        log = LoggingMixin().log
        log.info("Using connection to: %s", conn.debug_info())
    return conn
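# Illustrative caller for the get_connection classmethod above (not part of the
# original source): a hedged sketch assuming it is Airflow's BaseHook.get_connection
# and that a connection id such as 'my_postgres' exists; the id is a placeholder.
from airflow.hooks.base_hook import BaseHook

conn = BaseHook.get_connection('my_postgres')
print(conn.host, conn.port, conn.schema)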
def execute_command(command_to_exec):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command_to_exec)
    env = os.environ.copy()
    try:
        subprocess.check_call(command_to_exec, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        raise AirflowException('Celery command failed')
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    env = os.environ.copy()
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        raise AirflowException('Celery command failed')
def api_call(self, method='GET', route=None, query_params=None, accept=None):
    if route is None:
        raise AirflowException('provide valid value to argument route')
    if not isinstance(query_params, dict):
        raise TypeError('query_params must be of type dict')
    if method not in ['GET', 'POST']:
        raise AirflowException('Method not implemented')
    if accept is not None and accept not in ('application/json', 'application/xml'):
        raise AirflowException(
            "ServiceNowHook : accept can have only two values: "
            "application/json or application/xml"
        )

    # Basic authentication
    if self.auth_type == 0:
        sc = ServiceNowClient(auth_type=0,
                              host=self.snow_cred.host,
                              login=self.snow_cred.login,
                              password=self.snow_cred.password)
        log = LoggingMixin().log
        log.info("Using basic connection to: %s", self.snow_cred.debug_info())
        return sc.api_call(method=method,
                           route=route,
                           query_params=query_params,
                           accept=accept)
    # Bearer (OAuth) authentication
    elif self.auth_type == 1:
        sc = ServiceNowClient(auth_type=1,
                              host=self.snow_cred.host,
                              token=self.snow_cred.password)
        log = LoggingMixin().log
        log.info("Using oauth connection to: %s", self.snow_cred.debug_info())
        return sc.api_call(method=method,
                           route=route,
                           query_params=query_params,
                           accept=accept)
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(
        apikey=configuration.get('sendgrid', 'SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if response.status_code >= 200 and response.status_code < 300:
        log.info(
            'The following email with subject %s is successfully sent to sendgrid.'
            % mail_data['subject'])
    else:
        log.warning(
            'Failed to send out email with subject %s, status code: %s'
            % (mail_data['subject'], response.status_code))
def get_default_executor():
    """Creates a new instance of the configured executor if none exists and returns it"""
    global DEFAULT_EXECUTOR

    if DEFAULT_EXECUTOR is not None:
        return DEFAULT_EXECUTOR

    executor_name = conf.get('core', 'EXECUTOR')
    DEFAULT_EXECUTOR = _get_executor(executor_name)

    log = LoggingMixin().log
    log.info("Using executor %s", executor_name)

    return DEFAULT_EXECUTOR
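# For reference, the executor name read above comes from the [core] section of
# airflow.cfg, e.g. (illustrative value, not part of the original source):
#
# [core]
# executor = LocalExecutor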
def GetDefaultExecutor():
    """Creates a new instance of the configured executor if none exists and returns it"""
    global DEFAULT_EXECUTOR

    if DEFAULT_EXECUTOR is not None:
        return DEFAULT_EXECUTOR

    executor_name = configuration.get('core', 'EXECUTOR')
    DEFAULT_EXECUTOR = _get_executor(executor_name)

    log = LoggingMixin().log
    log.info("Using executor %s", executor_name)

    return DEFAULT_EXECUTOR
def handle_failure_retry(context):
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        cmd = Command.find(cmd_id)
        if cmd is not None:
            log = LoggingMixin().log
            if cmd.status == 'done':
                log.info('Command ID: %s has been succeeded, hence marking this '
                         'TI as Success.', cmd_id)
                ti.state = State.SUCCESS
            elif cmd.status == 'running':
                log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()
def get_tweepy_api(self, wait_on_rate_limit=True, wait_on_rate_limit_notify=True):
    log = LoggingMixin().log
    try:
        auth = tweepy.AppAuthHandler(self.consumer_key, self.consumer_secret)
        api = tweepy.API(
            auth,
            wait_on_rate_limit=wait_on_rate_limit,
            wait_on_rate_limit_notify=wait_on_rate_limit_notify)
        log.info("Using connection to twitter API")
        return api
    except Exception:
        raise AirflowException("Twitter credentials not valid")
def qyweixin_msg_sender(msg):
    log = LoggingMixin().log
    bot_url = configuration.get('qyweixin', 'QYWEIXIN_BOT_URL')
    headers = {'Content-Type': 'application/json'}
    # md_text = {
    #     "content": msg
    # }
    print(msg)
    # post_data = {
    #     "msgtype": "markdown",
    #     "markdown": md_text
    # }
    # print(post_data)
    # r = requests.post(bot_url, headers=headers, data=json.dumps(post_data))
    r = requests.post(bot_url, headers=headers, data=json.dumps(msg))
    print(r)
    log.info("Sent an alert message to qyweixin.....")
def _poll_with_exponential_delay(request, max_n, is_done_func, is_error_func):
    log = LoggingMixin().log

    for i in range(0, max_n):
        try:
            response = request.execute()
            if is_error_func(response):
                raise ValueError(
                    'The response contained an error: {}'.format(response))
            elif is_done_func(response):
                log.info('Operation is done: %s', response)
                return response
            else:
                time.sleep((2 ** i) + (random.randint(0, 1000) / 1000))
        except errors.HttpError as e:
            if e.resp.status != 429:
                log.info('Something went wrong. Not retrying: %s', format(e))
                raise
            else:
                time.sleep((2 ** i) + (random.randint(0, 1000) / 1000))
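# Illustrative usage of _poll_with_exponential_delay above (not part of the
# original source): "request" only needs an execute() method, as googleapiclient
# request objects provide; FakeRequest and the response field names are
# hypothetical, purely for illustration.
class FakeRequest:
    def __init__(self):
        self._calls = 0

    def execute(self):
        self._calls += 1
        return {'done': self._calls >= 3, 'error': None}


result = _poll_with_exponential_delay(
    request=FakeRequest(),
    max_n=5,
    is_done_func=lambda resp: resp.get('done'),
    is_error_func=lambda resp: resp.get('error') is not None,
)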
def dingbot_msg_sender(msg):
    log = LoggingMixin().log
    bot_url = configuration.get('dingding', 'DING_BOT_URL')
    headers = {'Content-Type': 'application/json'}
    # md_text = {
    #     "title": "AIRFLOW ERROR",
    #     "text": msg
    # }
    print(msg)
    # post_data = {
    #     "msgtype": "markdown",
    #     "markdown": md_text
    # }
    # print(post_data)
    r = requests.post(bot_url, headers=headers, data=json.dumps(msg))
    print(r)
    with open('/usr/local/airflow/logs/ali_phone_call.log', 'a') as the_file:
        the_file.write('1\n')
    log.info("Sent an alert message to dingding.....")
def _poll_with_exponential_delay(request, max_n, is_done_func, is_error_func):
    log = LoggingMixin().log

    for i in range(0, max_n):
        try:
            response = request.execute()
            if is_error_func(response):
                raise ValueError(
                    'The response contained an error: {}'.format(response)
                )
            elif is_done_func(response):
                log.info('Operation is done: %s', response)
                return response
            else:
                time.sleep((2 ** i) + (random.randint(0, 1000) / 1000))
        except HttpError as e:
            if e.resp.status != 429:
                log.info('Something went wrong. Not retrying: %s', format(e))
                raise
            else:
                time.sleep((2 ** i) + (random.randint(0, 1000) / 1000))
    return template.format(**all_vars)


TEST_CONFIG_FILE = AIRFLOW_HOME + '/unittests.cfg'

# only generate a Fernet key if we need to create a new config file
if not os.path.isfile(TEST_CONFIG_FILE) or not os.path.isfile(AIRFLOW_CONFIG):
    FERNET_KEY = generate_fernet_key()
else:
    FERNET_KEY = ''

TEMPLATE_START = (
    '# ----------------------- TEMPLATE BEGINS HERE -----------------------')

if not os.path.isfile(TEST_CONFIG_FILE):
    log.info(
        'Creating new Airflow config file for unit tests in: %s', TEST_CONFIG_FILE
    )
    with open(TEST_CONFIG_FILE, 'w') as f:
        cfg = parameterized_config(TEST_CONFIG)
        f.write(cfg.split(TEMPLATE_START)[-1].strip())
if not os.path.isfile(AIRFLOW_CONFIG):
    log.info(
        'Creating new Airflow config file in: %s', AIRFLOW_CONFIG
    )
    with open(AIRFLOW_CONFIG, 'w') as f:
        cfg = parameterized_config(DEFAULT_CONFIG)
        f.write(cfg.split(TEMPLATE_START)[-1].strip())

log.info("Reading the config from %s", AIRFLOW_CONFIG)
# under the License.
"""Default celery configuration."""
import ssl

from airflow.configuration import conf
from airflow.exceptions import AirflowConfigException, AirflowException
from airflow.utils.log.logging_mixin import LoggingMixin


def _broker_supports_visibility_timeout(url):
    return url.startswith("redis://") or url.startswith("sqs://")


log = LoggingMixin().log

# broker_url = conf.get('celery', 'BROKER_URL')
broker_url = 'sqla+mysql://root:3point142@maria01:3306/airflow'
log.info('Using broker_url ' + broker_url)

# result_backend = conf.get('celery', 'RESULT_BACKEND')
result_backend = "db+mysql://root:3point142@maria01:3306/airflow"
log.info('Using result_backend ' + result_backend)

broker_transport_options = conf.getsection('celery_broker_transport_options') or {}
if 'visibility_timeout' not in broker_transport_options:
    if _broker_supports_visibility_timeout(broker_url):
        broker_transport_options['visibility_timeout'] = 21600

DEFAULT_CELERY_CONFIG = {
    'accept_content': ['json', 'pickle'],
    'event_serializer': 'json',
    'worker_prefetch_multiplier': 1,
    'task_acks_late': True,
    all_vars = {k: v for d in [globals(), locals()] for k, v in d.items()}
    return template.format(**all_vars)


TEST_CONFIG_FILE = AIRFLOW_HOME + '/unittests.cfg'

# only generate a Fernet key if we need to create a new config file
if not os.path.isfile(TEST_CONFIG_FILE) or not os.path.isfile(AIRFLOW_CONFIG):
    FERNET_KEY = generate_fernet_key()
else:
    FERNET_KEY = ''

TEMPLATE_START = (
    '# ----------------------- TEMPLATE BEGINS HERE -----------------------')

if not os.path.isfile(TEST_CONFIG_FILE):
    log.info('Creating new Airflow config file for unit tests in: %s',
             TEST_CONFIG_FILE)
    with open(TEST_CONFIG_FILE, 'w') as f:
        cfg = parameterized_config(TEST_CONFIG)
        f.write(cfg.split(TEMPLATE_START)[-1].strip())
if not os.path.isfile(AIRFLOW_CONFIG):
    log.info('Creating new Airflow config file in: %s', AIRFLOW_CONFIG)
    with open(AIRFLOW_CONFIG, 'w') as f:
        cfg = parameterized_config(DEFAULT_CONFIG)
        cfg = cfg.split(TEMPLATE_START)[-1].strip()
        if six.PY2:
            cfg = cfg.encode('utf8')
        f.write(cfg)

log.info("Reading the config from %s", AIRFLOW_CONFIG)

conf = AirflowConfigParser(default_config=parameterized_config(DEFAULT_CONFIG))
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Default celery configuration."""
import ssl

from airflow.utils.log.logging_mixin import LoggingMixin

log = LoggingMixin().log

# broker_url = conf.get('celery', 'BROKER_URL')
broker_url = 'pyamqp://*****:*****@rabbit01:5672/airflow'
log.info('Using broker_url ' + broker_url)

# result_backend = conf.get('celery', 'RESULT_BACKEND')
result_backend = "db+mysql://root:3point142@maria01:3306/airflow"
log.info('Using result_backend ' + result_backend)

default_queue = "celery.inbound"
log.info('Using default_queue ' + default_queue)

worker_concurrency = "16"
log.info('Using worker_concurrency ' + worker_concurrency)

DEFAULT_CELERY_CONFIG = {
    'accept_content': ['json', 'pickle'],
    'event_serializer': 'json',
    'worker_prefetch_multiplier': 1,
def fetch_servicenow_record_count(table_name, execution_date, **kwargs):
    """
    This method calls the service now api for a particular table and time
    period and gets count of records for a particular table.

    :param table_name: table for which count of records is fetched
    :param execution_date: airflow execution date of the dag
    :return: task_id
    """
    # check for empty
    if is_empty(table_name) or is_empty(execution_date):
        raise InvalidArguments("table_name, execution_date can't be empty")

    # check for none
    if table_name is None or execution_date is None:
        raise InvalidArguments("table_name, execution_date can't be None")

    try:
        try:
            # Load Configuration Data
            config = json.loads(Variable.get("config"))
            frequency = config['frequency']
            execution_datetime = datetime.strptime(execution_date[:19],
                                                   "%Y-%m-%dT%H:%M:%S")

            if frequency == 'hourly':
                freq_param = timedelta(hours=-1)
            elif frequency == 'daily':
                freq_param = timedelta(days=-1)
            elif frequency == 'monthly':
                freq_param = timedelta(days=-1 * one_month_ago(execution_date))
            elif frequency == 'half-hourly':
                freq_param = timedelta(minutes=-30)
            else:
                freq_param = timedelta(hours=-1)

            to_time = datetime(
                year=execution_datetime.year,
                month=execution_datetime.month,
                day=execution_datetime.day,
                hour=execution_datetime.hour,
                minute=execution_datetime.minute,
                second=execution_datetime.second,
                tzinfo=pendulum.timezone("UTC"))
            from_time = to_time + freq_param
        except KeyError as e:
            raise ConfigVariableNotFoundException()

        try:
            credentials_snow = BaseHook.get_connection("servicenow_default")
            login = credentials_snow.login
            password = credentials_snow.password
            host = credentials_snow.host
        except AirflowException as e:
            raise ServiceNowConnectionNotFoundException()

        service_now_hook = ServiceNowHook(host=host, login=login, password=password)
        response = service_now_hook.api_call(
            route='/api/now/stats/{}'.format(table_name),
            accept='application/json',
            query_params={
                'sysparm_count': 'true',
                'sysparm_query': "sys_updated_onBETWEENjavascript:gs.dateGenerate('{}','{}')"
                                 "@javascript:gs.dateGenerate('{}','{}')".format(
                                     str(from_time.date()), str(from_time.time()),
                                     str(to_time.date()), str(to_time.time()))
            })
        print('response :' + response)
        count_of_records = int(json.loads(response)['result']['stats']['count'])

        log = LoggingMixin().log
        log.info("Getting count from: {} to : {} ".format(from_time, to_time))
        log.info("totals number of records %s ", str(count_of_records))

        if int(count_of_records) == 0:
            return 'count_is_zero'
        elif int(count_of_records) > config['threshold']:
            return 'count_exceeds_threshold'
        else:
            return 'count_within_threshold'

    except Exception as e:
        kwargs['ti'].xcom_push(key='exception', value=str(e))

        instance = kwargs['task_instance']
        dag_id = str(instance.dag_id)
        task_id = str(instance.task_id)
        msg = str(e)
        execution_date = str(instance.execution_date)
        run_id = str(kwargs['run_id'])
        execution_date = execution_date.replace('T', ' ')[0:19]

        key = '{}${}'.format(execution_date, dag_id)
        value = {
            'dag_id': dag_id,
            'execution_date': execution_date,
            'task_id': task_id,
            'run_id': run_id,
            'error_msg': msg
        }
        Variable.set(key=key, value=json.dumps(value))
        raise
engine_args = {}
if disable_connection_pool:
    engine_args['poolclass'] = NullPool
elif 'sqlite' not in SQL_ALCHEMY_CONN:
    # Engine args not supported by sqlite
    engine_args['pool_size'] = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
    engine_args['pool_recycle'] = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE')

engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
Session = scoped_session(
    sessionmaker(autocommit=False, autoflush=False, bind=engine))

try:
    from airflow_local_settings import *
    log.info("Loaded airflow_local_settings.")
except:
    pass

configure_logging()
configure_vars()
configure_orm()

# TODO: Unify airflow logging setups. Please see AIRFLOW-1457.
logging_config_path = conf.get('core', 'logging_config_path')
try:
    from logging_config_path import LOGGING_CONFIG
    log.debug("Successfully imported user-defined logging config.")
except Exception as e:
    # Import default logging configurations.
    log.debug(
def get_connection(cls, conn_id):
    # type: (str) -> Connection
    conn = random.choice(list(cls.get_connections(conn_id)))
    if conn.host:
        log = LoggingMixin().log
        log.info("Using connection to: %s", conn.debug_info())
    return conn
def get_connection(cls, conn_id):
    conn = random.choice(cls.get_connections(conn_id))
    if conn.host:
        log = LoggingMixin().log
        log.info("Using connection to: %s", conn.debug_info())
    return conn
def get_connection(cls, conn_id):
    # type: (str) -> Connection
    conn = random.choice(list(cls.get_connections(conn_id)))
    if conn.host:
        log = LoggingMixin().log
        log.info("Using connection to: %s", conn.log_info())
    return conn
def get_connection(cls, conn_id):
    conn = random.choice(cls.get_connections(conn_id))
    if conn.host:
        log = LoggingMixin().log
        log.info("Using connection to: %s", conn.host)
    return conn
        try:
            conn = get_ldap_connection(entry['dn'], password)
        except KeyError:
            log.error("""
            Unable to parse LDAP structure. If you're using Active Directory
            and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
            %s
            """, traceback.format_exc())
            raise LdapException(
                "Could not parse LDAP structure. "
                "Try setting search_scope in airflow.cfg, or check logs"
            )

        if not conn:
            log.info("Password incorrect for user %s", username)
            raise AuthenticationError("Invalid username or password")

    @property
    def is_active(self):
        """Required by flask_login"""
        return True

    @property
    def is_authenticated(self):
        """Required by flask_login"""
        return True

    @property
    def is_anonymous(self):
        """Required by flask_login"""
def check_if_tweet_is_avalaible(twitter_account_id=None, since_id=None,
                                find_param=None, **kwargs):
    """
    This method uses the tweepy api via TwitterHook to check whether a tweet from a
    specific twitter_account containing a specific search string exists or not.

    :param twitter_account_id: account for which tweets are to be fetched
    :param since_id: airflow execution date of the dag
    :return: tweet_id
    """
    log = LoggingMixin().log
    try:
        # Load Configuration Data
        config = json.loads(Variable.get("config"))
        log.info("Config found")
    except AirflowException as e:
        log.error("Config missing")
        raise ConfigVariableNotFoundException()

    try:
        twitter_account_id = config['twitter_account_id']
    except KeyError as e:
        raise AirflowException('Missing Twitter Account Id in config variable')

    try:
        since_id = config['since_id']
    except KeyError as e:
        log.warn("Since id missing")

    try:
        find_param = config['find_param'].lower()
    except KeyError as e:
        raise AirflowException('Missing Find Param in config variable')

    try:
        twitter_credentials = BaseHook.get_connection("twitter_default")
        twitter_credentials = json.loads(twitter_credentials.extra)
        consumer_key = twitter_credentials['consumer_key']
        consumer_secret = twitter_credentials['consumer_secret']
        access_token = twitter_credentials['access_token']
        access_token_secret = twitter_credentials['access_token_secret']
    except AirflowException as e:
        raise TwitterConnectionNotFoundException()

    twitter_hook = TwitterHook(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)
    tweepy_api = twitter_hook.get_tweepy_api()
    today = date.today()
    curr_date = today.strftime("%d-%m-%Y")

    # try to get tweet related to covid media bulliten from @diprjk handle
    tweets = tweepy_api.user_timeline(id=twitter_account_id,
                                      since_id=since_id,
                                      count=1000,
                                      exclude_replies=True,
                                      include_rts=False,
                                      tweet_mode="extended")
    if len(tweets) > 0:
        # find_param = "Media Bulletin on Novel".lower()
        log.info("Found : {} tweets".format(len(tweets) + 1))
        # loop over all extracted tweets and
        # if tweet.full_text contains string "Media Bulletin On Novel"
        # then we got our concerned tweet and save its tweet_id
        image_urls = []
        for tweet in tweets:
            tweet_date = tweet.created_at
            tweet_date = tweet_date.strftime("%d-%m-%Y")
            text = tweet.full_text.lower()
            if find_param in text and tweet_date == curr_date:
                bulletin_tweet_id = tweet.id
                print('Tweet found')
                # save bulliten tweet id as environ variable or on file and then use in next run
                log.info("Tweet ID: {} TEXT : {} ".format(
                    bulletin_tweet_id, tweet.full_text))
                if 'media' in tweet.entities:
                    for media in tweet.extended_entities['media']:
                        image_urls.append(media['media_url'])
                    detail_image_url = image_urls[2]
                    log.info("Tweet Image Url: {} ".format(detail_image_url))
                else:
                    log.info("No media found")
                    # skip the processing and end dag
                    return False
                data = {
                    "tweet_id": bulletin_tweet_id,
                    "tweet_date": tweet_date,
                    "media_url": detail_image_url
                }
                Variable.set("bulliten_tweet", json.dumps(data))
                return True
            else:
                pass
        else:
            log.info("No tweets related to {} found".format(find_param))
            return False
    else:
        log.info("No tweets found!")
        return False