def load_into_redis(file_path):
    print('checking {}'.format(file_path))
    lines = []
    with open('/tmp/downloader_test1', 'rb') as f:
        lines = f.readlines()
    file_md5 = ''.join([line.decode('latin-1') for line in lines])
    hook = RedisHook(redis_conn_id='redis_default')
    hook.host = 'redis'
    hook.port = 6379
    redis = hook.get_conn()
    try:
        redis.ping()
    except Exception as e:
        print('could not ping redis')
        print(str(e))
        return
    redis.set('google', file_md5)
    md5_return_value = redis.get('google')
    print('set redis with keyvalue pair <google> {}'.format(md5_return_value))
def __init__(self, channels, redis_conn_id, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.channels = channels
    self.redis_conn_id = redis_conn_id
    self.pubsub = RedisHook(
        redis_conn_id=self.redis_conn_id).get_conn().pubsub()
    self.pubsub.subscribe(self.channels)
def test_execute_hello(self):
    operator = RedisPublishOperator(
        task_id='test_task',
        dag=self.dag,
        message='hello',
        channel=self.channel,
        redis_conn_id='redis_default'
    )

    hook = RedisHook(redis_conn_id='redis_default')
    pubsub = hook.get_conn().pubsub()
    pubsub.subscribe(self.channel)

    operator.execute(self.mock_context)

    context_calls = []
    self.assertTrue(self.mock_context['ti'].method_calls == context_calls,
                    "context calls should be same")

    message = pubsub.get_message()
    self.assertEquals(message['type'], 'subscribe')

    message = pubsub.get_message()
    self.assertEquals(message['type'], 'message')
    self.assertEquals(message['data'], b'hello')

    pubsub.unsubscribe(self.channel)
def test_poke_true(self):
    sensor = RedisPubSubSensor(task_id='test_task',
                               dag=self.dag,
                               channels='test',
                               redis_conn_id='redis_default')

    hook = RedisHook(redis_conn_id='redis_default')
    redis = hook.get_conn()
    redis.publish('test', 'message')

    result = sensor.poke(self.mock_context)
    self.assertFalse(result)

    result = sensor.poke(self.mock_context)
    self.assertTrue(result)

    context_calls = [
        call.xcom_push(key='message',
                       value={'type': 'message',
                              'pattern': None,
                              'channel': b'test',
                              'data': b'message'})
    ]
    self.assertTrue(self.mock_context['ti'].method_calls == context_calls,
                    "context calls should be same")

    result = sensor.poke(self.mock_context)
    self.assertFalse(result)
def test_poke(self):
    hook = RedisHook(redis_conn_id='redis_default')
    redis = hook.get_conn()

    redis.set('test_key', 'test_value')
    self.assertTrue(self.sensor.poke(None), "Key exists on first call.")

    redis.delete('test_key')
    self.assertFalse(self.sensor.poke(None), "Key does NOT exist on second call.")
def query_and_extract(**context):
    http_conn = HttpHook('GET', http_conn_id)
    redis_conn = RedisHook(redis_conn_id)
    prev_exec_date = context.get('prev_execution_date')
    next_exec_date = context.get('next_execution_date')
    query_meta = "SELECT fileName FROM archive_files WHERE archiveName = '{}'" \
                 " AND ingestDate > '{}' and ingestDate <= '{}' ORDER BY ingestDate".format(
                     collection,
                     prev_exec_date.strftime(datetime_format),
                     next_exec_date.strftime(datetime_format))
    logging.info('Query: {}'.format(query_meta))
    data = {'QUERY': query_meta,
            'LANG': 'ADQL',
            'FORMAT': '{}'.format(output_format)}

    with http_conn.run('/ad/auth-sync?{}'.format(
            parse.urlencode(data))) as response:
        artifact_files_list = response.text.split()[1:]
        if artifact_files_list:
            redis_key = '{}_{}_{}.{}'.format(
                collection,
                _to_milliseconds(prev_exec_date),
                _to_milliseconds(next_exec_date),
                output_format)
            # push each file name as its own list element so lrange can read them back
            redis_conn.get_conn().rpush(redis_key, *artifact_files_list)
            return redis_key
def create_transform_task(redis_key):
    redis_conn = RedisHook(redis_conn_id)
    input_file_names = redis_conn.get_conn().lrange(redis_key, 0, -1)
    child_dag_id = '_files_{}'.format(redis_key)
    return SubDagOperator(subdag=sub_dag(child_dag_id, input_file_names, redis_key),
                          task_id=child_dag_id,
                          dag=vlass_dag)
def set_redis(key, value, **context):
    redis_hook = RedisHook(redis_conn_id='redis_default')
    r = redis_hook.get_conn()
    r.set(key, value)
    context['ti'].xcom_push('redis-test', value)
    context['ti'].xcom_push('redis-branch-test', True)
def get_driver_num(**op_kwargs):
    driver_num = {}
    res = []
    conn = get_db_conn('mysql_oride_data_readonly')
    mcursor = conn.cursor()
    driver_id = -1
    results = tuple()
    driver_dic = {}
    while True:
        sql = query_driver_city_serv.format(id=driver_id)
        logging.info(sql)
        mcursor.execute(sql)
        conn.commit()
        tmp = mcursor.fetchall()
        if not tmp:
            break
        results += tmp
        driver_id = tmp[-1][0]
    mcursor.close()
    conn.close()
    for data in results:
        driver_dic[data[0]] = ",".join([str(data[1]), str(data[2])])

    redis_conn = RedisHook(redis_conn_id='pika_85').get_conn()
    ts = op_kwargs['ts']
    dt, h = ts.split('T')
    dt = dt + ' ' + h.split('+')[0]
    time_array = time.strptime(dt, "%Y-%m-%d %H:%M:%S")
    timestamp = int(time.mktime(time_array))
    a_member = set()
    no_member = set()
    dt_start = time.strftime('%Y%m%d%H%M', time.localtime(timestamp))
    for i in range(0, 10):
        dt = time.strftime('%Y%m%d%H%M', time.localtime(timestamp + i * 60))
        a_member = a_member.union(set(redis_conn.smembers(active_a_driver % dt)))
        no_member = no_member.union(set(redis_conn.smembers(active_no_driver % dt)))

    for mem in a_member:
        tmp = driver_dic.get(int(mem), '0,0')
        if tmp not in driver_num:
            driver_num[tmp] = {"a_mem": 0, "no_mem": 0}
        driver_num[tmp]["a_mem"] += 1
    for mem in no_member:
        tmp = driver_dic.get(int(mem), '0,0')
        if tmp not in driver_num:
            driver_num[tmp] = {"a_mem": 0, "no_mem": 0}
        driver_num[tmp]["no_mem"] += 1

    for k, v in driver_num.items():
        info = k.split(",")
        res.append([int(info[0]), int(info[1]), dt_start + '00', v["a_mem"], v["no_mem"]])

    conn = get_db_conn('mysql_bi')
    mcursor = conn.cursor()
    mcursor.executemany(insert_driver_num, res)
    logging.info('insert num %s, data %s', len(res), str(res))
    conn.commit()
    mcursor.close()
    conn.close()
def test_get_conn(self):
    hook = RedisHook(redis_conn_id='redis_default')
    self.assertEqual(hook.client, None)
    self.assertEqual(
        repr(hook.get_conn()),
        (
            'StrictRedis<ConnectionPool'
            '<Connection<host=localhost,port=6379,db=0>>>'
        )
    )
def test_real_get_and_set(self):
    hook = RedisHook(redis_conn_id='redis_default')
    redis = hook.get_conn()

    self.assertTrue(redis.set('test_key', 'test_value'),
                    'Connection to Redis with SET works.')
    self.assertEqual(redis.get('test_key'), b'test_value',
                     'Connection to Redis with GET works.')
    self.assertEqual(redis.delete('test_key'), 1,
                     'Connection to Redis with DELETE works.')
def test_get_conn(self):
    hook = RedisHook(redis_conn_id='redis_default')
    self.assertEqual(hook.redis, None)

    self.assertEqual(hook.host, None, 'host initialised as None.')
    self.assertEqual(hook.port, None, 'port initialised as None.')
    self.assertEqual(hook.password, None, 'password initialised as None.')
    self.assertEqual(hook.db, None, 'db initialised as None.')
    self.assertIs(hook.get_conn(), hook.get_conn(),
                  'Connection initialized only if None.')
class RedisPubSubSensor(BaseSensorOperator):
    """
    Redis sensor for reading a message from pub sub channels
    """
    template_fields = ('channels',)
    ui_color = '#f0eee4'

    @apply_defaults
    def __init__(self, channels, redis_conn_id, *args, **kwargs):
        """
        Create a new RedisPubSubSensor and subscribe to the channels

        :param channels: The channels to be subscribed to (templated)
        :type channels: str or list of str
        :param redis_conn_id: the redis connection id
        :type redis_conn_id: str
        """
        super().__init__(*args, **kwargs)
        self.channels = channels
        self.redis_conn_id = redis_conn_id
        self.pubsub = RedisHook(redis_conn_id=self.redis_conn_id).get_conn().pubsub()
        self.pubsub.subscribe(self.channels)

    def poke(self, context):
        """
        Check for message on subscribed channels and write to xcom the message with key ``message``

        An example of message ``{'type': 'message', 'pattern': None, 'channel': b'test', 'data': b'hello'}``

        :param context: the context object
        :type context: dict
        :return: ``True`` if message (with type 'message') is available or ``False`` if not
        """
        self.log.info('RedisPubSubSensor checking for message on channels: %s', self.channels)

        message = self.pubsub.get_message()
        self.log.info('Message %s from channel %s', message, self.channels)

        # Process only message types
        if message and message['type'] == 'message':
            context['ti'].xcom_push(key='message', value=message)
            self.pubsub.unsubscribe(self.channels)
            return True

        return False
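# Hypothetical usage sketch (not part of the class above): wiring the
# RedisPubSubSensor into a DAG. The dag id, start date, channel name and
# poke_interval are illustrative assumptions; 'redis_default' is the stock
# connection id used throughout these examples.
from datetime import datetime
from airflow import DAG

with DAG('redis_pubsub_example',
         start_date=datetime(2018, 1, 1),
         schedule_interval=None) as example_dag:
    wait_for_message = RedisPubSubSensor(
        task_id='wait_for_message',
        channels='test',
        redis_conn_id='redis_default',
        poke_interval=10)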
def __init__(self, channels, redis_conn_id, *args, **kwargs):
    """
    Create a new RedisPubSubSensor and subscribe to the channels

    :param channels: The channels to be subscribed to (templated)
    :type channels: str or list of str
    :param redis_conn_id: the redis connection id
    :type redis_conn_id: str
    """
    super().__init__(*args, **kwargs)
    self.channels = channels
    self.redis_conn_id = redis_conn_id
    self.pubsub = RedisHook(redis_conn_id=self.redis_conn_id).get_conn().pubsub()
    self.pubsub.subscribe(self.channels)
def execute(self, context):
    """
    Publish the message to Redis channel

    :param context: the context object
    :type context: dict
    """
    redis_hook = RedisHook(redis_conn_id=self.redis_conn_id)

    self.log.info('Sending message %s to Redis on channel %s', self.message, self.channel)

    result = redis_hook.get_conn().publish(channel=self.channel, message=self.message)

    self.log.info('Result of publishing %s', result)
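# Hypothetical usage sketch for the operator whose execute() is shown above
# (assumed to be RedisPublishOperator, as in the test earlier in this listing).
# The task id, channel and the ``dag`` object are illustrative assumptions.
publish_hello = RedisPublishOperator(
    task_id='publish_hello',
    channel='test',
    message='hello',
    redis_conn_id='redis_default',
    dag=dag)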
def user_label_to_redis(ds, **kwargs):
    label_list = {
        'lab_new_user': 1,
        'lab_login_without_orders': 2,
        'lab_login_have_orders': 3,
        'lab_cancel_ge_finish': 4
    }
    query = """
        SELECT user_id, lab_new_user, lab_login_without_orders,
               lab_login_have_orders, lab_cancel_ge_finish, phone_number
        FROM dashboard.oride_user_label
        WHERE dt='{dt}'
    """.format(dt=ds)
    cursor = get_hive_cursor()
    cursor.execute(query)
    results = cursor.fetchall()
    redis_conn = RedisHook(redis_conn_id='redis_user_lab').get_conn()
    expire_time = 86400
    for user_id, lab_new_user, lab_login_without_orders, lab_login_have_orders, \
            lab_cancel_ge_finish, phone_number in results:
        # renamed from ``list`` to avoid shadowing the built-in
        labels = []
        if lab_new_user == True:
            labels.append(label_list['lab_new_user'])
        if lab_login_without_orders == True:
            labels.append(label_list['lab_login_without_orders'])
        if lab_login_have_orders == True:
            labels.append(label_list['lab_login_have_orders'])
        if lab_cancel_ge_finish == True:
            labels.append(label_list['lab_cancel_ge_finish'])
        if len(labels):
            redis_key = 'user_tag_%s' % phone_number
            redis_conn.set(redis_key, json.dumps(labels), ex=expire_time)
            logging.info('user_id:%s, lab_list:%s, key:%s, phone_number:%s' % (
                user_id, json.dumps(labels), redis_key, phone_number))
    cursor.close()
def snapshot(**kwargs):
    """
    Query the TAP service and snapshot the OMM data.

    #FIXME: The query should have some conditions to limit the data.
    """
    logging.info('Populating inputs.')
    query = Variable.get('omm_input_uri_query')
    redis = RedisHook(redis_conn_id='redis_default')
    data = {'QUERY': query, 'REQUEST': 'doQuery', 'LANG': 'ADQL', 'FORMAT': 'csv'}
    http_connection = HttpHook(method='GET', http_conn_id='tap_service_host')
    count = -1

    with http_connection.run('/tap/sync?', parse.urlencode(data)) as response:
        arr = response.text.split('\n')
        count = len(arr)
        logging.info('Found {} items.'.format(count))
        sanitized_uris = []
        for uri in arr[1:]:
            if uri:
                artifact_uri = uri.split('/')[1].strip()
                sanitized_artifact_uri = artifact_uri.replace(
                    '+', '_').replace('%', '__')
                logging.info('Output is {}'.format(sanitized_artifact_uri))
                sanitized_uris.append(sanitized_artifact_uri)
        redis.get_conn().rpush(redis_key, *sanitized_uris)
        redis.get_conn().persist(redis_key)
        return 'Extracted {} items'.format(len(sanitized_uris))
def get_hook(self):
    try:
        if self.conn_type == 'mysql':
            from airflow.hooks.mysql_hook import MySqlHook
            return MySqlHook(mysql_conn_id=self.conn_id)
        elif self.conn_type == 'google_cloud_platform':
            from airflow.contrib.hooks.bigquery_hook import BigQueryHook
            return BigQueryHook(bigquery_conn_id=self.conn_id)
        elif self.conn_type == 'postgres':
            from airflow.hooks.postgres_hook import PostgresHook
            return PostgresHook(postgres_conn_id=self.conn_id)
        elif self.conn_type == 'hive_cli':
            from airflow.hooks.hive_hooks import HiveCliHook
            return HiveCliHook(hive_cli_conn_id=self.conn_id)
        elif self.conn_type == 'presto':
            from airflow.hooks.presto_hook import PrestoHook
            return PrestoHook(presto_conn_id=self.conn_id)
        elif self.conn_type == 'hiveserver2':
            from airflow.hooks.hive_hooks import HiveServer2Hook
            return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
        elif self.conn_type == 'sqlite':
            from airflow.hooks.sqlite_hook import SqliteHook
            return SqliteHook(sqlite_conn_id=self.conn_id)
        elif self.conn_type == 'jdbc':
            from airflow.hooks.jdbc_hook import JdbcHook
            return JdbcHook(jdbc_conn_id=self.conn_id)
        elif self.conn_type == 'mssql':
            from airflow.hooks.mssql_hook import MsSqlHook
            return MsSqlHook(mssql_conn_id=self.conn_id)
        elif self.conn_type == 'oracle':
            from airflow.hooks.oracle_hook import OracleHook
            return OracleHook(oracle_conn_id=self.conn_id)
        elif self.conn_type == 'vertica':
            from airflow.contrib.hooks.vertica_hook import VerticaHook
            return VerticaHook(vertica_conn_id=self.conn_id)
        elif self.conn_type == 'cloudant':
            from airflow.contrib.hooks.cloudant_hook import CloudantHook
            return CloudantHook(cloudant_conn_id=self.conn_id)
        elif self.conn_type == 'jira':
            from airflow.contrib.hooks.jira_hook import JiraHook
            return JiraHook(jira_conn_id=self.conn_id)
        elif self.conn_type == 'redis':
            from airflow.contrib.hooks.redis_hook import RedisHook
            return RedisHook(redis_conn_id=self.conn_id)
        elif self.conn_type == 'wasb':
            from airflow.contrib.hooks.wasb_hook import WasbHook
            return WasbHook(wasb_conn_id=self.conn_id)
        elif self.conn_type == 'docker':
            from airflow.hooks.docker_hook import DockerHook
            return DockerHook(docker_conn_id=self.conn_id)
    except:
        pass
def test_first_conn_instantiation(self, get_conn):
    hook = RedisHook(redis_conn_id='redis_default')
    hook.key_exists('test_key')
    self.assertTrue(get_conn.called)
def get_redis(key, **context):
    redis_hook = RedisHook(redis_conn_id='redis_default')
    r = redis_hook.get_conn()
    return r.get(key)
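# A minimal sketch (a ``dag`` object is assumed to exist) of wiring the
# set_redis / get_redis helpers above into PythonOperator tasks; the key and
# value are illustrative.
from airflow.operators.python_operator import PythonOperator

set_task = PythonOperator(
    task_id='set_redis',
    python_callable=set_redis,
    op_kwargs={'key': 'redis-test', 'value': 'hello'},
    provide_context=True,
    dag=dag)

get_task = PythonOperator(
    task_id='get_redis',
    python_callable=get_redis,
    op_kwargs={'key': 'redis-test'},
    provide_context=True,
    dag=dag)

set_task >> get_task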
def get_hook(self):
    if self.conn_type == 'mysql':
        from airflow.hooks.mysql_hook import MySqlHook
        return MySqlHook(mysql_conn_id=self.conn_id)
    elif self.conn_type == 'google_cloud_platform':
        from airflow.gcp.hooks.bigquery import BigQueryHook
        return BigQueryHook(bigquery_conn_id=self.conn_id)
    elif self.conn_type == 'postgres':
        from airflow.hooks.postgres_hook import PostgresHook
        return PostgresHook(postgres_conn_id=self.conn_id)
    elif self.conn_type == 'pig_cli':
        from airflow.hooks.pig_hook import PigCliHook
        return PigCliHook(pig_cli_conn_id=self.conn_id)
    elif self.conn_type == 'hive_cli':
        from airflow.hooks.hive_hooks import HiveCliHook
        return HiveCliHook(hive_cli_conn_id=self.conn_id)
    elif self.conn_type == 'presto':
        from airflow.hooks.presto_hook import PrestoHook
        return PrestoHook(presto_conn_id=self.conn_id)
    elif self.conn_type == 'hiveserver2':
        from airflow.hooks.hive_hooks import HiveServer2Hook
        return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
    elif self.conn_type == 'sqlite':
        from airflow.hooks.sqlite_hook import SqliteHook
        return SqliteHook(sqlite_conn_id=self.conn_id)
    elif self.conn_type == 'jdbc':
        from airflow.hooks.jdbc_hook import JdbcHook
        return JdbcHook(jdbc_conn_id=self.conn_id)
    elif self.conn_type == 'mssql':
        from airflow.hooks.mssql_hook import MsSqlHook
        return MsSqlHook(mssql_conn_id=self.conn_id)
    elif self.conn_type == 'oracle':
        from airflow.hooks.oracle_hook import OracleHook
        return OracleHook(oracle_conn_id=self.conn_id)
    elif self.conn_type == 'vertica':
        from airflow.contrib.hooks.vertica_hook import VerticaHook
        return VerticaHook(vertica_conn_id=self.conn_id)
    elif self.conn_type == 'cloudant':
        from airflow.contrib.hooks.cloudant_hook import CloudantHook
        return CloudantHook(cloudant_conn_id=self.conn_id)
    elif self.conn_type == 'jira':
        from airflow.contrib.hooks.jira_hook import JiraHook
        return JiraHook(jira_conn_id=self.conn_id)
    elif self.conn_type == 'redis':
        from airflow.contrib.hooks.redis_hook import RedisHook
        return RedisHook(redis_conn_id=self.conn_id)
    elif self.conn_type == 'wasb':
        from airflow.contrib.hooks.wasb_hook import WasbHook
        return WasbHook(wasb_conn_id=self.conn_id)
    elif self.conn_type == 'docker':
        from airflow.hooks.docker_hook import DockerHook
        return DockerHook(docker_conn_id=self.conn_id)
    elif self.conn_type == 'azure_data_lake':
        from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
        return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
    elif self.conn_type == 'azure_cosmos':
        from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook
        return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
    elif self.conn_type == 'cassandra':
        from airflow.contrib.hooks.cassandra_hook import CassandraHook
        return CassandraHook(cassandra_conn_id=self.conn_id)
    elif self.conn_type == 'mongo':
        from airflow.contrib.hooks.mongo_hook import MongoHook
        return MongoHook(conn_id=self.conn_id)
    elif self.conn_type == 'gcpcloudsql':
        from airflow.gcp.hooks.cloud_sql import CloudSqlDatabaseHook
        return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
    elif self.conn_type == 'grpc':
        from airflow.contrib.hooks.grpc_hook import GrpcHook
        return GrpcHook(grpc_conn_id=self.conn_id)
    raise AirflowException("Unknown hook type {}".format(self.conn_type))
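# A short sketch of how get_hook() above dispatches on conn_type: a Connection
# with conn_type 'redis' should resolve to a RedisHook. The host/port values
# here are illustrative; only the conn_type drives the dispatch.
from airflow.models import Connection

redis_connection = Connection(conn_id='redis_default', conn_type='redis',
                              host='localhost', port=6379)
redis_hook = redis_connection.get_hook()  # returns a RedisHook via the chain above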
def test_get_conn_password_stays_none(self):
    hook = RedisHook(redis_conn_id='redis_default')
    hook.get_conn()
    self.assertEqual(hook.password, None)
def test_real_ping(self):
    hook = RedisHook(redis_conn_id='redis_default')
    redis = hook.get_conn()

    self.assertTrue(redis.ping(), 'Connection to Redis with PING works.')
from urllib import parse as parse
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2018, 7, 26),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'provide_context': True,
    'retry_delay': timedelta(minutes=5)
}

redis_hook = RedisHook(redis_conn_id='redis_default')
http_conn = HttpHook('GET', 'test_netrc')

# TODO - when deploying to have this actually run, catchup=True!!!!!
# and schedule_interval=timedelta(hours=1)
vlass_find_work = DAG('vlass_find_work',
                      default_args=default_args,
                      catchup=True,
                      schedule_interval=timedelta(hours=1))


# provide_context in default_args above must be True to get the kwargs values
def query_vlass(ds, **kwargs):
    prev_date = kwargs['prev_execution_date'].to_datetime_string()
    next_date = kwargs['next_execution_date'].to_datetime_string()
    query_meta = "SELECT fileName FROM archive_files WHERE archiveName ='VLASS'" \
                 " AND ingestDate > '{}' and ingestDate <= '{}'".format(
                     prev_date, next_date)
# https://github.com/apache/airflow/tree/master/airflow/providers/redis
import redis

from airflow.contrib.hooks.redis_hook import RedisHook
# from airflow.providers.redis.hooks.redis import RedisHook

# cache = redis.StrictRedis(host='redis', port=6379, db=0)
# print(cache.keys())

cache2 = RedisHook().get_conn()
print(cache2.keys())
def poke(self, context):
    self.log.info('Sensor check existence of key: %s', self.key)
    return RedisHook(self.redis_conn_id).key_exists(self.key)
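# Hypothetical usage sketch for the key-existence sensor whose poke() is shown
# above (assumed to behave like the contrib RedisKeySensor); the key name,
# timing values and the ``dag`` object are illustrative.
wait_for_key = RedisKeySensor(
    task_id='wait_for_key',
    key='test_key',
    redis_conn_id='redis_default',
    poke_interval=30,
    timeout=600,
    dag=dag)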
def set_redis_data(data, **kwargs):
    prefixRunId = getUniqueRunId(kwargs)
    redis_conn = RedisHook(redis_conn_id='redis_default').get_conn()
    redis_conn.set(prefixRunId, json.dumps(data))
def get_redis_data(kwargs):
    prefixRunId = getUniqueRunId(kwargs)
    redis_conn = RedisHook(redis_conn_id='redis_default').get_conn()
    return json.loads(redis_conn.get(prefixRunId))
def clean_up(**kwargs):
    prefixRunId = getUniqueRunId(kwargs)
    redis_conn = RedisHook(redis_conn_id='redis_default').get_conn()
    if redis_conn.get(prefixRunId) is not None:
        redis_conn.delete(prefixRunId)
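# A minimal sketch (the ``dag`` object and the payload are assumptions) of
# running the set_redis_data / clean_up helpers above as PythonOperator tasks
# around a run-scoped Redis entry keyed by getUniqueRunId().
from airflow.operators.python_operator import PythonOperator

store_run_data = PythonOperator(
    task_id='store_run_data',
    python_callable=set_redis_data,
    op_args=[{'status': 'started'}],
    provide_context=True,
    dag=dag)

clean_up_run_data = PythonOperator(
    task_id='clean_up_run_data',
    python_callable=clean_up,
    provide_context=True,
    dag=dag)

store_run_data >> clean_up_run_data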