def get_spin(**kwargs):
    http = HttpHook('GET', http_conn_id='http_gbfs_spin')
    response = http.run("/api/gbfs/v1/detroit/free_bike_status")
    spins = json.loads(response.text)
    for s in spins['data']['bikes']:
        device_id = s.pop('bike_id')
        lat = s.pop('lat')
        lon = s.pop('lon')
        insert = f"""
            insert into scooters.availability (
                vendor, device_id, timestamp, extra, geom
            ) values (
                'spin',
                '{device_id}',
                '{kwargs['execution_date']}',
                '{json.dumps(s)}',
                ST_SetSRID(ST_MakePoint({lon},{lat}), 4326)
            )
        """
        pg.run(insert)
    return response

def get_data_zomato_api(*args, **kwargs):
    api_hook = HttpHook(http_conn_id="zomato_api", method='GET')
    data_dict = {}
    schema = {"properties": {"restaurants": {"mergeStrategy": "append"}}}
    merger = Merger(schema)
    for i in range(0, 100, 20):
        endpoint_url = "search?entity_id=3&entity_type=city&start={}&count=20&sort=rating".format(i)
        resp_url = api_hook.run(endpoint=endpoint_url)
        resp = json.loads(resp_url.content)
        if i == 0:
            data_dict.update(resp)
            result = data_dict
        else:
            result = merger.merge(result, resp)
    with open("/Users/preetiyerkuntwar/documents/Zomato-test/all_restro.json", "w") as f:
        json.dump(result, f)

def copy_brazil_data_file(origin_host, origin_filepath, dest_bucket, dest_key):
    """Copy Brazil data file to a local bucket.

    Copy the source file, which contains detailed data about Brazil, to an
    AWS S3 bucket to make it available to AWS EMR.

    args:
        origin_host (str): host where the source file resides
        origin_filepath (str): full path to the file on the host
        dest_bucket (str): name of the bucket to store the file
        dest_key (str): prefix/name of the file in the destination bucket
    """
    logging.info('Copying Brazil data file '
                 f'FROM: http://{origin_host}/{origin_filepath} '
                 f'TO: s3://{dest_bucket}/{dest_key}')

    # Create a connection object for the source server and register it
    # in the Airflow metadata database via an ORM session
    conn = Connection(conn_id='http_conn_brasilio',
                      conn_type='http',
                      host=origin_host,
                      port=80)
    session = settings.Session()
    session.add(conn)
    session.commit()

    # Get the data file
    http_hook = HttpHook(method='GET', http_conn_id='http_conn_brasilio')
    response_br_data = http_hook.run(origin_filepath)

    # Store the data file in the S3 bucket
    s3_hook = S3Hook(aws_conn_id='aws_default')
    s3_hook.load_bytes(response_br_data.content,
                       dest_key,
                       bucket_name=dest_bucket,
                       replace=True)
    logging.info('Data copy finished.')

def test_connection_without_host(self, mock_get_connection):
    c = Connection(conn_id='http_default', conn_type='http')
    mock_get_connection.return_value = c

    hook = HttpHook()
    hook.get_conn({})
    self.assertEqual(hook.base_url, 'http://')

def get_typeahead_result(self, http_endpoint, data_headers):
    """Look up BAG verblijfsobject id from the typeahead service
    for non-geometry location data."""
    http = HttpHook(method="GET", http_conn_id=self.http_conn_id)
    http_response = http.run(endpoint=http_endpoint, data=None, headers=data_headers)
    return http_response

def __init__(self,
             http_conn_id,
             token,
             job_name,
             data=None,
             headers=None,
             method='start',
             daemon=True,
             parallelism=0,
             retry_times=3,
             retry_sleep_time=1,
             *args,
             **kwargs):
    basic_headers = {'Content-Type': "application/json", 'Token': token}
    if headers:
        basic_headers.update(headers)
    self.headers = basic_headers
    self.http_conn_id = http_conn_id
    self.job_name = job_name
    self.http = HttpHook('POST', http_conn_id=self.http_conn_id)
    self.data = data if data is not None else {}
    self.job_last_run_id = dict()
    self.job_pools = []
    self.all_jobs = None
    self.finished_jobs = []
    self.parallelism = parallelism
    self.method = method
    self.daemon = daemon
    self.retry_times = retry_times
    self.retry_sleep_time = retry_sleep_time
    self.start_run_time = time.time()
    self.failed_jobs = defaultdict(int)
    super(_BaseJobOperator, self).__init__(*args, **kwargs)

def print_hello():
    task_id = 'PQS_details'
    # task_id='Lookup_Post',
    http_conn_id = 'qs_default'
    # http_conn_id='lookup_conn',
    method = 'POST'
    data = task
    # data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    # data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = 'query/12345@xyzOrg/test'
    # endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer eyJ4NXUiOiJpbXMta2V5LTEuY2VyIiwiYWxnIjoiUlMyNTYifQ.eyJpZCI6IjE1MTcyNDUzNzgzNDVfYjg2ZGU4ZDgtZTAzZi00OGNmLThmZTAtN2VmNDcxYjFhYzRhX3VlMSIsImNsaWVudF9pZCI6Ik1DRFBDYXRhbG9nU2VydmljZVFhMiIsInVzZXJfaWQiOiJNQ0RQQ2F0YWxvZ1NlcnZpY2VRYTJAQWRvYmVJRCIsInR5cGUiOiJhY2Nlc3NfdG9rZW4iLCJhcyI6Imltcy1uYTEtcWEyIiwicGFjIjoiTUNEUENhdGFsb2dTZXJ2aWNlUWEyX2RldnFhIiwicnRpZCI6IjE1MTcyNDUzNzgzNTBfZjNkOTExMTEtZjFjNi00OTIzLThjNWEtYThmN2RjNGJkNjhhX3VlMSIsInJ0ZWEiOiIxNTE4NDU0OTc4MzUwIiwibW9pIjoiMzI5YjFmNDciLCJjIjoiTnd3VmE4UG9ubDZuWkZpQi8yZjdkUT09IiwiZXhwaXJlc19pbiI6Ijg2NDAwMDAwIiwic2NvcGUiOiJzeXN0ZW0sQWRvYmVJRCxvcGVuaWQiLCJjcmVhdGVkX2F0IjoiMTUxNzI0NTM3ODM0NSJ9.MTuH1VI-b4PcLlr1BHB4a_CXxwn8TbsyQVk3pJi2uoNpng9jhH1W-lfg0pzbhLy0Ji81HlIopbTEGYNYlfXjzE9OtKM-BX7-VQOSXmS7tgAkfec0OheZariZUIdpXUa4YY-BHLsSijkBWs-AUTzZ9SG91M-8cWn_4U7uPSRn2aqEqT92kZgRRl-u5Zq6YteSFXUJCpY64IOO2c-sZfLQ6yQb425qUyn468ECPElZ5h00483xs81ZFF-r8LFhQtQpQ7m9pfNFZWqUexM_RrQNUL6XZmiq7mZ1_wxw1RSIDDMTvt35JU2MUjs6B0c9GwMTL814ej0ewrDV1qhAIliLlA"
    }
    extra_options = {}

    http = HttpHook(method, http_conn_id='qs_default')
    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    print(os.environ['TEST_PATH'])
    response = http.run('query/12345@xyzOrg/visual', data, headers, extra_options)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'

def _download_from_http(self):
    http = HttpHook("GET", http_conn_id=self.http_connection_id)
    self.log.info("Calling HTTP method")
    response = http.run(self.http_endpoint)
    self.log.info(response.text)
    return response.text

def __init__(self, pre_task_id, token_hook_conn_id, api_hook_conn_id, *args, **kwargs):
    self.pre_task_id = pre_task_id
    self.token_hook = HttpHook(http_conn_id=token_hook_conn_id)
    self.api_hook = HttpHook(http_conn_id=api_hook_conn_id)
    self.token = None
    super(SpiderJobSensor, self).__init__(*args, **kwargs)

def test_host_encoded_https_connection(self, mock_get_connection):
    c = Connection(conn_id='http_default', conn_type='http', host='https://localhost')
    mock_get_connection.return_value = c
    hook = HttpHook()
    hook.get_conn({})
    self.assertEqual(hook.base_url, 'https://localhost')

def execute(self, context):
    http = HttpHook(self.method, http_conn_id=self.http_conn_id)
    self.log.info("Calling HTTP method")
    response = http.run(self.endpoint, self.data, self.headers, self.extra_options)
    return str(json.loads(response.text)['id'])

def _start_metronome_job(self, auth_token):
    """
    Starts the Metronome job with the ID self.metronome_job_id and
    returns the newly started job run ID.

    :param auth_token: authorization token
    :type auth_token: string
    """
    self.log.info("Starting Metronome job")
    http = HttpHook('POST', http_conn_id=self.dcos_http_conn_id)
    self.log.info("Calling HTTP method")
    response = http.run(
        endpoint=f"{self.dcos_metronome_jobs_api_endpoint}/{self._metronome_job_id}/runs",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"token={auth_token}"
        })
    self.log.debug(f"Response is: {str(response)}")
    self.log.info(response.text)

    job_run_id = json.loads(response.text)['id']
    self.log.debug(f"Job run id is: {str(job_run_id)}")
    return job_run_id

def _get_all_metronome_jobs(self, auth_token):
    """
    Returns all currently deployed Metronome jobs on the cluster.

    :param auth_token: authorization token
    :type auth_token: string
    """
    self.log.info("Getting all Metronome jobs")
    http = HttpHook('GET', http_conn_id=self.dcos_http_conn_id)
    self.log.info("Calling HTTP method")
    response = http.run(endpoint=self.dcos_metronome_jobs_api_endpoint,
                        data=json.dumps({
                            "uid": self.dcos_robot_user_name,
                            "password": self.dcos_robot_user_pwd
                        }),
                        headers={
                            "Content-Type": "application/json",
                            "Authorization": f"token={auth_token}"
                        })
    self.log.debug(f"Response is: {str(response)}")
    self.log.info(response.text)

    all_metronome_jobs = json.loads(response.text)
    self.log.debug(f"Metronome jobs are: {str(all_metronome_jobs)}")
    return all_metronome_jobs

def sub_dag(child_dag_id, input_file_names, key):
    sub_dag = DAG('{}.{}'.format(dag_id, child_dag_id),
                  default_args=default_args,
                  catchup=False,
                  schedule_interval=vlass_dag.schedule_interval)

    http_conn = HttpHook('GET', http_conn_id)
    auth_conn = HttpHook.get_connection(http_conn_id)
    with http_conn.run('/cred/auth/priv/users/{}'.format(auth_conn.login)) as response:
        cert = response.text

    for idx, x in enumerate(input_file_names):
        KubernetesPodOperator(dag=sub_dag,
                              namespace='default',
                              task_id='vlass-transform-{}-{}'.format(idx, key),
                              in_cluster=True,
                              get_logs=True,
                              cmds=['{}_run_single'.format(collection.lower())],
                              arguments=[x, cert],
                              name='airflow-vlass-transform-pod',
                              volumes=[volume],
                              volume_mounts=[volume_mount])
    return sub_dag

def insert_rows(): pg_hook = PostgresHook(postgres_conn_id='postgres_default') sql_insert = f"""INSERT INTO {table_variables['name']} VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)""" http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'], method='GET') res = http_hook.run(endpoint=table_variables['endpoint'], data={ 'resource_id': table_variables['resource_id'], 'limit': '10000000' }) http_hook.check_response(response=res) bcn_covid_measures = res.json()['result']['records'] bcn_covid_df = pd.DataFrame(bcn_covid_measures) bcn_covid_df = bcn_covid_df[[ '_id', 'Data_Indicador', 'Font', 'Frequencia_Indicador', 'Nom_Indicador', 'Nom_Variable', 'Territori', 'Unitat', 'Valor' ]] bcn_covid_df.replace({ 'NA': np.nan, '-Inf': np.nan, 'Inf': np.nan }, inplace=True) insert_ts = datetime.utcnow() for row in bcn_covid_df.itertuples(index=False): pg_hook.run(sql_insert, parameters=(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], insert_ts))
def insert_rows():
    insert_ts = datetime.utcnow()
    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'], method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={'codigo': table_variables['codigo']})
    http_hook.check_response(response=res)

    cases_df = pd.DataFrame(res.json()['timeline'])
    for row in cases_df.itertuples(index=False):
        date = row.fecha
        information = pd.Series(row.regiones[0]['data'])
        information = information[[
            'casosConfirmados', 'casosUci', 'casosFallecidos',
            'casosHospitalizados', 'casosRecuperados', 'casosConfirmadosDiario',
            'casosUciDiario', 'casosFallecidosDiario',
            'casosHospitalizadosDiario', 'casosRecuperadosDiario'
        ]]
        pg_hook.run(sql_insert,
                    parameters=(date, information[0], information[1],
                                information[2], information[3], information[4],
                                information[5], information[6], information[7],
                                information[8], information[9], insert_ts))

def _fetch_headers(self, force_refresh=False):
    headers = {"Content-Type": "application/x-ndjson"}
    if not self.protected:
        return headers
    if (
        self.access_token is None
        or time.time() + self.token_expires_margin > self.token_expires_time
        or force_refresh
    ):
        form_params = dict(
            grant_type="client_credentials",
            client_id=OIDC_CLIENT_ID,
            client_secret=OIDC_CLIENT_SECRET,
        )
        http = HttpHook(http_conn_id="oidc_server", method="POST")
        for i in range(3):
            try:
                response = http.run(OIDC_TOKEN_ENDPOINT, data=form_params)
            except AirflowException:
                self.log.exception("Keycloak unreachable")
                time.sleep(1)
            else:
                break
        else:
            raise
        token_info = response.json()
        self.access_token = token_info["access_token"]
        self.token_expires_time = time.time() + token_info["expires_in"]
    headers["Authorization"] = f"Bearer {self.access_token}"
    return headers

def _get_weather_data(self, lat, lon):
    """
    Gets the weather data for the specified coordinates and time.

    :param lat: latitude to be used as query param.
    :param lon: longitude to be used as query param.
    :return: response retrieved from the API
    """
    open_weather = HttpHook(method="GET", http_conn_id=self.open_weather_conn)
    data = {
        "lat": lat,
        "lon": lon,
        "dt": calendar.timegm(self.date.timetuple()),
        "appid": self.app_id,
        "units": "metric",
    }
    response = open_weather.run("/data/2.5/onecall/timemachine", data=data)
    if response.status_code == 200:
        self.log.info("Weather data successfully retrieved from location")
        return self._weather_date_to_datetime(response.json()["hourly"])
    else:
        raise ValueError

def get_rates(ds, **kwargs):
    pg_hook = PostgresHook(postgres_conn_id='rates')
    api_hook = HttpHook(http_conn_id='openexchangerates', method='GET')

    # If either of these raises an exception then we'll be notified via
    # Airflow
    resp = api_hook.run('')
    resp = json.loads(resp.content)

    # These are the only valid pairs the DB supports at the moment. Anything
    # else that turns up will be ignored.
    valid_pairs = (
        'AED', 'AFN', 'ALL', 'AMD', 'ANG', 'AOA', 'ARS', 'AUD', 'AWG', 'AZN',
        'BAM', 'BBD', 'BDT', 'BGN', 'BHD', 'BIF', 'BMD', 'BND', 'BOB', 'BRL',
        'BSD', 'BTC', 'BTN', 'BWP', 'BYN', 'BYR', 'BZD', 'CAD', 'CDF', 'CHF',
        'CLF', 'CLP', 'CNY', 'COP', 'CRC', 'CUC', 'CUP', 'CVE', 'CZK', 'DJF',
        'DKK', 'DOP', 'DZD', 'EEK', 'EGP', 'ERN', 'ETB', 'EUR', 'FJD', 'FKP',
        'GBP', 'GEL', 'GGP', 'GHS', 'GIP', 'GMD', 'GNF', 'GTQ', 'GYD', 'HKD',
        'HNL', 'HRK', 'HTG', 'HUF', 'IDR', 'ILS', 'IMP', 'INR', 'IQD', 'IRR',
        'ISK', 'JEP', 'JMD', 'JOD', 'JPY', 'KES', 'KGS', 'KHR', 'KMF', 'KPW',
        'KRW', 'KWD', 'KYD', 'KZT', 'LAK', 'LBP', 'LKR', 'LRD', 'LSL', 'LTL',
        'LVL', 'LYD', 'MAD', 'MDL', 'MGA', 'MKD', 'MMK', 'MNT', 'MOP', 'MRO',
        'MTL', 'MUR', 'MVR', 'MWK', 'MXN', 'MYR', 'MZN', 'NAD', 'NGN', 'NIO',
        'NOK', 'NPR', 'NZD', 'OMR', 'PAB', 'PEN', 'PGK', 'PHP', 'PKR', 'PLN',
        'PYG', 'QAR', 'RON', 'RSD', 'RUB', 'RWF', 'SAR', 'SBD', 'SCR', 'SDG',
        'SEK', 'SGD', 'SHP', 'SLL', 'SOS', 'SRD', 'STD', 'SVC', 'SYP', 'SZL',
        'THB', 'TJS', 'TMT', 'TND', 'TOP', 'TRY', 'TTD', 'TWD', 'TZS', 'UAH',
        'UGX', 'USD', 'UYU', 'UZS', 'VEF', 'VND', 'VUV', 'WST', 'XAF', 'XAG',
        'XAU', 'XCD', 'XDR', 'XOF', 'XPD', 'XPF', 'XPT', 'YER', 'ZAR', 'ZMK',
        'ZMW', 'ZWL')

    rates_insert = """INSERT INTO rates (pair, valid_until, rate)
                      VALUES (%s, %s, %s);"""

    # If this raises an exception then we'll be notified via Airflow
    valid_until = datetime.fromtimestamp(resp['timestamp'])

    for (iso2, rate) in resp['rates'].items():
        # If converting the rate to a float fails for whatever reason then
        # just move on.
        try:
            rate = float(rate)
        except:
            continue

        iso2 = iso2.upper().strip()

        if iso2 not in valid_pairs or rate < 0:
            continue

        pg_hook.run(rates_insert, parameters=(iso2, valid_until, rate))

def execute(self, context):
    # Run the training pipeline
    hook = HttpHook(method="POST", http_conn_id=self.conn_id)
    data = json.dumps({
        "environment_public_id": self.environment_id,
        "run_source_commit": self.run_source_commit,
        "resources": self.resources,
    })
    try:
        res = hook.run(
            RunPipelineOperator.RUN_PIPELINE_PATH.format(self.pipeline_id),
            data=data,
            headers=hook.get_connection(self.conn_id).extra_dejson,
        )
    except AirflowException as ex:
        self.log.error("Failed to run pipeline")
        raise ex

    pipeline_run_id = json.loads(res.content)["id"]
    self.pipeline_run_id = pipeline_run_id  # Used for cleanup only
    self.log.info("Pipeline successfully run, pipeline run ID: {}".format(
        pipeline_run_id))

    # Poll pipeline run status
    deadline = datetime.utcnow() + self.run_timeout
    get_hook = HttpHook(method="GET", http_conn_id=self.conn_id)
    while datetime.utcnow() < deadline:
        if self._check_status(get_hook, pipeline_run_id):
            return
        time.sleep(self.status_poke_interval)

    self._cleanup_run(pipeline_run_id)
    raise Exception("Run timed out {}".format(pipeline_run_id))

def setUp(self):
    session = requests.Session()
    adapter = requests_mock.Adapter()
    session.mount('mock', adapter)
    self.get_hook = HttpHook(method='GET')
    self.post_hook = HttpHook(method='POST')
    configuration.load_test_config()

def store_product_catalog_with_partition(**kwargs):
    task_instance = kwargs['ti']
    get_product_catalog_db_response = task_instance.xcom_pull(
        key=None, task_ids='get_product_catalog_db')
    get_product_catalog_db_json = json.loads(get_product_catalog_db_response)

    http_hook = HttpHook(
        method='POST',
        http_conn_id='product_catalog',
    )
    for index, product_catalog in enumerate(get_product_catalog_db_json['rows']):
        product_catalog_document = product_catalog['value']
        product_catalog_document.pop('_rev', None)
        product_catalog_document['_id'] = \
            f"{product_catalog_document['type']}_{product_catalog_document['subtype']}" \
            f":{product_catalog_document['_id']}"
        print("request", product_catalog_document)
        print("request", json.dumps(product_catalog_document).encode('utf-8'))
        print("request", json.loads(json.dumps(product_catalog_document)))
        response = http_hook.run(
            endpoint='copy_product',
            headers={"Content-Type": "application/json; charset=utf-8"},
            json=product_catalog_document,
        )
        print("response", response)

def insert_rows():
    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'], method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={
                            'resource_id': table_variables['resource_id'],
                            'limit': '10000000'
                        })
    http_hook.check_response(response=res)

    unemployment_measures = res.json()['result']['records']
    unemployment_df = pd.DataFrame(unemployment_measures)
    unemployment_df = unemployment_df[[
        '_id', 'Any', 'Mes', 'Codi_Districte', 'Nom_Districte', 'Codi_Barri',
        'Nom_Barri', 'Durada_atur', 'Nombre'
    ]]
    unemployment_df.replace({'NA': np.nan, '-Inf': np.nan, 'Inf': np.nan},
                            inplace=True)

    insert_ts = datetime.utcnow()
    for row in unemployment_df.itertuples(index=False):
        pg_hook.run(sql_insert,
                    parameters=(row[0], row[1], row[2], row[3], row[4],
                                row[5], row[6], row[7], row[8], insert_ts))

def print_hello():
    task_id = 'CG_details'
    # task_id='Lookup_Post',
    http_conn_id = 'cg_default'
    # http_conn_id='lookup_conn',
    method = 'POST'
    data = rq_body_param
    # data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    # data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = 'xyz/test/execute/jobs'
    # endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type": "application/json",
        "accept": "application/json",
        "x-api-key": "acp_testing",
        "Authorization": "Bearer eyJ4NXUiOiJpbXNfbmExLXN0ZzEta2V5LTEuY2VyIiwiYWxnIjoiUlMyNTYifQ.eyJpZCI6IjE1MTcyMTI4MjAzMjhfNjNkMzI5NjMtOTYzYy00YjA2LTk3MjAtN2M2OTExZDI2Y2E5X3VlMSIsImNsaWVudF9pZCI6ImFjcF90ZXN0aW5nIiwidXNlcl9pZCI6ImFjcF90ZXN0aW5nQEFkb2JlSUQiLCJ0eXBlIjoiYWNjZXNzX3Rva2VuIiwiYXMiOiJpbXMtbmExLXN0ZzEiLCJwYWMiOiJhY3BfdGVzdGluZ19zdGciLCJydGlkIjoiMTUxNzIxMjgyMDMyOV84YzFhYzRhOC1lZjM0LTQ3ZWYtOWFkNi0xMmI0ZTg3MjYzNjdfdWUxIiwicnRlYSI6IjE1MTg0MjI0MjAzMjkiLCJtb2kiOiJkMjVhMzg5ZSIsImMiOiJZSFg3Rld5d2JnaDhTYy9FMW1vaWJBPT0iLCJleHBpcmVzX2luIjoiODY0MDAwMDAiLCJzY29wZSI6ImFjcC5mb3VuZGF0aW9uLmFjY2Vzc0NvbnRyb2wsYWNwLmNvcmUucGlwZWxpbmUsc3lzdGVtLG9wZW5pZCxBZG9iZUlELGFkZGl0aW9uYWxfaW5mby5yb2xlcyxhZGRpdGlvbmFsX2luZm8ucHJvamVjdGVkUHJvZHVjdENvbnRleHQsYWNwLmZvdW5kYXRpb24sYWNwLmZvdW5kYXRpb24uY2F0YWxvZyxhY3AuZGlzY292ZXJ5IiwiY3JlYXRlZF9hdCI6IjE1MTcyMTI4MjAzMjgifQ.Q0eAxwLdkQ7XEDzpVwDtoKsmwySkEN26F85wDWjgo5j8lriO_8hUDEYYTXJjvXd0xOr82OnIQnWrDe8LXGLswH2rUYmR0oC40Wfv_ZMLf6IPyghNSw5QWKMYhOKTq-4n2kFvnvSh2Dq_F3govWSo1OWR609xC-HKLGAfBgWqAvCN5WPGQzQ8e5zeqCgclBTk4noBqJIVV06hJROSiD2Gt7FyC6YNMm3B-fVaOfFb4C2WBeGprQphXsVirMSvt9lWEYKqo5pGHgOlL5U40LeWFQMcnfOcmIntDG56BE3lhdyQeeltYbZlg1_RwsVwL5OcVWCtceyB0PWj9HheqvRsvA"
    }
    extra_options = {}

    http = HttpHook(method, http_conn_id='cg_default')
    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    response = http.run(endpoint, data, headers, extra_options)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'

def print_hello():
    task_id = 'IMS_details'
    # task_id='Lookup_Post',
    http_conn_id = 'cg_default'
    # http_conn_id='lookup_conn',
    method = 'POST'
    data = {}
    # data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    # data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = ''
    # endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "accept": "application/json"
    }

    http = HttpHook(method, http_conn_id='ims_default')
    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    response = http.run(endpoint, data, headers)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'

def start_spider(project, spider, token_hook_conn_id, api_hook_conn_id, instance_num=1, **context):
    """
    Start a crawler job.

    :param project:
    :param spider:
    :param token_hook_conn_id:
    :param api_hook_conn_id:
    :param instance_num:
    :param context:
    :return:
    """
    token_hook = HttpHook(http_conn_id=token_hook_conn_id)
    api_hook = HttpHook(http_conn_id=api_hook_conn_id)
    token = login(token_hook)
    res = run_job(project, spider, api_hook, token, instance_num=instance_num)
    if 'errors' in res:
        raise Exception(res['errors'])
    job_id_list = res['data']
    context['task_instance'].xcom_push(key='job_id_list', value=job_id_list)

def get_lime(**kwargs):
    http = HttpHook('GET', http_conn_id='http_gbfs_lime')
    # get availability endpoint with limit = 1000
    response = http.run("/api/partners/v1/gbfs/detroit/free_bike_status.json")
    limes = json.loads(response.text)
    for l in limes['data']['bikes']:
        device_id = l.pop('bike_id')
        lat = l.pop('lat')
        lon = l.pop('lon')
        insert = f"""
            insert into scooters.availability (
                vendor, device_id, timestamp, extra, geom
            ) values (
                'lime',
                '{device_id}',
                '{kwargs['execution_date']}',
                '{json.dumps(l)}',
                ST_SetSRID(ST_MakePoint({lon},{lat}), 4326)
            )
        """
        pg.run(insert)
    return response

def snapshot(**kwargs):
    """
    Query the TAP service and snapshot the OMM data.

    #FIXME: The query should have some conditions to limit the data.
    """
    logging.info('Populating inputs.')
    query = Variable.get('omm_input_uri_query')
    redis = RedisHook(redis_conn_id='redis_default')
    data = {'QUERY': query, 'REQUEST': 'doQuery', 'LANG': 'ADQL', 'FORMAT': 'csv'}
    http_connection = HttpHook(method='GET', http_conn_id='tap_service_host')
    count = -1

    with http_connection.run('/tap/sync?', parse.urlencode(data)) as response:
        arr = response.text.split('\n')
        count = len(arr)
        logging.info('Found {} items.'.format(count))
        sanitized_uris = []
        for uri in arr[1:]:
            if uri:
                artifact_uri = uri.split('/')[1].strip()
                sanitized_artifact_uri = artifact_uri.replace('+', '_').replace('%', '__')
                logging.info('Output is {}'.format(sanitized_artifact_uri))
                sanitized_uris.append(sanitized_artifact_uri)
        redis.get_conn().rpush(redis_key, *sanitized_uris)
        redis.get_conn().persist(redis_key)

    return 'Extracted {} items'.format(len(sanitized_uris))

def query_and_extract(**context):
    http_conn = HttpHook('GET', http_conn_id)
    redis_conn = RedisHook(redis_conn_id)
    prev_exec_date = context.get('prev_execution_date')
    next_exec_date = context.get('next_execution_date')
    query_meta = "SELECT fileName FROM archive_files WHERE archiveName = '{}'" \
                 " AND ingestDate > '{}' and ingestDate <= '{}' ORDER BY ingestDate".format(
                     collection,
                     prev_exec_date.strftime(datetime_format),
                     next_exec_date.strftime(datetime_format))
    logging.info('Query: {}'.format(query_meta))
    data = {
        'QUERY': query_meta,
        'LANG': 'ADQL',
        'FORMAT': '{}'.format(output_format)
    }

    with http_conn.run('/ad/auth-sync?{}'.format(parse.urlencode(data))) as response:
        artifact_files_list = response.text.split()[1:]
        if artifact_files_list:
            redis_key = '{}_{}_{}.{}'.format(collection,
                                             _to_milliseconds(prev_exec_date),
                                             _to_milliseconds(next_exec_date),
                                             output_format)
            redis_conn.get_conn().rpush(redis_key, artifact_files_list)
            return redis_key

def send_status_msg(**kwargs):
    http_conn_id = 'ingest_api_connection'
    endpoint = '/datasets/status'
    method = 'PUT'
    headers = {
        # 'authorization' : 'Bearer ' + kwargs['params']['auth_tok'],
        'content-type': 'application/json'
    }
    extra_options = []

    http = HttpHook(method, http_conn_id=http_conn_id)

    md_fname = os.path.join(os.environ['AIRFLOW_HOME'],
                            'data/temp', kwargs['run_id'],
                            'rslt.yml')
    with open(md_fname, 'r') as f:
        md = yaml.safe_load(f)
    data = {
        'dataset_id': kwargs['dag_run'].conf['submission_id'],
        'status': 'QA',
        'message': 'the process ran',
        'metadata': md
    }
    print('data: ', data)
    print("Calling HTTP method")
    response = http.run(endpoint, json.dumps(data), headers, extra_options)
    print(response.text)

def execute(self, context):
    http = HttpHook(self.method, http_conn_id=self.http_conn_id)
    logging.info("Calling HTTP method")
    response = http.run(self.endpoint, self.data, self.headers, self.extra_options)
    if self.response_check:
        if not self.response_check(response):
            raise AirflowException("Response check returned False.")

def __init__(self,
             endpoint,
             http_conn_id='http_default',
             method='GET',
             request_params=None,
             headers=None,
             response_check=None,
             extra_options=None,
             *args,
             **kwargs):
    super(HttpSensor, self).__init__(*args, **kwargs)
    self.endpoint = endpoint
    self.http_conn_id = http_conn_id
    self.request_params = request_params or {}
    self.headers = headers or {}
    self.extra_options = extra_options or {}
    self.response_check = response_check

    self.hook = HttpHook(
        method=method,
        http_conn_id=http_conn_id)

def send_schedules_to_screen(**kwargs):
    # hope it's op_kwargs + context
    data = json.dumps(kwargs['task_instance'].xcom_pull(task_ids='create_schedules'))
    http = HttpHook(kwargs['method'], http_conn_id=kwargs['http_conn_id'])
    # response_check from operator is on by default, I guess
    return http.run(kwargs['endpoint'], data, kwargs['headers'])

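# A minimal sketch (not part of the original snippet) of how a callable like
# send_schedules_to_screen could be wired up so that 'method', 'http_conn_id',
# 'endpoint', and 'headers' arrive in **kwargs alongside the Airflow context.
# The task_id, connection id, endpoint, and `dag` object here are hypothetical.
from airflow.operators.python_operator import PythonOperator

send_schedules = PythonOperator(
    task_id='send_schedules_to_screen',
    python_callable=send_schedules_to_screen,
    provide_context=True,  # merges the context into **kwargs (Airflow 1.x)
    op_kwargs={
        'method': 'POST',
        'http_conn_id': 'screen_api',
        'endpoint': '/schedules',
        'headers': {'Content-Type': 'application/json'},
    },
    dag=dag,
)
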
def read_connection(**context):
    context['task_instance'].xcom_push(key='my_key', value='my_value')
    hook = HttpHook(http_conn_id='http_default')
    session = hook.get_conn(None)
    print("requests.sessions.Session: ", session)

class TestHttpHook(unittest.TestCase):
    """Test get, post and raise_for_status"""

    def setUp(self):
        session = requests.Session()
        adapter = requests_mock.Adapter()
        session.mount('mock', adapter)
        self.get_hook = HttpHook(method='GET')
        self.post_hook = HttpHook(method='POST')
        configuration.load_test_config()

    @requests_mock.mock()
    def test_raise_for_status_with_200(self, m):
        m.get(
            'http://test:8080/v1/test',
            status_code=200,
            text='{"status":{"status": 200}}',
            reason='OK'
        )
        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            resp = self.get_hook.run('v1/test')
            self.assertEqual(resp.text, '{"status":{"status": 200}}')

    @requests_mock.mock()
    @mock.patch('requests.Request')
    def test_get_request_with_port(self, m, request_mock):
        from requests.exceptions import MissingSchema

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection_with_port
        ):
            expected_url = 'http://test.com:1234/some/endpoint'
            for endpoint in ['some/endpoint', '/some/endpoint']:
                try:
                    self.get_hook.run(endpoint)
                except MissingSchema:
                    pass

                request_mock.assert_called_once_with(
                    mock.ANY,
                    expected_url,
                    headers=mock.ANY,
                    params=mock.ANY
                )

                request_mock.reset_mock()

    @requests_mock.mock()
    def test_get_request_do_not_raise_for_status_if_check_response_is_false(self, m):
        m.get(
            'http://test:8080/v1/test',
            status_code=404,
            text='{"status":{"status": 404}}',
            reason='Bad request'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            resp = self.get_hook.run('v1/test', extra_options={'check_response': False})
            self.assertEqual(resp.text, '{"status":{"status": 404}}')

    @requests_mock.mock()
    def test_hook_contains_header_from_extra_field(self, m):
        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            expected_conn = get_airflow_connection()
            conn = self.get_hook.get_conn()
            self.assertDictContainsSubset(json.loads(expected_conn.extra), conn.headers)
            self.assertEqual(conn.headers.get('bareer'), 'test')

    @requests_mock.mock()
    def test_hook_uses_provided_header(self, m):
        conn = self.get_hook.get_conn(headers={"bareer": "newT0k3n"})
        self.assertEqual(conn.headers.get('bareer'), "newT0k3n")

    @requests_mock.mock()
    def test_hook_has_no_header_from_extra(self, m):
        conn = self.get_hook.get_conn()
        self.assertIsNone(conn.headers.get('bareer'))

    @requests_mock.mock()
    def test_hooks_header_from_extra_is_overridden(self, m):
        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            conn = self.get_hook.get_conn(headers={"bareer": "newT0k3n"})
            self.assertEqual(conn.headers.get('bareer'), 'newT0k3n')

    @requests_mock.mock()
    def test_post_request(self, m):
        m.post(
            'http://test:8080/v1/test',
            status_code=200,
            text='{"status":{"status": 200}}',
            reason='OK'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            resp = self.post_hook.run('v1/test')
            self.assertEqual(resp.status_code, 200)

    @requests_mock.mock()
    def test_post_request_with_error_code(self, m):
        m.post(
            'http://test:8080/v1/test',
            status_code=418,
            text='{"status":{"status": 418}}',
            reason='I\'m a teapot'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            with self.assertRaises(AirflowException):
                self.post_hook.run('v1/test')

    @requests_mock.mock()
    def test_post_request_do_not_raise_for_status_if_check_response_is_false(self, m):
        m.post(
            'http://*****:*****

    @mock.patch('airflow.hooks.http_hook.requests.Session')
    def test_retry_on_conn_error(self, mocked_session):
        retry_args = dict(
            wait=tenacity.wait_none(),
            stop=tenacity.stop_after_attempt(7),
            retry=tenacity.retry_if_exception_type(requests.exceptions.ConnectionError)
        )

        def send_and_raise(request, **kwargs):
            raise requests.exceptions.ConnectionError

        mocked_session().send.side_effect = send_and_raise
        # The job failed for some reason
        with self.assertRaises(tenacity.RetryError):
            self.get_hook.run_with_advanced_retry(
                endpoint='v1/test',
                _retry_args=retry_args
            )
        self.assertEqual(
            self.get_hook._retry_obj.stop.max_attempt_number + 1,
            mocked_session.call_count
        )

    def test_header_from_extra_and_run_method_are_merged(self):
        def run_and_return(session, prepped_request, extra_options, **kwargs):
            return prepped_request

        # The job failed for some reason
        with mock.patch(
            'airflow.hooks.http_hook.HttpHook.run_and_check',
            side_effect=run_and_return
        ):
            with mock.patch(
                'airflow.hooks.base_hook.BaseHook.get_connection',
                side_effect=get_airflow_connection
            ):
                pr = self.get_hook.run('v1/test', headers={'some_other_header': 'test'})
                actual = dict(pr.headers)
                self.assertEqual(actual.get('bareer'), 'test')
                self.assertEqual(actual.get('some_other_header'), 'test')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_http_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='localhost', schema='http')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'http://localhost')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_https_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='localhost', schema='https')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'https://localhost')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_host_encoded_http_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='http://localhost')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'http://localhost')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_host_encoded_https_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='https://localhost')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'https://localhost')

class HttpSensor(BaseSensorOperator):
    """
    Executes a HTTP get statement and returns False on failure:
    404 not found or response_check function returned False

    :param http_conn_id: The connection to run the sensor against
    :type http_conn_id: string
    :param method: The HTTP request method to use
    :type method: string
    :param endpoint: The relative part of the full url
    :type endpoint: string
    :param request_params: The parameters to be added to the GET url
    :type request_params: a dictionary of string key/value pairs
    :param headers: The HTTP headers to be added to the GET request
    :type headers: a dictionary of string key/value pairs
    :param response_check: A check against the 'requests' response object.
        Returns True for 'pass' and False otherwise.
    :type response_check: A lambda or defined function.
    :param extra_options: Extra options for the 'requests' library, see the
        'requests' documentation (options to modify timeout, ssl, etc.)
    :type extra_options: A dictionary of options, where key is string and value
        depends on the option that's being modified.
    """

    template_fields = ('endpoint', 'request_params')

    @apply_defaults
    def __init__(self,
                 endpoint,
                 http_conn_id='http_default',
                 method='GET',
                 request_params=None,
                 headers=None,
                 response_check=None,
                 extra_options=None,
                 *args,
                 **kwargs):
        super(HttpSensor, self).__init__(*args, **kwargs)
        self.endpoint = endpoint
        self.http_conn_id = http_conn_id
        self.request_params = request_params or {}
        self.headers = headers or {}
        self.extra_options = extra_options or {}
        self.response_check = response_check

        self.hook = HttpHook(
            method=method,
            http_conn_id=http_conn_id)

    def poke(self, context):
        self.log.info('Poking: %s', self.endpoint)
        try:
            response = self.hook.run(self.endpoint,
                                     data=self.request_params,
                                     headers=self.headers,
                                     extra_options=self.extra_options)
            if self.response_check:
                # run content check on response
                return self.response_check(response)
        except AirflowException as ae:
            if str(ae).startswith("404"):
                return False
            raise ae

        return True

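# A minimal usage sketch (not part of the original snippet) for the HttpSensor
# class defined above. Assumptions: an HTTP connection named 'http_default'
# pointing at the target service, a '/health'-style endpoint, and a `dag`
# object defined elsewhere in the file. The sensor re-pokes the endpoint every
# poke_interval seconds until the response_check callable returns True.
check_api_ready = HttpSensor(
    task_id='check_api_ready',
    http_conn_id='http_default',
    endpoint='health',
    request_params={},
    response_check=lambda response: 'ok' in response.text,
    poke_interval=60,
    dag=dag,
)
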