Example #1
def get_spin(**kwargs):
    http = HttpHook('GET', http_conn_id='http_gbfs_spin')

    response = http.run("/api/gbfs/v1/detroit/free_bike_status")

    spins = json.loads(response.text)

    for s in spins['data']['bikes']:
        device_id = s.pop('bike_id')
        lat = s.pop('lat')
        lon = s.pop('lon')

        insert = f"""
      insert into scooters.availability (
        vendor, 
        device_id, 
        timestamp,
        extra,
        geom
      ) values (
        'spin',
        '{device_id}',
        '{kwargs['execution_date']}',
        '{json.dumps(s)}',
        ST_SetSRID(ST_MakePoint({lon},{lat}), 4326)
      )
    """
        # `pg` is assumed to be a module-level PostgresHook defined elsewhere
        # in the source DAG file.
        pg.run(insert)

    return response
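A note on the insert above: building SQL with an f-string leaves the statement open to injection if any scraped attribute contains a quote. A minimal parameterized sketch of the same insert, assuming `pg` is the module-level PostgresHook the original relies on:

def insert_availability(pg, vendor, device_id, execution_date, extra, lon, lat):
    # Hedged sketch: the same insert as above, but with bound parameters.
    sql = """
        insert into scooters.availability (vendor, device_id, timestamp, extra, geom)
        values (%s, %s, %s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326))
    """
    pg.run(sql, parameters=(vendor, device_id, execution_date, extra, lon, lat))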
def get_data_zomato_api(*args, **kwargs):

    api_hook = HttpHook(http_conn_id="zomato_api", method='GET')

    data_dict = {}

    schema = {"properties": {"restaurants": {"mergeStrategy": "append"}}}
    merger = Merger(schema)

    for i in range(0, 100, 20):

        endpoint_url = "search?entity_id=3&entity_type=city&start={}&count=20&sort=rating".format(
            i)

        resp_url = api_hook.run(endpoint=endpoint_url)
        resp = json.loads(resp_url.content)

        if i == 0:

            data_dict.update(resp)
            result = data_dict
        else:
            result = merger.merge(result, resp)
        # Persist the merged result after each page; the context manager
        # closes the file, so no explicit close() is needed.
        with open(
                "/Users/preetiyerkuntwar/documents/Zomato-test/all_restro.json",
                "w") as f:
            json.dump(result, f)
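The Merger above comes from the jsonmerge package; the "append" strategy concatenates the restaurants arrays across the paginated responses while other keys are simply overwritten. A toy illustration with made-up payloads:

from jsonmerge import Merger

schema = {"properties": {"restaurants": {"mergeStrategy": "append"}}}
merger = Merger(schema)

page1 = {"restaurants": [{"id": 1}], "results_shown": 20}
page2 = {"restaurants": [{"id": 2}], "results_shown": 20}

merged = merger.merge(page1, page2)
# merged == {"restaurants": [{"id": 1}, {"id": 2}], "results_shown": 20}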
def copy_brazil_data_file(origin_host, origin_filepath, dest_bucket, dest_key):
    """Copy Brazil data file to a local bucket.
    Copy the source file which contains detailed data about Brazil to
    an AWS S3 bucket to make it available to AWS EMR.
    args:
    origin_host (str): host where the source file is in
    origin_filepath (str): full path to the file in the host
    dest_bucket (str): name of the bucket to store the file
    dest_key (str): prefix/name of the file in the destination bucket
    """
    logging.info('Copying Brazil data file '
                 f'FROM: http://{origin_host}/{origin_filepath} '
                 f'TO: s3://{dest_bucket}/{dest_key}')

    # Create a connection to the source server
    conn = Connection(conn_id='http_conn_brasilio',
                      conn_type='http',
                      host=origin_host,
                      port=80)
    session = settings.Session()  # get the session
    session.add(conn)
    session.commit()

    # Get the data file
    http_hook = HttpHook(method='GET', http_conn_id='http_conn_brasilio')
    response_br_data = http_hook.run(origin_filepath)

    # Store data file into s3 bucket
    s3_hook = S3Hook(aws_conn_id='aws_default')
    s3_hook.load_bytes(response_br_data.content,
                       dest_key,
                       bucket_name=dest_bucket,
                       replace=True)

    logging.info('Data copy finished.')
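For context, a sketch of how this function might be wired into a DAG; every value below is a placeholder, not taken from the original:

from airflow.operators.python_operator import PythonOperator

copy_task = PythonOperator(
    task_id='copy_brazil_data_file',
    python_callable=copy_brazil_data_file,
    op_kwargs={
        'origin_host': 'example.org',                 # placeholder host
        'origin_filepath': 'datasets/brazil.csv.gz',  # placeholder path
        'dest_bucket': 'my-staging-bucket',           # placeholder bucket
        'dest_key': 'brazil/brazil.csv.gz',           # placeholder key
    },
    dag=dag,  # assumes a module-level DAG object
)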
    def test_connection_without_host(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http')
        mock_get_connection.return_value = c

        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'http://')
    def get_typeahead_result(self, http_endpoint, data_headers):
        """Look up BAG verblijfsobject id from typeahead service for non-geometry location data"""

        http = HttpHook(method="GET", http_conn_id=self.http_conn_id)
        http_response = http.run(endpoint=http_endpoint, data=None, headers=data_headers)

        return http_response
Example #6
    def __init__(self,
                 http_conn_id,
                 token,
                 job_name,
                 data=None,
                 headers=None,
                 method='start',
                 daemon=True,
                 parallelism=0,
                 retry_times=3,
                 retry_sleep_time=1,
                 *args,
                 **kwargs):
        basic_headers = {'Content-Type': "application/json", 'Token': token}
        if headers:
            basic_headers.update(headers)
        self.headers = basic_headers
        self.http_conn_id = http_conn_id
        self.job_name = job_name
        self.http = HttpHook('POST', http_conn_id=self.http_conn_id)

        self.data = data if data is not None else {}
        self.job_last_run_id = dict()
        self.job_pools = []
        self.all_jobs = None
        self.finished_jobs = []
        self.parallelism = parallelism
        self.method = method
        self.daemon = daemon
        self.retry_times = retry_times
        self.retry_sleep_time = retry_sleep_time
        self.start_run_time = time.time()
        self.failed_jobs = defaultdict(int)

        super(_BaseJobOperator, self).__init__(*args, **kwargs)
def print_hello():
    task_id = 'PQS_details'
    #task_id='Lookup_Post',
    http_conn_id = 'qs_default'
    #http_conn_id='lookup_conn',
    method = 'POST'
    data = task  # `task` is assumed to be defined elsewhere in the source file
    #data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    #data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = 'query/12345@xyzOrg/test'
    #endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type":
        "application/json",
        "Authorization":
        "Bearer eyJ4NXUiOiJpbXMta2V5LTEuY2VyIiwiYWxnIjoiUlMyNTYifQ.eyJpZCI6IjE1MTcyNDUzNzgzNDVfYjg2ZGU4ZDgtZTAzZi00OGNmLThmZTAtN2VmNDcxYjFhYzRhX3VlMSIsImNsaWVudF9pZCI6Ik1DRFBDYXRhbG9nU2VydmljZVFhMiIsInVzZXJfaWQiOiJNQ0RQQ2F0YWxvZ1NlcnZpY2VRYTJAQWRvYmVJRCIsInR5cGUiOiJhY2Nlc3NfdG9rZW4iLCJhcyI6Imltcy1uYTEtcWEyIiwicGFjIjoiTUNEUENhdGFsb2dTZXJ2aWNlUWEyX2RldnFhIiwicnRpZCI6IjE1MTcyNDUzNzgzNTBfZjNkOTExMTEtZjFjNi00OTIzLThjNWEtYThmN2RjNGJkNjhhX3VlMSIsInJ0ZWEiOiIxNTE4NDU0OTc4MzUwIiwibW9pIjoiMzI5YjFmNDciLCJjIjoiTnd3VmE4UG9ubDZuWkZpQi8yZjdkUT09IiwiZXhwaXJlc19pbiI6Ijg2NDAwMDAwIiwic2NvcGUiOiJzeXN0ZW0sQWRvYmVJRCxvcGVuaWQiLCJjcmVhdGVkX2F0IjoiMTUxNzI0NTM3ODM0NSJ9.MTuH1VI-b4PcLlr1BHB4a_CXxwn8TbsyQVk3pJi2uoNpng9jhH1W-lfg0pzbhLy0Ji81HlIopbTEGYNYlfXjzE9OtKM-BX7-VQOSXmS7tgAkfec0OheZariZUIdpXUa4YY-BHLsSijkBWs-AUTzZ9SG91M-8cWn_4U7uPSRn2aqEqT92kZgRRl-u5Zq6YteSFXUJCpY64IOO2c-sZfLQ6yQb425qUyn468ECPElZ5h00483xs81ZFF-r8LFhQtQpQ7m9pfNFZWqUexM_RrQNUL6XZmiq7mZ1_wxw1RSIDDMTvt35JU2MUjs6B0c9GwMTL814ej0ewrDV1qhAIliLlA"
    }
    extra_options = {}
    http = HttpHook(method, http_conn_id='qs_default')

    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    print(os.environ['TEST_PATH'])
    response = http.run('query/12345@xyzOrg/visual', data, headers,
                        extra_options)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'
    def _download_from_http(self):
        http = HttpHook("GET", http_conn_id=self.http_connection_id)
        self.log.info("Calling HTTP method")
        response = http.run(self.http_endpoint)
        self.log.info(response.text)

        return response.text
Example #9
    def __init__(self, pre_task_id, token_hook_conn_id, api_hook_conn_id, *args, **kwargs):
        self.pre_task_id = pre_task_id
        self.token_hook = HttpHook(http_conn_id=token_hook_conn_id)

        self.api_hook = HttpHook(http_conn_id=api_hook_conn_id)
        self.token = None
        super(SpiderJobSensor, self).__init__(*args, **kwargs)
Example #10
 def test_host_encoded_https_connection(self, mock_get_connection):
     c = Connection(conn_id='http_default', conn_type='http',
                    host='https://localhost')
     mock_get_connection.return_value = c
     hook = HttpHook()
     hook.get_conn({})
     self.assertEqual(hook.base_url, 'https://localhost')
    def execute(self, context):
        http = HttpHook(self.method, http_conn_id=self.http_conn_id)
        self.log.info("Calling HTTP method")
        response = http.run(self.endpoint, self.data, self.headers,
                            self.extra_options)

        return str(json.loads(response.text)['id'])
    def _start_metronome_job(self, auth_token):
        """
        Starts the metronome jobs with the ID self.metronome_job_id
        Returns the newly started job run ID

        :param auth_token: authorization token
        :type auth_token: string
        """
        self.log.info("Starting Metronome job")

        http = HttpHook('POST', http_conn_id=self.dcos_http_conn_id)

        self.log.info("Calling HTTP method")
        response = http.run(
            endpoint=f"{self.dcos_metronome_jobs_api_endpoint}/"
                     f"{self._metronome_job_id}/runs",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"token={auth_token}"
            })
        self.log.debug(f"Response is: {str(response)}")
        self.log.info(response.text)

        job_run_id = json.loads(response.text)['id']
        self.log.debug(f"Job run id is: {str(job_run_id)}")
        return job_run_id
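The run ID returned here would typically be polled until the run completes. A sketch of such a check, assuming Metronome exposes GET <jobs endpoint>/<job id>/runs/<run id> (an assumption about the API, not shown in the original):

    def _get_metronome_job_run(self, auth_token, job_run_id):
        # Hedged sketch: the runs/<run id> endpoint is assumed, not confirmed
        # by the snippet above.
        http = HttpHook('GET', http_conn_id=self.dcos_http_conn_id)
        response = http.run(
            endpoint=f"{self.dcos_metronome_jobs_api_endpoint}/"
                     f"{self._metronome_job_id}/runs/{job_run_id}",
            headers={"Content-Type": "application/json",
                     "Authorization": f"token={auth_token}"})
        return json.loads(response.text)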
    def _get_all_metronome_jobs(self, auth_token):
        """
        Returns all currently deployed Metronome jobs on the cluster

        :param auth_token: authorization token
        :type auth_token: string
        """
        self.log.info("Getting all Metronome jobs")

        http = HttpHook('GET', http_conn_id=self.dcos_http_conn_id)

        self.log.info("Calling HTTP method")
        response = http.run(endpoint=self.dcos_metronome_jobs_api_endpoint,
                            data=json.dumps({
                                "uid": self.dcos_robot_user_name,
                                "password": self.dcos_robot_user_pwd
                            }),
                            headers={
                                "Content-Type": "application/json",
                                "Authorization": f"token={auth_token}"
                            })
        self.log.debug(f"Response is: {str(response)}")
        self.log.info(response.text)

        all_metronome_jobs = json.loads(response.text)
        self.log.debug(f"Metronome jobs are: {str(all_metronome_jobs)}")

        return all_metronome_jobs
def sub_dag(child_dag_id, input_file_names, key):
    # dag_id, default_args, vlass_dag, http_conn_id, collection, volume and
    # volume_mount are assumed to be module-level globals in the source DAG file.
    sub_dag = DAG('{}.{}'.format(dag_id, child_dag_id),
                  default_args=default_args,
                  catchup=False,
                  schedule_interval=vlass_dag.schedule_interval)
    http_conn = HttpHook('GET', http_conn_id)
    auth_conn = HttpHook.get_connection(http_conn_id)

    with http_conn.run('/cred/auth/priv/users/{}'.format(
            auth_conn.login)) as response:
        cert = response.text
        for idx, x in enumerate(input_file_names):
            KubernetesPodOperator(
                dag=sub_dag,
                namespace='default',
                task_id='vlass-transform-{}-{}'.format(idx, key),
                in_cluster=True,
                get_logs=True,
                cmds=['{}_run_single'.format(collection.lower())],
                arguments=[x, cert],
                name='airflow-vlass-transform-pod',
                volumes=[volume],
                volume_mounts=[volume_mount])

    return sub_dag
Example #15
def insert_rows():

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={
                            'resource_id': table_variables['resource_id'],
                            'limit': '10000000'
                        })

    http_hook.check_response(response=res)

    bcn_covid_measures = res.json()['result']['records']

    bcn_covid_df = pd.DataFrame(bcn_covid_measures)
    bcn_covid_df = bcn_covid_df[[
        '_id', 'Data_Indicador', 'Font', 'Frequencia_Indicador',
        'Nom_Indicador', 'Nom_Variable', 'Territori', 'Unitat', 'Valor'
    ]]
    bcn_covid_df.replace({'NA': np.nan, '-Inf': np.nan, 'Inf': np.nan},
                         inplace=True)
    insert_ts = datetime.utcnow()

    for row in bcn_covid_df.itertuples(index=False):
        pg_hook.run(sql_insert,
                    parameters=(row[0], row[1], row[2], row[3], row[4], row[5],
                                row[6], row[7], row[8], insert_ts))
Example #16
def insert_rows():

    insert_ts = datetime.utcnow()

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={'codigo': table_variables['codigo']})
    http_hook.check_response(response=res)

    cases_df = pd.DataFrame(res.json()['timeline'])

    for row in cases_df.itertuples(index=False):
        date = row.fecha

        information = pd.Series(row.regiones[0]['data'])

        information = information[['casosConfirmados', 'casosUci', 'casosFallecidos', 'casosHospitalizados', 'casosRecuperados',
                                   'casosConfirmadosDiario', 'casosUciDiario', 'casosFallecidosDiario',
                                   'casosHospitalizadosDiario', 'casosRecuperadosDiario']]
        pg_hook.run(sql_insert, parameters=(date, information[0], information[1],
                                            information[2], information[3], information[4],
                                            information[5], information[6], information[7],
                                            information[8], information[9], insert_ts))
Example #17
    def _fetch_headers(self, force_refresh=False):
        headers = {"Content-Type": "application/x-ndjson"}
        if not self.protected:
            return headers
        if (
            self.access_token is None
            or time.time() + self.token_expires_margin > self.token_expires_time
            or force_refresh
        ):
            form_params = dict(
                grant_type="client_credentials",
                client_id=OIDC_CLIENT_ID,
                client_secret=OIDC_CLIENT_SECRET,
            )
            http = HttpHook(http_conn_id="oidc_server", method="POST")
            for i in range(3):
                try:
                    response = http.run(OIDC_TOKEN_ENDPOINT, data=form_params)
                except AirflowException:
                    self.log.exception("Keycloak unreachable")
                    time.sleep(1)
                else:
                    break
            else:
                # A bare `raise` would fail here (there is no active exception
                # outside the except block), so raise an explicit error instead.
                raise AirflowException("Keycloak unreachable after 3 attempts")
            token_info = response.json()
            self.access_token = token_info["access_token"]
            self.token_expires_time = time.time() + token_info["expires_in"]

        headers["Authorization"] = f"Bearer {self.access_token}"
        return headers
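A sketch of how a caller might consume these headers; the endpoint name and the http_conn_id attribute are illustrative assumptions, not part of the original:

    def _post_ndjson(self, docs):
        # Hedged sketch: assumes the class also carries an `http_conn_id`
        # and that the target service accepts newline-delimited JSON.
        headers = self._fetch_headers()  # refreshes the token when stale
        http = HttpHook(http_conn_id=self.http_conn_id, method="POST")
        body = "\n".join(json.dumps(doc) for doc in docs)
        return http.run("bulk_insert", data=body, headers=headers)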
Example #18
    def _get_weather_data(self, lat, lon):
        """
        Gets the weather data from the specified coordinates and time

        :param lat: latitude to be used as query param.
        :param lon: longitude to be used as query param.
        :return: response retrieved from the API
        """

        open_weather = HttpHook(method="GET",
                                http_conn_id=self.open_weather_conn)
        data = {
            "lat": lat,
            "lon": lon,
            "dt": calendar.timegm(self.date.timetuple()),
            "appid": self.app_id,
            "units": "metric",
        }

        response = open_weather.run("/data/2.5/onecall/timemachine", data=data)

        if response.status_code == 200:
            self.log.info("Weather data successfully retrieved from location")
            return self._weather_date_to_datetime(response.json()["hourly"])
        else:
            raise ValueError(
                "Weather request failed with status {}".format(
                    response.status_code))
def get_rates(ds, **kwargs):
    pg_hook = PostgresHook(postgres_conn_id='rates')
    api_hook = HttpHook(http_conn_id='openexchangerates', method='GET')

    # If either of these raises an exception then we'll be notified via
    # Airflow
    resp = api_hook.run('')
    resp = json.loads(resp.content)

    # These are the only valid pairs the DB supports at the moment. Anything
    # else that turns up will be ignored.
    valid_pairs = (
        'AED', 'AFN', 'ALL', 'AMD', 'ANG', 'AOA', 'ARS',
        'AUD', 'AWG', 'AZN', 'BAM', 'BBD', 'BDT', 'BGN',
        'BHD', 'BIF', 'BMD', 'BND', 'BOB', 'BRL', 'BSD',
        'BTC', 'BTN', 'BWP', 'BYN', 'BYR', 'BZD', 'CAD',
        'CDF', 'CHF', 'CLF', 'CLP', 'CNY', 'COP', 'CRC',
        'CUC', 'CUP', 'CVE', 'CZK', 'DJF', 'DKK', 'DOP',
        'DZD', 'EEK', 'EGP', 'ERN', 'ETB', 'EUR', 'FJD',
        'FKP', 'GBP', 'GEL', 'GGP', 'GHS', 'GIP', 'GMD',
        'GNF', 'GTQ', 'GYD', 'HKD', 'HNL', 'HRK', 'HTG',
        'HUF', 'IDR', 'ILS', 'IMP', 'INR', 'IQD', 'IRR',
        'ISK', 'JEP', 'JMD', 'JOD', 'JPY', 'KES', 'KGS',
        'KHR', 'KMF', 'KPW', 'KRW', 'KWD', 'KYD', 'KZT',
        'LAK', 'LBP', 'LKR', 'LRD', 'LSL', 'LTL', 'LVL',
        'LYD', 'MAD', 'MDL', 'MGA', 'MKD', 'MMK', 'MNT',
        'MOP', 'MRO', 'MTL', 'MUR', 'MVR', 'MWK', 'MXN',
        'MYR', 'MZN', 'NAD', 'NGN', 'NIO', 'NOK', 'NPR',
        'NZD', 'OMR', 'PAB', 'PEN', 'PGK', 'PHP', 'PKR',
        'PLN', 'PYG', 'QAR', 'RON', 'RSD', 'RUB', 'RWF',
        'SAR', 'SBD', 'SCR', 'SDG', 'SEK', 'SGD', 'SHP',
        'SLL', 'SOS', 'SRD', 'STD', 'SVC', 'SYP', 'SZL',
        'THB', 'TJS', 'TMT', 'TND', 'TOP', 'TRY', 'TTD',
        'TWD', 'TZS', 'UAH', 'UGX', 'USD', 'UYU', 'UZS',
        'VEF', 'VND', 'VUV', 'WST', 'XAF', 'XAG', 'XAU',
        'XCD', 'XDR', 'XOF', 'XPD', 'XPF', 'XPT', 'YER',
        'ZAR', 'ZMK', 'ZMW', 'ZWL')

    rates_insert = """INSERT INTO rates (pair, valid_until, rate)
                      VALUES (%s, %s, %s);"""

    # If this raises an exception then we'll be notified via Airflow
    valid_until = datetime.fromtimestamp(resp['timestamp'])

    for (iso2, rate) in resp['rates'].items():
        # If converting the rate to a float fails for whatever reason then
        # just move on.
        try:
            rate = float(rate)
        except (TypeError, ValueError):
            continue

        iso2 = iso2.upper().strip()

        if iso2 not in valid_pairs or rate < 0:
            continue

        pg_hook.run(rates_insert, parameters=(iso2,
                                              valid_until,
                                              rate))
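Since pg_hook.run opens a round trip per rate, the loop could instead collect tuples and hand them to PostgresHook.insert_rows, which batches the commits. A hedged alternative tail for the function:

    # Sketch: batch the inserts instead of one pg_hook.run call per pair.
    rows = []
    for iso2, rate in resp['rates'].items():
        try:
            rate = float(rate)
        except (TypeError, ValueError):
            continue
        iso2 = iso2.upper().strip()
        if iso2 in valid_pairs and rate >= 0:
            rows.append((iso2, valid_until, rate))

    pg_hook.insert_rows('rates', rows,
                        target_fields=['pair', 'valid_until', 'rate'])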
Example #20
 def test_host_encoded_https_connection(self, mock_get_connection):
     c = Connection(conn_id='http_default', conn_type='http',
                    host='https://localhost')
     mock_get_connection.return_value = c
     hook = HttpHook()
     hook.get_conn({})
     self.assertEqual(hook.base_url, 'https://localhost')
    def execute(self, context):
        # Run the training pipeline
        hook = HttpHook(method="POST", http_conn_id=self.conn_id)
        data = json.dumps({
            "environment_public_id": self.environment_id,
            "run_source_commit": self.run_source_commit,
            "resources": self.resources,
        })

        try:
            res = hook.run(
                RunPipelineOperator.RUN_PIPELINE_PATH.format(self.pipeline_id),
                data=data,
                headers=hook.get_connection(self.conn_id).extra_dejson,
            )
        except AirflowException as ex:
            self.log.error("Failed to run pipeline")
            raise ex

        pipeline_run_id = json.loads(res.content)["id"]
        self.pipeline_run_id = pipeline_run_id  # Used for cleanup only
        self.log.info("Pipeline successfully run, pipeline run ID: {}".format(
            pipeline_run_id))

        # Poll pipeline run status
        deadline = datetime.utcnow() + self.run_timeout
        get_hook = HttpHook(method="GET", http_conn_id=self.conn_id)
        while datetime.utcnow() < deadline:
            if self._check_status(get_hook, pipeline_run_id):
                return
            time.sleep(self.status_poke_interval)

        self._cleanup_run(pipeline_run_id)
        raise Exception("Run timed out {}".format(pipeline_run_id))
Example #22
 def setUp(self):
     session = requests.Session()
     adapter = requests_mock.Adapter()
     session.mount('mock', adapter)
     self.get_hook = HttpHook(method='GET')
     self.post_hook = HttpHook(method='POST')
     configuration.load_test_config()
Example #23
def store_product_catalog_with_partition(**kwargs):
    task_instance = kwargs['ti']
    get_product_catalog_db_response = task_instance.xcom_pull(
        key=None, task_ids='get_product_catalog_db')
    get_product_catalog_db_json = json.loads(get_product_catalog_db_response)

    http_hook = HttpHook(
        method='POST',
        http_conn_id='product_catalog',
    )

    for index, product_catalog in enumerate(
            get_product_catalog_db_json['rows']):
        product_catalog_document = product_catalog['value']
        product_catalog_document.pop('_rev', None)
        product_catalog_document['_id'] = f"{product_catalog_document['type']}_{product_catalog_document['subtype']}" \
                                          f":{product_catalog_document['_id']}"

        print("request", product_catalog_document)
        print("request", json.dumps(product_catalog_document).encode('utf-8'))
        print("request", json.loads(json.dumps(product_catalog_document)))
        response = http_hook.run(
            endpoint='copy_product',
            headers={"Content-Type": "application/json; charset=utf-8"},
            json=product_catalog_document,
        )

        print("response", response)
Example #24
def insert_rows():

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={
                            'resource_id': table_variables['resource_id'],
                            'limit': '10000000'
                        })

    http_hook.check_response(response=res)

    unemployment_measures = res.json()['result']['records']

    unemployment_df = pd.DataFrame(unemployment_measures)
    unemployment_df = unemployment_df[[
        '_id', 'Any', 'Mes', 'Codi_Districte', 'Nom_Districte', 'Codi_Barri',
        'Nom_Barri', 'Durada_atur', 'Nombre'
    ]]
    unemployment_df.replace({'NA': np.nan, '-Inf': np.nan, 'Inf': np.nan},
                            inplace=True)
    insert_ts = datetime.utcnow()

    for row in unemployment_df.itertuples(index=False):
        pg_hook.run(sql_insert,
                    parameters=(row[0], row[1], row[2], row[3], row[4], row[5],
                                row[6], row[7], row[8], insert_ts))
Example #25
def print_hello():
    task_id = 'CG_details'
    #task_id='Lookup_Post',
    http_conn_id = 'cg_default'
    #http_conn_id='lookup_conn',
    method = 'POST'
    data = rq_body_param  # `rq_body_param` is assumed to be defined elsewhere
    #data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    #data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = 'xyz/test/execute/jobs'
    #endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type":
        "application/json",
        "accept":
        "application/json",
        "x-api-key":
        "acp_testing",
        "Authorization":
        "Bearer eyJ4NXUiOiJpbXNfbmExLXN0ZzEta2V5LTEuY2VyIiwiYWxnIjoiUlMyNTYifQ.eyJpZCI6IjE1MTcyMTI4MjAzMjhfNjNkMzI5NjMtOTYzYy00YjA2LTk3MjAtN2M2OTExZDI2Y2E5X3VlMSIsImNsaWVudF9pZCI6ImFjcF90ZXN0aW5nIiwidXNlcl9pZCI6ImFjcF90ZXN0aW5nQEFkb2JlSUQiLCJ0eXBlIjoiYWNjZXNzX3Rva2VuIiwiYXMiOiJpbXMtbmExLXN0ZzEiLCJwYWMiOiJhY3BfdGVzdGluZ19zdGciLCJydGlkIjoiMTUxNzIxMjgyMDMyOV84YzFhYzRhOC1lZjM0LTQ3ZWYtOWFkNi0xMmI0ZTg3MjYzNjdfdWUxIiwicnRlYSI6IjE1MTg0MjI0MjAzMjkiLCJtb2kiOiJkMjVhMzg5ZSIsImMiOiJZSFg3Rld5d2JnaDhTYy9FMW1vaWJBPT0iLCJleHBpcmVzX2luIjoiODY0MDAwMDAiLCJzY29wZSI6ImFjcC5mb3VuZGF0aW9uLmFjY2Vzc0NvbnRyb2wsYWNwLmNvcmUucGlwZWxpbmUsc3lzdGVtLG9wZW5pZCxBZG9iZUlELGFkZGl0aW9uYWxfaW5mby5yb2xlcyxhZGRpdGlvbmFsX2luZm8ucHJvamVjdGVkUHJvZHVjdENvbnRleHQsYWNwLmZvdW5kYXRpb24sYWNwLmZvdW5kYXRpb24uY2F0YWxvZyxhY3AuZGlzY292ZXJ5IiwiY3JlYXRlZF9hdCI6IjE1MTcyMTI4MjAzMjgifQ.Q0eAxwLdkQ7XEDzpVwDtoKsmwySkEN26F85wDWjgo5j8lriO_8hUDEYYTXJjvXd0xOr82OnIQnWrDe8LXGLswH2rUYmR0oC40Wfv_ZMLf6IPyghNSw5QWKMYhOKTq-4n2kFvnvSh2Dq_F3govWSo1OWR609xC-HKLGAfBgWqAvCN5WPGQzQ8e5zeqCgclBTk4noBqJIVV06hJROSiD2Gt7FyC6YNMm3B-fVaOfFb4C2WBeGprQphXsVirMSvt9lWEYKqo5pGHgOlL5U40LeWFQMcnfOcmIntDG56BE3lhdyQeeltYbZlg1_RwsVwL5OcVWCtceyB0PWj9HheqvRsvA"
    }
    extra_options = {}
    http = HttpHook(method, http_conn_id='cg_default')

    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    response = http.run(endpoint, data, headers, extra_options)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'
Example #26
def print_hello():
    task_id = 'IMS_details'
    #task_id='Lookup_Post',
    http_conn_id = 'cg_default'
    #http_conn_id='lookup_conn',
    method = 'POST'
    data = {}
    #data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    #data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = ''
    #endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "accept": "application/json"
    }

    http = HttpHook(method, http_conn_id='ims_default')

    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    response = http.run(endpoint, data, headers)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'
def start_spider(project,
                 spider,
                 token_hook_conn_id,
                 api_hook_conn_id,
                 instance_num=1,
                 **context):
    """
    启动爬虫
    :param project:
    :param spider:
    :param token_hook_conn_id:
    :param api_hook_conn_id:
    :param instance_num:
    :param context:
    :return:
    """
    token_hook = HttpHook(http_conn_id=token_hook_conn_id)
    api_hook = HttpHook(http_conn_id=api_hook_conn_id)

    token = login(token_hook)
    res = run_job(project, spider, api_hook, token, instance_num=instance_num)

    if 'errors' in res:
        raise Exception(res['errors'])
    job_id_list = res['data']
    context['task_instance'].xcom_push(key='job_id_list', value=job_id_list)
Example #28
def get_lime(**kwargs):
    http = HttpHook('GET', http_conn_id='http_gbfs_lime')

    # get availability endpoint with limit = 1000
    response = http.run("/api/partners/v1/gbfs/detroit/free_bike_status.json")

    limes = json.loads(response.text)

    for l in limes['data']['bikes']:
        device_id = l.pop('bike_id')
        lat = l.pop('lat')
        lon = l.pop('lon')

        insert = f"""
      insert into scooters.availability (
        vendor, 
        device_id, 
        timestamp,
        extra,
        geom
      ) values (
        'lime',
        '{device_id}',
        '{kwargs['execution_date']}',
        '{json.dumps(l)}',
        ST_SetSRID(ST_MakePoint({lon},{lat}), 4326)
      )
    """
        # As in get_spin, `pg` is assumed to be a module-level PostgresHook.
        pg.run(insert)

    return response
def snapshot(**kwargs):
    """
      Query the TAP service and snapshot the OMM data.  
      #FIXME: The query should have some conditions to limit the data.
    """

    logging.info('Populating inputs.')
    query = Variable.get('omm_input_uri_query')
    redis = RedisHook(redis_conn_id='redis_default')
    data = {'QUERY': query, 'REQUEST': 'doQuery',
            'LANG': 'ADQL', 'FORMAT': 'csv'}
    http_connection = HttpHook(method='GET', http_conn_id='tap_service_host')
    count = -1

    with http_connection.run('/tap/sync?', parse.urlencode(data)) as response:
        arr = response.text.split('\n')
        count = len(arr)
        logging.info('Found {} items.'.format(count))
        sanitized_uris = []
        for uri in arr[1:]:
            if uri:
                artifact_uri = uri.split('/')[1].strip()
                sanitized_artifact_uri = artifact_uri.replace(
                    '+', '_').replace('%', '__')
                logging.info('Output is {}'.format(sanitized_artifact_uri))
                sanitized_uris.append(sanitized_artifact_uri)
        # redis_key is assumed to be a module-level global in the source DAG.
        redis.get_conn().rpush(redis_key, *sanitized_uris)
        redis.get_conn().persist(redis_key)
    return 'Extracted {} items'.format(len(sanitized_uris))
def query_and_extract(**context):
    http_conn = HttpHook('GET', http_conn_id)
    redis_conn = RedisHook(redis_conn_id)
    prev_exec_date = context.get('prev_execution_date')
    next_exec_date = context.get('next_execution_date')
    query_meta = ("SELECT fileName FROM archive_files "
                  "WHERE archiveName = '{}' AND ingestDate > '{}' "
                  "AND ingestDate <= '{}' ORDER BY ingestDate").format(
                      collection,
                      prev_exec_date.strftime(datetime_format),
                      next_exec_date.strftime(datetime_format))
    logging.info('Query: {}'.format(query_meta))
    data = {
        'QUERY': query_meta,
        'LANG': 'ADQL',
        'FORMAT': '{}'.format(output_format)
    }

    with http_conn.run('/ad/auth-sync?{}'.format(
            parse.urlencode(data))) as response:
        artifact_files_list = response.text.split()[1:]
        if artifact_files_list:
            redis_key = '{}_{}_{}.{}'.format(collection,
                                             _to_milliseconds(prev_exec_date),
                                             _to_milliseconds(next_exec_date),
                                             output_format)
            redis_conn.get_conn().rpush(redis_key, *artifact_files_list)
            return redis_key
    def send_status_msg(**kwargs):
        http_conn_id = 'ingest_api_connection'
        endpoint = '/datasets/status'
        method = 'PUT'
        headers = {
            # 'authorization': 'Bearer ' + kwargs['params']['auth_tok'],
            'content-type': 'application/json'}
        extra_options = {}
        
        http = HttpHook(method,
                        http_conn_id=http_conn_id)

        md_fname = os.path.join(os.environ['AIRFLOW_HOME'],
                                'data/temp', kwargs['run_id'],
                                'rslt.yml')
        with open(md_fname, 'r') as f:
            md = yaml.safe_load(f)
        data = {'dataset_id' : kwargs['dag_run'].conf['submission_id'],
                'status' : 'QA',
                'message' : 'the process ran',
                'metadata': md}
        print('data: ', data)
        print("Calling HTTP method")

        response = http.run(endpoint,
                            json.dumps(data),
                            headers,
                            extra_options) 
        print(response.text)
Example #32
 def setUp(self):
     session = requests.Session()
     adapter = requests_mock.Adapter()
     session.mount('mock', adapter)
     self.get_hook = HttpHook(method='GET')
     self.post_hook = HttpHook(method='POST')
     configuration.load_test_config()
 def execute(self, context):
     http = HttpHook(self.method, http_conn_id=self.http_conn_id)
     logging.info("Calling HTTP method")
     response = http.run(self.endpoint,
                         self.data,
                         self.headers,
                         self.extra_options)
     if self.response_check:
         if not self.response_check(response):
             raise AirflowException("Response check returned False.")
Example #34
    def __init__(self,
                 endpoint,
                 http_conn_id='http_default',
                 method='GET',
                 request_params=None,
                 headers=None,
                 response_check=None,
                 extra_options=None, *args, **kwargs):
        super(HttpSensor, self).__init__(*args, **kwargs)
        self.endpoint = endpoint
        self.http_conn_id = http_conn_id
        self.request_params = request_params or {}
        self.headers = headers or {}
        self.extra_options = extra_options or {}
        self.response_check = response_check

        self.hook = HttpHook(
            method=method,
            http_conn_id=http_conn_id)
def send_schedules_to_screen(**kwargs):  # hope it's op_kwargs + context
    data = json.dumps(kwargs['task_instance'].xcom_pull(task_ids='create_schedules'))
    http = HttpHook(kwargs['method'], http_conn_id=kwargs['http_conn_id'])
    return http.run(kwargs['endpoint'], data, kwargs['headers'])  # response_check from operator is on by default, I guess
Example #36
def read_connection(**context):
    context['task_instance'].xcom_push(key='my_key', value='my_value')
    hook = HttpHook(http_conn_id='http_default')
    session = hook.get_conn(None)
    print("requests.sessions.Session: ", session)
Example #37
class TestHttpHook(unittest.TestCase):
    """Test get, post and raise_for_status"""
    def setUp(self):
        session = requests.Session()
        adapter = requests_mock.Adapter()
        session.mount('mock', adapter)
        self.get_hook = HttpHook(method='GET')
        self.post_hook = HttpHook(method='POST')
        configuration.load_test_config()

    @requests_mock.mock()
    def test_raise_for_status_with_200(self, m):

        m.get(
            'http://test:8080/v1/test',
            status_code=200,
            text='{"status":{"status": 200}}',
            reason='OK'
        )
        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):

            resp = self.get_hook.run('v1/test')
            self.assertEqual(resp.text, '{"status":{"status": 200}}')

    @requests_mock.mock()
    @mock.patch('requests.Request')
    def test_get_request_with_port(self, m, request_mock):
        from requests.exceptions import MissingSchema

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection_with_port
        ):
            expected_url = 'http://test.com:1234/some/endpoint'
            for endpoint in ['some/endpoint', '/some/endpoint']:

                try:
                    self.get_hook.run(endpoint)
                except MissingSchema:
                    pass

                request_mock.assert_called_once_with(
                    mock.ANY,
                    expected_url,
                    headers=mock.ANY,
                    params=mock.ANY
                )

                request_mock.reset_mock()

    @requests_mock.mock()
    def test_get_request_do_not_raise_for_status_if_check_response_is_false(self, m):

        m.get(
            'http://test:8080/v1/test',
            status_code=404,
            text='{"status":{"status": 404}}',
            reason='Bad request'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            resp = self.get_hook.run('v1/test', extra_options={'check_response': False})
            self.assertEqual(resp.text, '{"status":{"status": 404}}')

    @requests_mock.mock()
    def test_hook_contains_header_from_extra_field(self, m):
        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            expected_conn = get_airflow_connection()
            conn = self.get_hook.get_conn()
            self.assertDictContainsSubset(json.loads(expected_conn.extra), conn.headers)
            self.assertEqual(conn.headers.get('bareer'), 'test')

    @requests_mock.mock()
    def test_hook_uses_provided_header(self, m):
        conn = self.get_hook.get_conn(headers={"bareer": "newT0k3n"})
        self.assertEqual(conn.headers.get('bareer'), "newT0k3n")

    @requests_mock.mock()
    def test_hook_has_no_header_from_extra(self, m):
        conn = self.get_hook.get_conn()
        self.assertIsNone(conn.headers.get('bareer'))

    @requests_mock.mock()
    def test_hooks_header_from_extra_is_overridden(self, m):
        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            conn = self.get_hook.get_conn(headers={"bareer": "newT0k3n"})
            self.assertEqual(conn.headers.get('bareer'), 'newT0k3n')

    @requests_mock.mock()
    def test_post_request(self, m):

        m.post(
            'http://test:8080/v1/test',
            status_code=200,
            text='{"status":{"status": 200}}',
            reason='OK'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            resp = self.post_hook.run('v1/test')
            self.assertEqual(resp.status_code, 200)

    @requests_mock.mock()
    def test_post_request_with_error_code(self, m):

        m.post(
            'http://test:8080/v1/test',
            status_code=418,
            text='{"status":{"status": 418}}',
            reason='I\'m a teapot'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            with self.assertRaises(AirflowException):
                self.post_hook.run('v1/test')

    @requests_mock.mock()
    def test_post_request_do_not_raise_for_status_if_check_response_is_false(self, m):

        m.post(
            'http://test:8080/v1/test',
            status_code=418,
            text='{"status":{"status": 418}}',
            reason='I\'m a teapot'
        )

        with mock.patch(
            'airflow.hooks.base_hook.BaseHook.get_connection',
            side_effect=get_airflow_connection
        ):
            resp = self.post_hook.run(
                'v1/test',
                extra_options={'check_response': False}
            )
            self.assertEqual(resp.status_code, 418)

    @mock.patch('airflow.hooks.http_hook.requests.Session')
    def test_retry_on_conn_error(self, mocked_session):

        retry_args = dict(
            wait=tenacity.wait_none(),
            stop=tenacity.stop_after_attempt(7),
            retry=tenacity.retry_if_exception_type(
                requests.exceptions.ConnectionError)
        )

        def send_and_raise(request, **kwargs):
            raise requests.exceptions.ConnectionError

        mocked_session().send.side_effect = send_and_raise
        # Every attempt raises ConnectionError, so the retry wrapper gives up.
        with self.assertRaises(tenacity.RetryError):
            self.get_hook.run_with_advanced_retry(
                endpoint='v1/test',
                _retry_args=retry_args
            )
        self.assertEqual(
            self.get_hook._retry_obj.stop.max_attempt_number + 1,
            mocked_session.call_count
        )

    def test_header_from_extra_and_run_method_are_merged(self):

        def run_and_return(session, prepped_request, extra_options, **kwargs):
            return prepped_request

        # Patch run_and_check so the prepared request (and its headers) is returned.
        with mock.patch(
            'airflow.hooks.http_hook.HttpHook.run_and_check',
            side_effect=run_and_return
        ):
            with mock.patch(
                'airflow.hooks.base_hook.BaseHook.get_connection',
                side_effect=get_airflow_connection
            ):
                pr = self.get_hook.run('v1/test', headers={'some_other_header': 'test'})
                actual = dict(pr.headers)
                self.assertEqual(actual.get('bareer'), 'test')
                self.assertEqual(actual.get('some_other_header'), 'test')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_http_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='localhost', schema='http')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'http://localhost')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_https_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='localhost', schema='https')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'https://localhost')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_host_encoded_http_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='http://localhost')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'http://localhost')

    @mock.patch('airflow.hooks.http_hook.HttpHook.get_connection')
    def test_host_encoded_https_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='https://localhost')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'https://localhost')
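The tests above call get_airflow_connection and get_airflow_connection_with_port without showing them. A reconstruction inferred from the assertions (a test host of test:8080 carrying a 'bareer' extra header, and test.com on port 1234):

def get_airflow_connection(conn_id=None):
    # Inferred from the expected base URLs and the 'bareer' header checks above.
    return Connection(
        conn_id='http_default',
        conn_type='http',
        host='test:8080/',
        extra='{"bareer": "test"}'
    )


def get_airflow_connection_with_port(conn_id=None):
    return Connection(
        conn_id='http_default',
        conn_type='http',
        host='test.com',
        port=1234
    )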
Example #38
class HttpSensor(BaseSensorOperator):
    """
    Executes an HTTP GET request and returns False on failure:
        404 Not Found, or the response_check function returned False

    :param http_conn_id: The connection to run the sensor against
    :type http_conn_id: string
    :param method: The HTTP request method to use
    :type method: string
    :param endpoint: The relative part of the full url
    :type endpoint: string
    :param request_params: The parameters to be added to the GET url
    :type request_params: a dictionary of string key/value pairs
    :param headers: The HTTP headers to be added to the GET request
    :type headers: a dictionary of string key/value pairs
    :param response_check: A check against the 'requests' response object.
        Returns True for 'pass' and False otherwise.
    :type response_check: A lambda or defined function.
    :param extra_options: Extra options for the 'requests' library, see the
        'requests' documentation (options to modify timeout, ssl, etc.)
    :type extra_options: A dictionary of options, where key is string and value
        depends on the option that's being modified.
    """

    template_fields = ('endpoint', 'request_params')

    @apply_defaults
    def __init__(self,
                 endpoint,
                 http_conn_id='http_default',
                 method='GET',
                 request_params=None,
                 headers=None,
                 response_check=None,
                 extra_options=None, *args, **kwargs):
        super(HttpSensor, self).__init__(*args, **kwargs)
        self.endpoint = endpoint
        self.http_conn_id = http_conn_id
        self.request_params = request_params or {}
        self.headers = headers or {}
        self.extra_options = extra_options or {}
        self.response_check = response_check

        self.hook = HttpHook(
            method=method,
            http_conn_id=http_conn_id)

    def poke(self, context):
        self.log.info('Poking: %s', self.endpoint)
        try:
            response = self.hook.run(self.endpoint,
                                     data=self.request_params,
                                     headers=self.headers,
                                     extra_options=self.extra_options)
            if self.response_check:
                # run content check on response
                return self.response_check(response)
        except AirflowException as ae:
            if str(ae).startswith("404"):
                return False

            raise ae

        return True
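For reference, a minimal sketch of wiring this sensor into a DAG; the connection id, endpoint, and response check below are placeholders, not from the original:

wait_for_report = HttpSensor(
    task_id='wait_for_report',          # placeholder task id
    http_conn_id='http_default',
    endpoint='reports/daily/status',    # placeholder endpoint
    response_check=lambda response: response.json().get('ready', False),
    poke_interval=60,                   # seconds between pokes
    timeout=60 * 60,                    # give up after an hour
    dag=dag,                            # assumes a module-level DAG object
)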