def setUp(self):
    session = requests.Session()
    adapter = requests_mock.Adapter()
    session.mount('mock', adapter)
    self.get_hook = HttpHook(method='GET')
    self.get_lowercase_hook = HttpHook(method='get')
    self.post_hook = HttpHook(method='POST')
    configuration.load_test_config()
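A hedged sketch of how a test that keeps references to `session` and `adapter` could exercise the mounted adapter; `register_uri` is requests_mock's documented API, while the URL and payload here are illustrative only:

# Illustrative only: register a fake response on the adapter mounted above.
adapter.register_uri('GET', 'mock://test.example/ping', json={'ok': True})
resp = session.get('mock://test.example/ping')
assert resp.json() == {'ok': True}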
Example no. 2
    def execute(self, context):
        http = HttpHook(self.method, http_conn_id=self.http_conn_id)
        self.log.info("jobId: %s", self.jobId)
        self.log.info("api_key: %s", self.api_key)
        self.log.info("Calling HTTP method")
        response = http.run(self.endpoint + "?api_key=" + self.api_key,
                            self.data,
                            self.headers,
                            None)
        self.log.info("Launch job call status: %s", response.status_code)
        self.log.info("Response: %s", response.text)

        if self.response_check:
            if not self.response_check(response):
                raise AirflowException(
                    "Response check returned False with code: %s and message: %s"
                    % (response.status_code, response.text))
        
        rJson = RespAsync(response.text)
        eta = rJson.eta
        retryInterval = rJson.retryInterval
        job = rJson.jobId
        
        httpCallback = HttpHook('GET', http_conn_id=self.http_conn_id)

        time.sleep(eta)
        while True:
            respCallback = httpCallback.run(self.endpointAsync+"/"+job+"?api_key=" + self.api_key,
                              None,
                              self.headers,
                              self.extra_options)
            
            if respCallback.status_code != 200:
                raise AirflowException(
                    "Error while calling callback method: code %s and message: %s"
                    % (respCallback.status_code, respCallback.text))
            
            retryResp = RespAsyncRetry(respCallback.text)
            self.log.info("Retry call with status : " + retryResp.status)

            if retryResp.status == "PENDING":
                self.log.info("waiting ...")
                time.sleep(retryInterval)
                continue
            if self.response_check_callback:
                if not self.response_check_callback(retryResp):
                    raise AirflowException(
                        "Response check returned False with code: %s message: %s"
                        % (retryResp.status, retryResp.message))

            if self.xcom_push_flag:
                return respCallback.text
            break
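`RespAsync` and `RespAsyncRetry` are not defined in the snippet; judging from the attributes accessed above, they are thin JSON wrappers along these lines (hypothetical reconstruction):

import json

class RespAsync:
    """Hypothetical wrapper inferred from usage: parses the launch response."""
    def __init__(self, text):
        body = json.loads(text)
        self.eta = body['eta']
        self.retryInterval = body['retryInterval']
        self.jobId = body['jobId']

class RespAsyncRetry:
    """Hypothetical wrapper inferred from usage: parses the polling response."""
    def __init__(self, text):
        body = json.loads(text)
        self.status = body['status']
        self.message = body.get('message')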
Example no. 3
def test(token_hook_conn_id, api_hook_conn_id):
    token_hook = HttpHook(http_conn_id=token_hook_conn_id)
    api_hook = HttpHook(http_conn_id=api_hook_conn_id)

    token = login(token_hook)
    logging.debug("登录获取的token:%s", token)

    res = run_job('fund_cn', 'tt_fund_bank_list', api_hook, token)

    job_id = res['data']
    logging.info("启动作用,id为%s", job_id)

    res = query_job_status(job_id, api_hook, token)
    logging.info("查询作业状态:%s", res)
Example no. 4
    def execute(self, context):
        job_label = '({}/{})'.format(self.project, self.job)

        get_hook = HttpHook(http_conn_id='cml_rest_api', method='GET')
        post_hook = HttpHook(http_conn_id='cml_rest_api', method='POST')
        projects_url = 'api/v2/projects'
        r = get_hook.run(endpoint=projects_url)
        projects = {p['name']: p['id']
                    for p in r.json()['projects']} if r.ok else None

        if projects and self.project in projects.keys():
            jobs_url = '{}/{}/jobs'.format(projects_url,
                                           projects[self.project])
            r = get_hook.run(endpoint=jobs_url)
            jobs = {j['name']: j['id']
                    for j in r.json()['jobs']} if r.ok else None

            if jobs and self.job in jobs.keys():
                runs_url = '{}/{}/runs'.format(jobs_url, jobs[self.job])
                r = post_hook.run(endpoint=runs_url)
                run = r.json() if r.ok else None

                if run:
                    status = run['status']
                    RUNNING_STATES = [
                        'ENGINE_SCHEDULING', 'ENGINE_STARTING',
                        'ENGINE_RUNNING'
                    ]
                    SUCCESS_STATES = ['ENGINE_SUCCEEDED']
                    POLL_INTERVAL = 10
                    while status and status in RUNNING_STATES:
                        run_id_url = '{}/{}'.format(runs_url, run['id'])
                        r = get_hook.run(endpoint=run_id_url)
                        status = r.json()['status'] if r.ok else None
                        time.sleep(POLL_INTERVAL)
                    if status not in SUCCESS_STATES:
                        raise AirflowException(
                            'Error while waiting for CML job ({}) to complete'.
                            format(job_label))
                else:
                    raise AirflowException(
                        'Problem triggering CML job ({})'.format(job_label))
            else:
                raise AirflowException(
                    'Problem finding the CML job ID ({})'.format(self.job))
        else:
            raise AirflowException(
                'Problem finding the CML project ID ({})'.format(self.project))
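The operator above assumes an Airflow connection named `cml_rest_api`. Airflow builds connections from `AIRFLOW_CONN_<ID>` environment variables, so one way to supply it is sketched below (the host is a placeholder and any auth headers are omitted):

import os

# Hypothetical host; Airflow parses this URI into the cml_rest_api connection.
os.environ['AIRFLOW_CONN_CML_REST_API'] = 'https://ml.example.cloudera.site'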
Example no. 5
def insert_rows():

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={
                            'resource_id': table_variables['resource_id'],
                            'limit': '10000000'
                        })

    http_hook.check_response(response=res)

    unemployment_measures = res.json()['result']['records']

    unemployment_df = pd.DataFrame(unemployment_measures)
    unemployment_df = unemployment_df[[
        '_id', 'Any', 'Mes', 'Codi_Districte', 'Nom_Districte', 'Codi_Barri',
        'Nom_Barri', 'Durada_atur', 'Nombre'
    ]]
    unemployment_df.replace({
        'NA': np.nan,
        '-Inf': np.nan,
        'Inf': np.nan
    },
                            inplace=True)
    insert_ts = datetime.utcnow()

    for row in unemployment_df.itertuples(index=False):
        pg_hook.run(sql_insert,
                    parameters=(row[0], row[1], row[2], row[3], row[4], row[5],
                                row[6], row[7], row[8], insert_ts))
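`table_variables` is module-level configuration not shown in the snippet; only its keys are grounded in the code above, so the values here are placeholders:

# Hypothetical configuration; the keys match those accessed in insert_rows().
table_variables = {
    'name': 'unemployment',
    'http_conn_id': 'opendata_bcn',
    'endpoint': 'api/action/datastore_search',
    'resource_id': 'placeholder-resource-id',
}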
Example no. 6
def copy_brazil_data_file(origin_host, origin_filepath, dest_bucket, dest_key):
    """Copy Brazil data file to a local bucket.
    Copy the source file which contains detailed data about Brazil to
    an AWS S3 bucket to make it available to AWS EMR.
    args:
    origin_host (str): host where the source file is in
    origin_filepath (str): full path to the file in the host
    dest_bucket (str): name of the bucket to store the file
    dest_key (str): prefix/name of the file in the destination bucket
    """
    logging.info('Copying Brazil data file ' \
                f'FROM: http://{origin_host}/{origin_filepath} ' \
                f'TO: s3://{dest_bucket}/{dest_key}')

    # Create a connection to the source server
    conn = Connection(conn_id='http_conn_brasilio',
                      conn_type='http',
                      host=origin_host,
                      port=80)  #create a connection object
    session = settings.Session()  # get the session
    session.add(conn)
    session.commit()

    # Get the data file
    http_hook = HttpHook(method='GET', http_conn_id='http_conn_brasilio')
    response_br_data = http_hook.run(origin_filepath)

    # Store data file into s3 bucket
    s3_hook = S3Hook(aws_conn_id='aws_default')
    s3_hook.load_bytes(response_br_data.content,
                       dest_key,
                       bucket_name=dest_bucket,
                       replace=True)

    logging.info('Data copy finished.')
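Note that each run of this function inserts another `http_conn_brasilio` row. A sketch of a guard against duplicates, using the same `settings.Session` and `Connection` objects the snippet already relies on:

# Only create the connection if it does not exist yet.
session = settings.Session()
existing = session.query(Connection).filter(
    Connection.conn_id == 'http_conn_brasilio').first()
if existing is None:
    session.add(Connection(conn_id='http_conn_brasilio', conn_type='http',
                           host=origin_host, port=80))
    session.commit()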
Example no. 7
    def check_spark_app_status(self, app_id):
        logging.info(
            "Getting app status (id={app_id}) from Spark REST API...".format(
                app_id=app_id))
        endpoint = "{SPARK_ENDPOINT}/{app_id}/jobs".format(
            SPARK_ENDPOINT=SPARK_ENDPOINT, app_id=app_id)
        response = HttpHook(method="GET",
                            http_conn_id=self.http_conn_id_spark).run(endpoint)
        try:
            jobs = json.loads(response.content)
            expected_status = "SUCCEEDED"
            for job in jobs:
                job_id = job["jobId"]
                job_status = job["status"]
                logging.info(
                    "Job id {job_id} associated with application '{app_id}' "
                    "is '{job_status}'".format(job_id=job_id,
                                               app_id=app_id,
                                               job_status=job_status))
                if job_status != expected_status:
                    raise AirflowException(
                        "Job id '{job_id}' associated with application '{app_id}' "
                        "is '{job_status}', expected status is '{expected_status}'"
                        .format(job_id=job_id,
                                app_id=app_id,
                                job_status=job_status,
                                expected_status=expected_status))
        except (JSONDecodeError, LookupError, TypeError) as ex:
            log_response_error("$.jobId, $.status", response)
            raise AirflowBadRequest(ex)
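`SPARK_ENDPOINT` and `log_response_error` come from the surrounding module and are not shown; plausible, purely hypothetical definitions consistent with the calls above:

# Hypothetical: base path of the Spark REST API, relative to the connection host.
SPARK_ENDPOINT = "api/v1/applications"

def log_response_error(json_paths, response, *ids):
    # Hypothetical helper: report which JSON paths could not be extracted.
    logging.error("Unable to parse %s (ids=%s) from response: %s",
                  json_paths, ids, getattr(response, "content", response))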
Example no. 8
    def _fetch_headers(self, force_refresh=False):
        headers = {"Content-Type": "application/x-ndjson"}
        if not self.protected:
            return headers
        if (
            self.access_token is None
            or time.time() + self.token_expires_margin > self.token_expires_time
            or force_refresh
        ):
            form_params = dict(
                grant_type="client_credentials",
                client_id=OIDC_CLIENT_ID,
                client_secret=OIDC_CLIENT_SECRET,
            )
            http = HttpHook(http_conn_id="oidc_server", method="POST")
            for i in range(3):
                try:
                    response = http.run(OIDC_TOKEN_ENDPOINT, data=form_params)
                except AirflowException:
                    self.log.exception("Keycloak unreachable")
                    time.sleep(1)
                else:
                    break
            else:
                raise AirflowException("Keycloak unreachable after 3 attempts")
            token_info = response.json()
            self.access_token = token_info["access_token"]
            self.token_expires_time = time.time() + token_info["expires_in"]

        headers["Authorization"] = f"Bearer {self.access_token}"
        return headers
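The `for ... else` in `_fetch_headers` is a retry idiom: the `else` clause runs only when the loop finishes without hitting `break`, i.e. when every attempt failed. A minimal standalone illustration (`attempt_once` is a hypothetical callable):

def fetch_with_retries(attempt_once, attempts=3):
    # attempt_once is a hypothetical zero-argument callable that raises on failure.
    for _ in range(attempts):
        try:
            result = attempt_once()
        except Exception:
            continue       # failed attempt: try again
        else:
            break          # success: skips the for-loop's else clause
    else:
        raise RuntimeError("all %d attempts failed" % attempts)
    return result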
Example no. 9
def query_and_extract(**context):
    http_conn = HttpHook('GET', http_conn_id)
    redis_conn = RedisHook(redis_conn_id)
    prev_exec_date = context.get('prev_execution_date')
    next_exec_date = context.get('next_execution_date')
    query_meta = "SELECT fileName FROM archive_files WHERE archiveName = '{}'" \
        " AND ingestDate > '{}' and ingestDate <= '{}' ORDER BY ingestDate".format(collection,
                                                                                   prev_exec_date.strftime(
                                                                                       datetime_format),
                                                                                   next_exec_date.strftime(datetime_format))
    logging.info('Query: {}'.format(query_meta))
    data = {
        'QUERY': query_meta,
        'LANG': 'ADQL',
        'FORMAT': '{}'.format(output_format)
    }

    with http_conn.run('/ad/auth-sync?{}'.format(
            parse.urlencode(data))) as response:
        artifact_files_list = response.text.split()[1:]
        if artifact_files_list:
            redis_key = '{}_{}_{}.{}'.format(collection,
                                             _to_milliseconds(prev_exec_date),
                                             _to_milliseconds(next_exec_date),
                                             output_format)
            redis_conn.get_conn().rpush(redis_key, *artifact_files_list)
            return redis_key
Example no. 10
def get_data_zomato_api(*args, **kwargs):

    api_hook = HttpHook(http_conn_id="zomato_api", method='GET')

    data_dict = {}

    schema = {"properties": {"restaurants": {"mergeStrategy": "append"}}}
    merger = Merger(schema)

    for i in range(0, 100, 20):

        endpoint_url = "search?entity_id=3&entity_type=city&start={}&count=20&sort=rating".format(
            i)

        resp_url = api_hook.run(endpoint=endpoint_url)
        resp = json.loads(resp_url.content)

        if i == 0:

            data_dict.update(resp)
            result = data_dict
        else:
            result = merger.merge(result, resp)
        with open(
                "/Users/preetiyerkuntwar/documents/Zomato-test/all_restro.json",
                "w") as f:
            json.dump(result, f)
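`Merger` is presumably `jsonmerge.Merger`; with `mergeStrategy: append`, each page's `restaurants` array is appended to the accumulated result rather than overwritten. The imports the snippet most likely relies on:

import json
from airflow.hooks.http_hook import HttpHook  # Airflow 1.x import path
from jsonmerge import Merger                  # pip install jsonmerge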
Example no. 11
def get_spin(**kwargs):
    http = HttpHook('GET', http_conn_id='http_gbfs_spin')

    response = http.run("/api/gbfs/v1/detroit/free_bike_status")

    spins = json.loads(response.text)

    for s in spins['data']['bikes']:
        device_id = s.pop('bike_id')
        lat = s.pop('lat')
        lon = s.pop('lon')

        insert = f"""
      insert into scooters.availability (
        vendor, 
        device_id, 
        timestamp,
        extra,
        geom
      ) values (
        'spin',
        '{device_id}',
        '{kwargs['execution_date']}',
        '{json.dumps(s)}',
        ST_SetSRID(ST_MakePoint({lon},{lat}), 4326)
      )
    """
        pg.run(insert)

    return response
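`pg` is presumably a module-level `PostgresHook`. Because `extra` is raw API data, interpolating it into the SQL string invites quoting errors and SQL injection; a parameterized sketch of the same insert, assuming that same hook:

# Sketch: let the driver handle quoting instead of building SQL with f-strings.
insert = """
    insert into scooters.availability (vendor, device_id, timestamp, extra, geom)
    values ('spin', %s, %s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326))
"""
pg.run(insert, parameters=(device_id, kwargs['execution_date'],
                           json.dumps(s), lon, lat))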
Example no. 12
    def poke(self, context):
        logging.info("Getting session {session_id} status...".format(
            session_id=self.session_id))
        endpoint = "{ENDPOINT}/{session_id}/state".format(
            ENDPOINT=ENDPOINT, session_id=self.session_id)
        response = HttpHook(method="GET",
                            http_conn_id=self.http_conn_id).run(endpoint)
        try:
            state = json.loads(response.content)["state"]
        except (JSONDecodeError, LookupError) as ex:
            log_response_error("$.state", response, self.session_id)
            raise AirflowBadRequest(ex)
        if state == "starting":
            logging.info("Session {session_id} is starting...".format(
                session_id=self.session_id))
            return False
        if state == "idle":
            logging.info(
                "Session {session_id} is ready to receive statements.".format(
                    session_id=self.session_id))
            return True
        raise AirflowException(
            "Session {session_id} failed to start. "
            "State='{state}'. Expected states: 'starting' or 'idle' (ready)."
            .format(session_id=self.session_id, state=state))
Example no. 13
    def spill_session_logs(self):
        dashes = '-' * 50
        logging.info(
            "{dashes}Full log for session {session_id}{dashes}".format(
                dashes=dashes, session_id=self.session_id))
        endpoint = "{ENDPOINT}/{session_id}/log".format(
            ENDPOINT=ENDPOINT, session_id=self.session_id)
        hook = HttpHook(method="GET", http_conn_id=self.http_conn_id)
        line_from = 0
        line_to = LOG_PAGE_LINES
        while True:
            log_page = self.fetch_log_page(hook, endpoint, line_from, line_to)
            try:
                logs = log_page["log"]
                for log in logs:
                    logging.info(log.replace("\\n", "\n"))
                actual_line_from = log_page["from"]
                total_lines = log_page["total"]
            except LookupError as ex:
                log_response_error("$.log, $.from, $.total", log_page,
                                   self.session_id)
                raise AirflowBadRequest(ex)
            actual_lines = len(logs)
            if actual_line_from + actual_lines >= total_lines:
                logging.info("{dashes}End of full log for session {session_id}"
                             "{dashes}".format(dashes=dashes,
                                               session_id=self.session_id))
                break
            line_from = actual_line_from + actual_lines
Example no. 14
    def poke(self, context):
        logging.info("Getting status for statement {statement_id} "
                     "in session {session_id}".format(
                         statement_id=self.statement_id,
                         session_id=self.session_id))
        endpoint = "{ENDPOINT}/{session_id}/statements/{statement_id}".format(
            ENDPOINT=ENDPOINT, session_id=self.session_id,
            statement_id=self.statement_id)
        response = HttpHook(method="GET",
                            http_conn_id=self.http_conn_id).run(endpoint)
        try:
            statement = json.loads(response.content)
            state = statement["state"]
        except (JSONDecodeError, LookupError) as ex:
            log_response_error("$.state", response, self.session_id,
                               self.statement_id)
            raise AirflowBadRequest(ex)
        if state in ["waiting", "running"]:
            logging.info("Statement {statement_id} in session {session_id} "
                         "has not finished yet (state is '{state}')".format(
                             statement_id=self.statement_id,
                             session_id=self.session_id,
                             state=state))
            return False
        if state == "available":
            self.__check_status(statement, response)
            return True
        raise AirflowBadRequest(
            "Statement {statement_id} in session {session_id} failed due to "
            "an unknown state: '{state}'.\nKnown states: 'waiting', 'running', "
            "'available'".format(statement_id=self.statement_id,
                                 session_id=self.session_id,
                                 state=state))
Example no. 15
def print_hello():
    task_id = 'CG_details'
    #task_id='Lookup_Post',
    http_conn_id = 'cg_default'
    #http_conn_id='lookup_conn',
    method = 'POST'
    data = rq_body_param
    #data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    #data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = 'xyz/test/execute/jobs'
    #endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type":
        "application/json",
        "accept":
        "application/json",
        "x-api-key":
        "acp_testing",
        "Authorization":
        "Bearer eyJ4NXUiOiJpbXNfbmExLXN0ZzEta2V5LTEuY2VyIiwiYWxnIjoiUlMyNTYifQ.eyJpZCI6IjE1MTcyMTI4MjAzMjhfNjNkMzI5NjMtOTYzYy00YjA2LTk3MjAtN2M2OTExZDI2Y2E5X3VlMSIsImNsaWVudF9pZCI6ImFjcF90ZXN0aW5nIiwidXNlcl9pZCI6ImFjcF90ZXN0aW5nQEFkb2JlSUQiLCJ0eXBlIjoiYWNjZXNzX3Rva2VuIiwiYXMiOiJpbXMtbmExLXN0ZzEiLCJwYWMiOiJhY3BfdGVzdGluZ19zdGciLCJydGlkIjoiMTUxNzIxMjgyMDMyOV84YzFhYzRhOC1lZjM0LTQ3ZWYtOWFkNi0xMmI0ZTg3MjYzNjdfdWUxIiwicnRlYSI6IjE1MTg0MjI0MjAzMjkiLCJtb2kiOiJkMjVhMzg5ZSIsImMiOiJZSFg3Rld5d2JnaDhTYy9FMW1vaWJBPT0iLCJleHBpcmVzX2luIjoiODY0MDAwMDAiLCJzY29wZSI6ImFjcC5mb3VuZGF0aW9uLmFjY2Vzc0NvbnRyb2wsYWNwLmNvcmUucGlwZWxpbmUsc3lzdGVtLG9wZW5pZCxBZG9iZUlELGFkZGl0aW9uYWxfaW5mby5yb2xlcyxhZGRpdGlvbmFsX2luZm8ucHJvamVjdGVkUHJvZHVjdENvbnRleHQsYWNwLmZvdW5kYXRpb24sYWNwLmZvdW5kYXRpb24uY2F0YWxvZyxhY3AuZGlzY292ZXJ5IiwiY3JlYXRlZF9hdCI6IjE1MTcyMTI4MjAzMjgifQ.Q0eAxwLdkQ7XEDzpVwDtoKsmwySkEN26F85wDWjgo5j8lriO_8hUDEYYTXJjvXd0xOr82OnIQnWrDe8LXGLswH2rUYmR0oC40Wfv_ZMLf6IPyghNSw5QWKMYhOKTq-4n2kFvnvSh2Dq_F3govWSo1OWR609xC-HKLGAfBgWqAvCN5WPGQzQ8e5zeqCgclBTk4noBqJIVV06hJROSiD2Gt7FyC6YNMm3B-fVaOfFb4C2WBeGprQphXsVirMSvt9lWEYKqo5pGHgOlL5U40LeWFQMcnfOcmIntDG56BE3lhdyQeeltYbZlg1_RwsVwL5OcVWCtceyB0PWj9HheqvRsvA"
    }
    extra_options = {}
    http = HttpHook(method, http_conn_id='cg_default')

    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    response = http.run(endpoint, data, headers, extra_options)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'
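A hedged alternative to hardcoding the API key and bearer token: the Airflow 1.x HttpHook merges the connection's `extra` JSON into the session headers, so the credentials could live on the `cg_default` connection instead (host and token below are placeholders):

import json
from airflow.models import Connection

# Hypothetical: keep credentials in the connection's extra field instead of code.
conn = Connection(
    conn_id='cg_default',
    conn_type='http',
    host='api.example.com',   # placeholder host
    extra=json.dumps({
        'x-api-key': 'acp_testing',
        'Authorization': 'Bearer <token>',   # placeholder, not a real token
    }),
)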
Example no. 16
def store_product_catalog_with_partition(**kwargs):
    task_instance = kwargs['ti']
    get_product_catalog_db_response = task_instance.xcom_pull(
        key=None, task_ids='get_product_catalog_db')
    get_product_catalog_db_json = json.loads(get_product_catalog_db_response)

    http_hook = HttpHook(
        method='POST',
        http_conn_id='product_catalog',
    )

    for index, product_catalog in enumerate(
            get_product_catalog_db_json['rows']):
        product_catalog_document = product_catalog['value']
        product_catalog_document.pop('_rev', None)
        product_catalog_document['_id'] = f"{product_catalog_document['type']}_{product_catalog_document['subtype']}" \
                                          f":{product_catalog_document['_id']}"

        print("request", product_catalog_document)
        print("request", json.dumps(product_catalog_document).encode('utf-8'))
        print("request", json.loads(json.dumps(product_catalog_document)))
        response = http_hook.run(
            endpoint='copy_product',
            headers={"Content-Type": "application/json; charset=utf-8"},
            json=product_catalog_document,
        )

        print("response", response)
Example no. 17
def get_rates(ds, **kwargs):
    pg_hook = PostgresHook(postgres_conn_id='rates')
    api_hook = HttpHook(http_conn_id='openexchangerates', method='GET')

    # If either of these raises an exception then we'll be notified via
    # Airflow
    resp = api_hook.run('')
    resp = json.loads(resp.content)

    # These are the only valid pairs the DB supports at the moment. Anything
    # else that turns up will be ignored.
    valid_pairs = (
        'AED', 'AFN', 'ALL', 'AMD', 'ANG', 'AOA', 'ARS',
        'AUD', 'AWG', 'AZN', 'BAM', 'BBD', 'BDT', 'BGN',
        'BHD', 'BIF', 'BMD', 'BND', 'BOB', 'BRL', 'BSD',
        'BTC', 'BTN', 'BWP', 'BYN', 'BYR', 'BZD', 'CAD',
        'CDF', 'CHF', 'CLF', 'CLP', 'CNY', 'COP', 'CRC',
        'CUC', 'CUP', 'CVE', 'CZK', 'DJF', 'DKK', 'DOP',
        'DZD', 'EEK', 'EGP', 'ERN', 'ETB', 'EUR', 'FJD',
        'FKP', 'GBP', 'GEL', 'GGP', 'GHS', 'GIP', 'GMD',
        'GNF', 'GTQ', 'GYD', 'HKD', 'HNL', 'HRK', 'HTG',
        'HUF', 'IDR', 'ILS', 'IMP', 'INR', 'IQD', 'IRR',
        'ISK', 'JEP', 'JMD', 'JOD', 'JPY', 'KES', 'KGS',
        'KHR', 'KMF', 'KPW', 'KRW', 'KWD', 'KYD', 'KZT',
        'LAK', 'LBP', 'LKR', 'LRD', 'LSL', 'LTL', 'LVL',
        'LYD', 'MAD', 'MDL', 'MGA', 'MKD', 'MMK', 'MNT',
        'MOP', 'MRO', 'MTL', 'MUR', 'MVR', 'MWK', 'MXN',
        'MYR', 'MZN', 'NAD', 'NGN', 'NIO', 'NOK', 'NPR',
        'NZD', 'OMR', 'PAB', 'PEN', 'PGK', 'PHP', 'PKR',
        'PLN', 'PYG', 'QAR', 'RON', 'RSD', 'RUB', 'RWF',
        'SAR', 'SBD', 'SCR', 'SDG', 'SEK', 'SGD', 'SHP',
        'SLL', 'SOS', 'SRD', 'STD', 'SVC', 'SYP', 'SZL',
        'THB', 'TJS', 'TMT', 'TND', 'TOP', 'TRY', 'TTD',
        'TWD', 'TZS', 'UAH', 'UGX', 'USD', 'UYU', 'UZS',
        'VEF', 'VND', 'VUV', 'WST', 'XAF', 'XAG', 'XAU',
        'XCD', 'XDR', 'XOF', 'XPD', 'XPF', 'XPT', 'YER',
        'ZAR', 'ZMK', 'ZMW', 'ZWL')

    rates_insert = """INSERT INTO rates (pair, valid_until, rate)
                      VALUES (%s, %s, %s);"""

    # If this raises an exception then we'll be notified via Airflow
    valid_until = datetime.fromtimestamp(resp['timestamp'])

    for (iso2, rate) in resp['rates'].items():
        # If converting the rate to a float fails for whatever reason then
        # just move on.
        try:
            rate = float(rate)
        except (TypeError, ValueError):
            continue

        iso2 = iso2.upper().strip()

        if iso2 not in valid_pairs or rate < 0:
            continue

        pg_hook.run(rates_insert, parameters=(iso2,
                                              valid_until,
                                              rate))
Example no. 18
    def test_connection_without_host(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http')
        mock_get_connection.return_value = c

        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'http://')
Example no. 19
def print_hello():
    task_id = 'IMS_details'
    #task_id='Lookup_Post',
    http_conn_id = 'ims_default'
    #http_conn_id='lookup_conn',
    method = 'POST'
    data = {}
    #data={"sql":"SELECT a.shape_name, a.num_sides, b.color_name, b.red_value, b.green_value, b.blue_value FROM shapes_production a, colors_production b WHERE a.color_name = b.color_name LIMIT 5;"},
    #data=json.dumps({'ldap' : 'tangupta'}),
    endpoint = ''
    #endpoint='/lookup/dataSets/testCollection/keys/ldap?imsOrg=testDb',
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "accept": "application/json"
    }

    http = HttpHook(method, http_conn_id='ims_default')

    logging.info('Calling HTTP method')
    print(os.environ['PATH'])
    response = http.run(endpoint, data, headers)
    print(response)
    print(response.text)
    print(configuration.get('testing', 'tanuj').encode('utf-8'))
    return 'Hello world!'
Example no. 20
def snapshot(**kwargs):
    """
    Query the TAP service and snapshot the OMM data.

    FIXME: The query should have some conditions to limit the data.
    """

    logging.info('Populating inputs.')
    query = Variable.get('omm_input_uri_query')
    redis = RedisHook(redis_conn_id='redis_default')
    data = {'QUERY': query, 'REQUEST': 'doQuery',
            'LANG': 'ADQL', 'FORMAT': 'csv'}
    http_connection = HttpHook(method='GET', http_conn_id='tap_service_host')
    count = -1

    with http_connection.run('/tap/sync?', parse.urlencode(data)) as response:
        arr = response.text.split('\n')
        count = len(arr)
        logging.info('Found {} items.'.format(count))
        sanitized_uris = []
        for uri in arr[1:]:
            if uri:
                artifact_uri = uri.split('/')[1].strip()
                sanitized_artifact_uri = artifact_uri.replace(
                    '+', '_').replace('%', '__')
                logging.info('Output is {}'.format(sanitized_artifact_uri))
                sanitized_uris.append(sanitized_artifact_uri)
        redis.get_conn().rpush(redis_key, *sanitized_uris)
        redis.get_conn().persist(redis_key)
    return 'Extracted {} items'.format(len(sanitized_uris))
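`redis_key` and the `parse` module are supplied by the surrounding DAG file; a hypothetical setup consistent with the calls above:

# Hypothetical module-level names the snapshot() task relies on.
from urllib import parse          # provides parse.urlencode used above

redis_key = 'omm_snapshot_uris'   # placeholder Redis key name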
Example no. 21
def get_lime(**kwargs):
    http = HttpHook('GET', http_conn_id='http_gbfs_lime')

    # GBFS free bike status endpoint for Detroit
    response = http.run("/api/partners/v1/gbfs/detroit/free_bike_status.json")

    limes = json.loads(response.text)

    for l in limes['data']['bikes']:
        device_id = l.pop('bike_id')
        lat = l.pop('lat')
        lon = l.pop('lon')

        insert = f"""
      insert into scooters.availability (
        vendor, 
        device_id, 
        timestamp,
        extra,
        geom
      ) values (
        'lime',
        '{device_id}',
        '{kwargs['execution_date']}',
        '{json.dumps(l)}',
        ST_SetSRID(ST_MakePoint({lon},{lat}), 4326)
      )
    """
        pg.run(insert)

    return response
Example no. 22
    def _download_from_http(self):
        http = HttpHook("GET", http_conn_id=self.http_connection_id)
        self.log.info("Calling HTTP method")
        response = http.run(self.http_endpoint)
        self.log.info(response.text)

        return response.text
Example no. 23
    def spill_batch_logs(self):
        dashes = 50
        logging.info(
            f"{'-'*dashes}Full log for batch {self.batch_id}{'-'*dashes}")
        endpoint = f"{LIVY_ENDPOINT}/{self.batch_id}/log"
        hook = HttpHook(method="GET", http_conn_id=self.http_conn_id_livy)
        line_from = 0
        line_to = LOG_PAGE_LINES
        while True:
            log_page = self.fetch_log_page(hook, endpoint, line_from, line_to)
            try:
                logs = log_page["log"]
                for log in logs:
                    logging.info(log.replace("\\n", "\n"))
                actual_line_from = log_page["from"]
                total_lines = log_page["total"]
            except LookupError as ex:
                log_response_error("$.log, $.from, $.total", log_page)
                raise AirflowBadRequest(ex)
            actual_lines = len(logs)
            if actual_line_from + actual_lines >= total_lines:
                logging.info(
                    f"{'-' * dashes}End of full log for batch {self.batch_id}"
                    f"{'-' * dashes}")
                break
            line_from = actual_line_from + actual_lines
Example no. 24
    def send_status_msg(**kwargs):
        http_conn_id = 'ingest_api_connection'
        endpoint = '/datasets/status'
        method = 'PUT'
        headers = {
            #'authorization' : 'Bearer ' + kwargs['params']['auth_tok'],
            'content-type': 'application/json'}
        extra_options = {}
        
        http = HttpHook(method,
                        http_conn_id=http_conn_id)

        md_fname = os.path.join(os.environ['AIRFLOW_HOME'],
                                    'data/temp', kwargs['run_id'],
                                    'rslt.yml')
        with open(md_fname, 'r') as f:
            md = yaml.safe_load(f)
        data = {'dataset_id' : kwargs['dag_run'].conf['submission_id'],
                'status' : 'QA',
                'message' : 'the process ran',
                'metadata': md}
        print('data: ', data)
        print("Calling HTTP method")

        response = http.run(endpoint,
                            json.dumps(data),
                            headers,
                            extra_options) 
        print(response.text)
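`extra_options` is a dict of settings the HttpHook forwards to `requests`, recognized keys including `timeout` and `verify`; an illustrative non-empty value:

# Illustrative options forwarded by HttpHook.run() to the requests call.
extra_options = {'timeout': 60, 'verify': True}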
Example no. 25
def sub_dag(child_dag_id, input_file_names, key):
    sub_dag = DAG('{}.{}'.format(dag_id, child_dag_id),
                  default_args=default_args,
                  catchup=False,
                  schedule_interval=vlass_dag.schedule_interval)
    http_conn = HttpHook('GET', http_conn_id)
    auth_conn = HttpHook.get_connection(http_conn_id)

    with http_conn.run('/cred/auth/priv/users/{}'.format(
            auth_conn.login)) as response:
        cert = response.text
        for idx, x in enumerate(input_file_names):
            KubernetesPodOperator(
                dag=sub_dag,
                namespace='default',
                task_id='vlass-transform-{}-{}'.format(idx, key),
                in_cluster=True,
                get_logs=True,
                cmds=['{}_run_single'.format(collection.lower())],
                arguments=[x, cert],
                name='airflow-vlass-transform-pod',
                volumes=[volume],
                volume_mounts=[volume_mount])

    return sub_dag
Example no. 26
    def _get_weather_data(self, lat, lon):
        """
        Gets the weather data from the specified coordinates and time

        :param lat: latitude to be used as query param.
        :param lon: longitude to be used as query param.
        :return: response retrieved from the API
        """

        open_weather = HttpHook(method="GET",
                                http_conn_id=self.open_weather_conn)
        data = {
            "lat": lat,
            "lon": lon,
            "dt": calendar.timegm(self.date.timetuple()),
            "appid": self.app_id,
            "units": "metric",
        }

        response = open_weather.run("/data/2.5/onecall/timemachine", data=data)

        if response.status_code == 200:
            self.log.info("Weather data successfully retrieved from location")
            return self._weather_date_to_datetime(response.json()["hourly"])
        else:
            raise ValueError(
                "Unexpected status code %s from weather API" % response.status_code)
Example no. 27
def insert_rows():

    insert_ts = datetime.utcnow()

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={'codigo': table_variables['codigo']})
    http_hook.check_response(response=res)

    cases_df = pd.DataFrame(res.json()['timeline'])

    for row in cases_df.itertuples(index=False):
        date = row.fecha

        information = pd.Series(row.regiones[0]['data'])

        information = information[['casosConfirmados', 'casosUci', 'casosFallecidos', 'casosHospitalizados', 'casosRecuperados',
                                   'casosConfirmadosDiario', 'casosUciDiario', 'casosFallecidosDiario',
                                   'casosHospitalizadosDiario', 'casosRecuperadosDiario']]
        pg_hook.run(sql_insert, parameters=(date, information[0], information[1],
                                            information[2], information[3], information[4],
                                            information[5], information[6], information[7],
                                            information[8], information[9], insert_ts))
Example no. 28
    def test_host_encoded_https_connection(self, mock_get_connection):
        c = Connection(conn_id='http_default', conn_type='http',
                       host='https://localhost')
        mock_get_connection.return_value = c
        hook = HttpHook()
        hook.get_conn({})
        self.assertEqual(hook.base_url, 'https://localhost')
Example no. 29
    def __init__(self,
                 http_conn_id,
                 token,
                 job_name,
                 data=None,
                 headers=None,
                 method='start',
                 daemon=True,
                 parallelism=0,
                 retry_times=3,
                 retry_sleep_time=1,
                 *args,
                 **kwargs):
        basic_headers = {'Content-Type': "application/json", 'Token': token}
        if headers:
            basic_headers.update(headers)
        self.headers = basic_headers
        self.http_conn_id = http_conn_id
        self.job_name = job_name
        self.http = HttpHook('POST', http_conn_id=self.http_conn_id)

        self.data = data if data is not None else {}
        self.job_last_run_id = dict()
        self.job_pools = []
        self.all_jobs = None
        self.finished_jobs = []
        self.parallelism = parallelism
        self.method = method
        self.daemon = daemon
        self.retry_times = retry_times
        self.retry_sleep_time = retry_sleep_time
        self.start_run_time = time.time()
        self.failed_jobs = defaultdict(int)

        super(_BaseJobOperator, self).__init__(*args, **kwargs)
Example no. 30
def insert_rows():

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={
                            'resource_id': table_variables['resource_id'],
                            'limit': '10000000'
                        })

    http_hook.check_response(response=res)

    bcn_covid_measures = res.json()['result']['records']

    bcn_covid_df = pd.DataFrame(bcn_covid_measures)
    bcn_covid_df = bcn_covid_df[[
        '_id', 'Data_Indicador', 'Font', 'Frequencia_Indicador',
        'Nom_Indicador', 'Nom_Variable', 'Territori', 'Unitat', 'Valor'
    ]]
    bcn_covid_df.replace({
        'NA': np.nan,
        '-Inf': np.nan,
        'Inf': np.nan
    },
                         inplace=True)
    insert_ts = datetime.utcnow()

    for row in bcn_covid_df.itertuples(index=False):
        pg_hook.run(sql_insert,
                    parameters=(row[0], row[1], row[2], row[3], row[4], row[5],
                                row[6], row[7], row[8], insert_ts))