def test_token_refresh():
    """
    Check that a client created with an invalid access token ends up with a new one.

    The client is built with the placeholder token "invalid". After a count and a full
    query of the "rigs" dataset, the number of returned records must match the count and
    the client's stored token must differ from the placeholder. Finally, constructing a
    client without any credentials is expected to raise DAAuthException.

    :return:
    """
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
        access_token="invalid",
    )
    invalid_token = d2.access_token
    count = d2.count("rigs", deleteddate="null")
    query = d2.query("rigs", pagesize=10000, deleteddate="null")
    # Count the records lazily instead of materializing all of them in a list
    assert sum(1 for _ in query) == count
    assert invalid_token != d2.access_token

    # Test client with no credentials
    try:
        DirectAccessV2(
            api_key=None, client_id=None, client_secret=None, log_level=LOG_LEVEL
        )
    except DAAuthException:
        pass
    else:
        # Without this branch the negative test would pass even if nothing was raised
        raise AssertionError(
            "DirectAccessV2 accepted missing credentials without raising DAAuthException"
        )
    return
def load(endpoint, **options):
    """
    A generic load function that will be called by each of the three processes.

    Queries the endpoint and writes every record to the file "<endpoint>.csv". The header
    row is taken from the keys of the first record. Progress is printed every ``pagesize``
    records (every 100000 when no pagesize option is given), followed by a final summary.

    :param endpoint: the Direct Access API endpoint
    :param options: the query parameters to provide on the endpoint
    :return:
    """
    # Create a DirectAccessV2 client within the function, providing it our already existing access token
    # and thus avoiding unnecessary authentication calls
    client = DirectAccessV2(
        api_key=os.getenv('DIRECTACCESS_API_KEY'),
        client_id=os.getenv('DIRECTACCESS_CLIENT_ID'),
        client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET'),
        access_token=ACCESS_TOKEN
    )

    # The reporting interval does not change while iterating, so look it up once
    progress_interval = options.get('pagesize', 100000)
    # Start at 0 so an empty result set reports "Final count: 0" rather than "None"
    count = 0
    # newline='' lets the csv module control line endings itself; without it,
    # platforms that translate "\n" end up with an extra blank line after every row
    with open(endpoint + '.csv', mode='w', newline='') as f:
        writer = csv.writer(f)
        for count, row in enumerate(client.query(endpoint, **options), start=1):
            if count == 1:
                # The first record supplies the header
                writer.writerow(row.keys())
            writer.writerow(row.values())

            if count % progress_interval == 0:
                print('Wrote {} records for {}'.format(count, endpoint))

    print('Completed writing {}. Final count: {}'.format(endpoint, count))
    return
def test_ddl():
    """
    Check the DDL returned for the "rigs" dataset.

    The DDL requested with database="pg" is written to a temporary file and its first line
    must begin with "CREATE". Requesting DDL with an invalid database parameter is expected
    to raise DAQueryException.

    :return:
    """
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    ddl = d2.ddl("rigs", database="pg")
    with TemporaryFile(mode="w+") as f:
        f.write(ddl)
        f.seek(0)
        # readline() returns "" for an empty file, so an empty DDL now fails the
        # assertion below instead of skipping the check altogether
        first_line = f.readline()
    assert first_line.split(" ")[0] == "CREATE"

    # Neg - test ddl with invalid database parameter
    try:
        d2.ddl("rigs", database="invalid")
    except DAQueryException:
        pass
    else:
        # Without this branch the negative test would pass even if nothing was raised
        raise AssertionError(
            "ddl() accepted an invalid database parameter without raising DAQueryException"
        )
    return
def test_dataframe():
    """
    Load the "rigs" dataset into a DataFrame and verify its index name and column dtypes.

    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
    )
    frame = client.to_dataframe("rigs", pagesize=10000, deleteddate="null")

    # The index must be named after the API endpoint "primary key"
    assert frame.index.name == "RigID"

    # Timestamp columns must be datetime64[ns]
    for column in ("CreatedDate", "DeletedDate", "SpudDate", "UpdatedDate"):
        assert is_datetime64_ns_dtype(getattr(frame, column))

    # Depth columns must be Int64
    for column in ("PermitDepth", "FormationDepth"):
        assert is_int64_dtype(getattr(frame, column))

    # Coordinate columns must be float
    for column in ("RigLatitudeWGS84", "RigLongitudeWGS84"):
        assert is_float_dtype(getattr(frame, column))
def test_csv():
    """
    Write Direct Access query results to CSV

    The "rigs" dataset is exported to a file inside a fresh temporary directory and the
    number of rows read back must equal the dataset count plus one. The temporary directory
    is removed afterwards, whether or not the assertions pass.

    :return:
    """
    # Local import: only this test needs it, and only for temp-dir cleanup
    import shutil

    tempdir = mkdtemp()
    try:
        path = os.path.join(tempdir, 'rigs.csv')
        d2 = DirectAccessV2(
            api_key=DIRECTACCESS_API_KEY,
            client_id=DIRECTACCESS_CLIENT_ID,
            client_secret=DIRECTACCESS_CLIENT_SECRET,
            retries=5,
            backoff_factor=10,
            log_level=LOG_LEVEL,
            access_token=DIRECTACCESS_ACCESS_TOKEN
        )

        dataset = 'rigs'
        options = dict(pagesize=10000, deleteddate='null')
        count = d2.count(dataset, **options)
        query = d2.query(dataset, **options)
        d2.to_csv(query, path=path, log_progress=True, delimiter=',', quoting=csv.QUOTE_MINIMAL)

        # newline='' is what the csv module asks for when reading, so that newlines
        # embedded in quoted fields are interpreted correctly
        with open(path, mode='r', newline='') as f:
            # Count rows lazily instead of building a list of every row
            row_count = sum(1 for _ in csv.reader(f))
        # The extra row is presumably the header written by to_csv
        assert row_count == (count + 1)
    finally:
        # mkdtemp() never cleans up after itself; remove the directory explicitly
        shutil.rmtree(tempdir, ignore_errors=True)
def set_token():
    """
    Make sure the DIRECTACCESS_TOKEN environment variable holds an access token.

    If the variable is already set to a non-empty value nothing happens. Otherwise a
    DirectAccessV2 client is built from the DIRECTACCESS_CLIENT_ID,
    DIRECTACCESS_CLIENT_SECRET and DIRECTACCESS_API_KEY environment variables and its
    access token is stored in DIRECTACCESS_TOKEN.

    :return:
    """
    env = os.environ
    if env.get("DIRECTACCESS_TOKEN"):
        # A token is already available; nothing to do
        return

    client = DirectAccessV2(
        client_id=env.get("DIRECTACCESS_CLIENT_ID"),
        client_secret=env.get("DIRECTACCESS_CLIENT_SECRET"),
        api_key=env.get("DIRECTACCESS_API_KEY"),
    )
    env["DIRECTACCESS_TOKEN"] = client.access_token
def test_docs():
    """
    Request the docs for the "well-origins" dataset and check the type of the response.

    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    result = client.docs("well-origins")
    # A falsy response is tolerated; anything else must be a list
    assert not result or isinstance(result, list)
def test_v2_query():
    """
    Query the "rigs" dataset and check that at least one record comes back.

    At most the first 1000 records are pulled from the query.

    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    results = client.query("rigs", pagesize=10000, deleteddate="null")

    collected = []
    for position, record in enumerate(results, start=1):
        collected.append(record)
        # Stop once the first 1000 records have been gathered
        if position == 1000:
            break

    assert collected
def test_count():
    """
    Check that count() returns an integer for the "rigs" dataset.

    Counting an invalid dataset is expected to raise DADatasetException.

    :return:
    """
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    count = d2.count("rigs", deleteddate="null")
    assert count is not None
    assert isinstance(count, int)

    # Neg - test count for invalid dataset
    try:
        d2.count("invalid")
    except DADatasetException:
        pass
    else:
        # Without this branch the negative test would pass even if nothing was raised
        raise AssertionError(
            "count() accepted an invalid dataset without raising DADatasetException"
        )
    return
def test_multiple_processes():
    """
    Launch two child processes, one for rigs and one for permits.

    Both children run ``query`` with a shared access token. If no token is available in
    DIRECTACCESS_ACCESS_TOKEN, one is requested first and exported through the
    DIRECTACCESS_ACCESS_TOKEN environment variable. The test fails when any child exits
    with a non-zero exit code.

    :return:
    """
    if not DIRECTACCESS_ACCESS_TOKEN:
        access_token = DirectAccessV2(
            api_key=DIRECTACCESS_API_KEY,
            client_id=DIRECTACCESS_CLIENT_ID,
            client_secret=DIRECTACCESS_CLIENT_SECRET,
            retries=5,
            backoff_factor=10
        ).access_token
        os.environ['DIRECTACCESS_ACCESS_TOKEN'] = access_token
    else:
        access_token = DIRECTACCESS_ACCESS_TOKEN

    procs = [
        Process(
            target=query,
            kwargs=dict(
                endpoint=endpoint,
                access_token=access_token
            )
        )
        for endpoint in ('rigs', 'permits')
    ]

    # Start everything before joining so the children actually run concurrently
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    # An exception or failed assertion inside a child only shows up as a non-zero
    # exit code; without this check the test could never fail because of a child
    for proc in procs:
        assert proc.exitcode == 0, 'Child process {} exited with code {}'.format(
            proc.name, proc.exitcode
        )
    return
def query(endpoint, access_token, **options):
    """
    Query method target for multiprocessing child processes.

    Builds a client that reuses the given access token, starts a query against the endpoint
    and checks that at least one record can be fetched from it.

    :param endpoint: a valid Direct Access API dataset endpoint
    :param access_token: a Direct Access API access token
    :param options: kwargs of valid query parameters for the dataset endpoint
    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=5,
        access_token=access_token,
        log_level=LOG_LEVEL,
    )

    resp = client.query(endpoint, **options)
    # Asserting on the query object itself proves nothing (a generator is always truthy).
    # Fetch the first record with a default so an empty result fails with a clear
    # message instead of a bare StopIteration.
    first_record = next(resp, None)
    assert first_record is not None, 'No records returned for {}'.format(endpoint)
    return
When provided, an initial authentication request will not be made. We still provide our API Key, Client ID and Client Secret to the class so that the access token can be refreshed if needed. In the sample below, we simultaneously write three CSVs from the producing-entities, well-rollups and permits API endpoints. This results in much faster loading time than when done sequentially. """ import os import csv from multiprocessing import Process from directaccess import DirectAccessV2 # Retrieve our access token ACCESS_TOKEN = DirectAccessV2( api_key=os.getenv('DIRECTACCESS_API_KEY'), client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET') ).access_token def load(endpoint, **options): """ A generic load function that will be called by each of the three processes. :param endpoint: the Direct Access API endpoint :param options: the query parameters to provide on the endpoint :return: """ # Create a DirectAccessV2 client within the function, providing it our already existing access token # and thus avoiding unnecessary authentication calls client = DirectAccessV2(
""" import os import logging from multiprocessing import Process from directaccess import DirectAccessV2 DIRECTACCESS_API_KEY = os.environ.get('DIRECTACCESS_API_KEY') DIRECTACCESS_CLIENT_ID = os.environ.get('DIRECTACCESS_CLIENT_ID') DIRECTACCESS_CLIENT_SECRET = os.environ.get('DIRECTACCESS_CLIENT_SECRET') if not os.environ.get('DIRECTACCESS_ACCESS_TOKEN'): access_token = DirectAccessV2( api_key=DIRECTACCESS_API_KEY, client_id=DIRECTACCESS_CLIENT_ID, client_secret=DIRECTACCESS_CLIENT_SECRET, ).access_token os.environ['DIRECTACCESS_ACCESS_TOKEN'] = access_token DIRECTACCESS_ACCESS_TOKEN = os.environ.get('DIRECTACCESS_ACCESS_TOKEN') LOG_LEVEL = logging.DEBUG if os.environ.get('CIRCLE_JOB'): LOG_LEVEL = logging.ERROR def query(endpoint, access_token, **options): """ Query method target for multiprocessing child processes. :param endpoint: a valid Direct Access API dataset endpoint
def test_query():
    """
    Authenticate and query Direct Access API for docs, ddl, count and query methods

    Covers, in order: a V1 query, V2 docs, V2 DDL, V2 count, two negative cases (invalid
    dataset for count, invalid database for ddl), a V2 query, token refresh starting from
    an invalid access token, and client construction without credentials.

    :return:
    """
    # Test V1 query
    d1 = DirectAccessV1(
        api_key=DIRECTACCESS_API_KEY,
        log_level=LOG_LEVEL
    )
    query = d1.query('rigs', pagesize=1000)
    records = list()
    for i, row in enumerate(query, start=1):
        records.append(row)
        if i % 1000 == 0:
            break
    assert records

    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL
    )

    # Test docs
    docs = d2.docs('well-origins')
    if docs:
        assert isinstance(docs, list)

    # Test DDL
    ddl = d2.ddl('rigs', database='pg')
    with TemporaryFile(mode='w+') as f:
        f.write(ddl)
        f.seek(0)
        # readline() returns '' for an empty file, so an empty DDL fails the
        # assertion below instead of skipping the check altogether
        first_line = f.readline()
    assert first_line.split(' ')[0] == 'CREATE'

    # Test count
    count = d2.count('rigs', deleteddate='null')
    assert count is not None
    assert isinstance(count, int)

    # Neg - test count for invalid dataset
    # The result is deliberately not bound to `count`: the token refresh check
    # further down still needs the real record count.
    try:
        d2.count('invalid')
    except DADatasetException:
        pass
    else:
        raise AssertionError(
            'count() accepted an invalid dataset without raising DADatasetException'
        )

    # Neg - test ddl with invalid database parameter
    try:
        d2.ddl('rigs', database='invalid')
    except DAQueryException:
        pass
    else:
        raise AssertionError(
            'ddl() accepted an invalid database parameter without raising DAQueryException'
        )

    # Test query
    query = d2.query('rigs', pagesize=10000, deleteddate='null')
    records = list()
    for i, row in enumerate(query, start=1):
        records.append(row)
        if i % 1000 == 0:
            break
    assert records

    # Test token refresh
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
        access_token='invalid'
    )
    invalid_token = d2.access_token
    query = d2.query('rigs', pagesize=10000, deleteddate='null')
    # Count the records lazily instead of materializing all of them in a list
    assert sum(1 for _ in query) == count
    assert invalid_token != d2.access_token

    # Test client with no credentials
    try:
        DirectAccessV2(
            api_key=None,
            client_id=None,
            client_secret=None,
            log_level=LOG_LEVEL
        )
    except DAAuthException:
        pass
    else:
        raise AssertionError(
            'DirectAccessV2 accepted missing credentials without raising DAAuthException'
        )

    return