def test_repeated_polling():
    """The polling thread should fire the poller once per interval."""
    interval = 0.2
    mock_poller = mock.Mock(return_value=Response({"state": "running"}))
    pollable = PollableResult(mock_poller, (), polling_interval=interval)

    # The first status check is what starts the background polling thread.
    pollable.done()
    assert mock_poller.call_count == 1, "Poll once on the first status check"

    # Sleeping a bit past two intervals should allow exactly two more polls.
    time.sleep(2.2 * interval)
    assert mock_poller.call_count == 3, "After waiting 2.2x the polling interval"
def test_poll_on_creation(self):
    """With poll_on_creation=False, the first status check must not poll."""
    mock_poller = mock.Mock(return_value=Response({"state": "running"}))
    pollable = PollableResult(mock_poller, (),
                              polling_interval=0.01,
                              poll_on_creation=False)
    pollable.done()  # Check status once to start the polling thread
    # No poll yet: creation-time polling is disabled.
    assert mock_poller.call_count == 0
    # After one interval has elapsed, the thread polls for the first time.
    time.sleep(0.015)
    assert mock_poller.call_count == 1
def test_reset_polling_thread(self):
    """_reset_polling_thread installs a fresh thread with the new interval."""
    pollable = PollableResult(
        mock.Mock(return_value=Response({"state": "running"})),
        poller_args=(),
        polling_interval=0.1)
    old_thread = pollable._polling_thread
    assert pollable.polling_interval == 0.1
    assert pollable._polling_thread.polling_interval == 0.1

    pollable._reset_polling_thread(0.2)

    # The new interval shows up on both the result and its thread...
    assert pollable.polling_interval == 0.2
    assert pollable._polling_thread.polling_interval == 0.2
    # ...the thread object itself was replaced...
    assert pollable._polling_thread != old_thread
    # ...and the previous thread has been stopped.
    assert not old_thread.is_alive()
def test_poll_on_creation(self):
    """repr() must not trigger a poll when poll_on_creation is False.

    Note: the poller must be configured with ``return_value``, not
    ``side_effect`` -- ``mock`` treats a non-exception ``side_effect``
    value as a callable or an iterable of per-call results, so a
    ``Response`` passed as ``side_effect`` would not be returned from
    each poll as intended (compare the sibling poll_on_creation test).
    """
    poller = mock.Mock(return_value=Response({"state": "running"}))
    pollable = PollableResult(poller, (),
                              polling_interval=0.01,
                              poll_on_creation=False)
    repr(pollable)
    # repr() checks status but must not poll at creation time.
    assert poller.call_count == 0
    # One interval later, the polling thread has polled exactly once.
    time.sleep(0.02)
    assert poller.call_count == 1
def test_timeout(self):
    # Note: Something about the test framework seems to prevent the
    # Pollable result from being destroyed while the polling
    # thread is running. The test will hang if the PollableResult
    # never completes. I haven't seen the same problem in
    # the interpreter.
    #
    # The ValueError as the second side effect makes the poller fail on
    # its second call, which terminates the polling thread and lets the
    # test tear down cleanly.
    poller = mock.Mock(
        side_effect=[Response({"state": "running"}), ValueError()])
    pollable = PollableResult(poller, (), polling_interval=0.1)
    with pytest.raises(futures.TimeoutError):
        pollable.result(timeout=0.05)
def query_civis(sql, database, api_key=None, credential_id=None,
                preview_rows=10,
                polling_interval=_DEFAULT_POLLING_INTERVAL):
    """Execute a SQL statement as a Civis query.

    Run a query that may return no results or where only a small
    preview is required. To execute a query that returns a large number
    of rows, see :func:`~civis.io.read_civis_sql`.

    Parameters
    ----------
    sql : str
        The SQL statement to execute.
    database : str or int
        The name or ID of the database.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The ID of the database credential. If ``None``, the default
        credential will be used.
    preview_rows : int, optional
        The maximum number of rows to return. No more than 100 rows can be
        returned at once.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Examples
    --------
    >>> run = query_civis(sql="DELETE schema.table", database='database')
    >>> run.result()  # Wait for query to complete
    """
    client = APIClient(api_key=api_key)
    db_id = client.get_database_id(database)
    # Fall back to the user's default credential when none is supplied.
    cred = credential_id or client.default_credential
    query = client.queries.post(db_id, sql, preview_rows, credential=cred)
    return PollableResult(client.queries.get, (query.id, ), polling_interval)
def test_timeout(self):
    """result(timeout=...) raises TimeoutError while the job still runs."""
    never_done = mock.Mock(return_value=Response({"state": "running"}))
    pollable = PollableResult(never_done,
                              poller_args=(),
                              polling_interval=0.1)
    with pytest.raises(futures.TimeoutError):
        pollable.result(timeout=0.05)
def test_error_setting(self):
    """An exception raised by the poller is captured on the result."""
    failing_poller = mock.Mock(side_effect=[ZeroDivisionError()])
    pollable = PollableResult(failing_poller, (), polling_interval=0.1)
    assert isinstance(pollable.exception(), ZeroDivisionError)
def test_error_passthrough(self):
    """An exception raised by the poller propagates out of .result()."""
    failing_poller = mock.Mock(side_effect=[ZeroDivisionError()])
    pollable = PollableResult(failing_poller, (), polling_interval=0.1)
    with pytest.raises(ZeroDivisionError):
        pollable.result()
def create_pollable_result(state, exception=None, result=None):
    """Build a PollableResult fixture in `state` with a preset outcome."""
    pollable = PollableResult(State, (state, ), polling_interval=0.001)
    # Preload the future's internals so tests can exercise terminal states
    # without waiting for the polling thread.
    pollable._exception = exception
    pollable._result = result
    return pollable
def transfer_table(source_db, dest_db, source_table, dest_table,
                   job_name=None, api_key=None, source_credential_id=None,
                   dest_credential_id=None,
                   polling_interval=_DEFAULT_POLLING_INTERVAL,
                   **advanced_options):
    """Transfer a table from one location to another.

    Parameters
    ----------
    source_db : str or int
        The name of the database where the source table is located.
        Optionally, could be the database ID.
    dest_db : str or int
        The name of the database where the table will be transfered.
        Optionally, could be the database ID.
    source_table : str
        Full name of the table to transfer, e.g., ``'schema.table'``.
    dest_table : str
        Full name of the table in the destination database, e.g.,
        ``'schema.table'``.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    source_credential_id : str or int, optional
        Optional credential ID for the source database. If ``None``, the
        default credential will be used.
    dest_credential_id : str or int, optional
        Optional credential ID for the destination database. If ``None``,
        the default credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for job completion.
    **advanced_options : kwargs
        Extra keyword arguments will be passed to the import sync job. See
        :func:`~civis.resources._resources.Imports.post_syncs`.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Examples
    --------
    >>> transfer_table(source_db='Cluster A', dest_db='Cluster B',
    ...                source_table='schma.tbl', dest_table='schma.tbl')
    """
    client = APIClient(api_key=api_key)

    def _endpoint(db, credential):
        # Each side of a Dbsync import is a remote host plus a credential;
        # a missing credential falls back to the user's default.
        return {'remote_host_id': client.get_database_id(db),
                'credential_id': credential or client.default_credential}

    source = _endpoint(source_db, source_credential_id)
    destination = _endpoint(dest_db, dest_credential_id)
    job_name = maybe_get_random_name(job_name)

    job_id = client.imports.post(job_name, "Dbsync", True,
                                 source=source,
                                 destination=destination).id
    client.imports.post_syncs(id=job_id,
                              source={'path': source_table},
                              destination={'path': dest_table},
                              advanced_options=advanced_options)
    run_id = client.imports.post_runs(id=job_id).run_id
    return PollableResult(client.imports.get_files_runs,
                          (job_id, run_id),
                          polling_interval)
def csv_to_civis(filename, database, table, api_key=None,
                 max_errors=None, existing_table_rows="fail",
                 distkey=None, sortkey1=None, sortkey2=None,
                 delimiter=",", headers=None, credential_id=None,
                 polling_interval=_DEFAULT_POLLING_INTERVAL,
                 archive=True):
    """Upload the contents of a local CSV file to Civis.

    Parameters
    ----------
    filename : str
        Upload the contents of this file.
    database : str or int
        Upload data into this database. Can be the database name or ID.
    table : str
        The schema and table you want to upload to. E.g.,
        ``'scratch.table'``.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    max_errors : int, optional
        The maximum number of rows with errors to remove from the import
        before failing.
    existing_table_rows : str, optional
        The behaviour if a table with the requested name already exists.
        One of ``'fail'``, ``'truncate'`` or ``'append'``. Defaults to
        ``'fail'``.
    distkey : str, optional
        The column to use as the distkey for the table.
    sortkey1 : str, optional
        The column to use as the sortkey for the table.
    sortkey2 : str, optional
        The second column in a compound sortkey for the table.
    delimiter : string, optional
        The column delimiter. One of ``','``, ``'\\t'`` or ``'|'``.
    headers : bool, optional
        Whether or not the first row of the file should be treated as
        headers. The default, ``None``, attempts to autodetect whether
        or not the first row contains headers.
    credential_id : str or int, optional
        The ID of the database credential. If ``None``, the default
        credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for job completion.
    archive : bool, optional
        If ``True`` (the default), archive the import job as soon as it
        completes.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Raises
    ------
    ValueError
        If `delimiter` is not one of the supported delimiters.

    Notes
    -----
    This reads the contents of `filename` into memory.

    Examples
    --------
    >>> with open('input_file.csv', 'w') as _input:
    ...     _input.write('a,b,c\\n1,2,3')
    >>> poller = civis.io.csv_to_civis('input_file.csv',
    ...                                'my-database',
    ...                                'scratch.my_data')
    >>> poller.result()
    """
    client = APIClient(api_key=api_key)
    schema, table = table.split(".", 1)
    db_id = client.get_database_id(database)
    cred_id = credential_id or client.default_credential
    delimiter = DELIMITERS.get(delimiter)
    # Validate with an explicit exception rather than `assert`: asserts
    # are stripped when Python runs with -O, which would silently pass a
    # None delimiter to the import job.
    if not delimiter:
        raise ValueError(
            "delimiter must be one of {}".format(DELIMITERS.keys()))

    kwargs = dict(schema=schema, name=table, remote_host_id=db_id,
                  credential_id=cred_id, max_errors=max_errors,
                  existing_table_rows=existing_table_rows, distkey=distkey,
                  sortkey1=sortkey1, sortkey2=sortkey2,
                  column_delimiter=delimiter, first_row_is_header=headers)

    import_job = client.imports.post_files(**kwargs)
    # Upload the raw file bytes to the presigned URI, then kick off the run.
    with open(filename, "rb") as data:
        put_response = requests.put(import_job.upload_uri, data)
    put_response.raise_for_status()
    run_job_result = client._session.post(import_job.run_uri)
    run_job_result.raise_for_status()
    run_info = run_job_result.json()
    poll = PollableResult(client.imports.get_files_runs,
                          (run_info['importId'], run_info['id']),
                          polling_interval=polling_interval)
    if archive:
        # Archive the import job once it finishes (done-callback fires
        # regardless of success or failure).
        def f(x):
            return client.imports.put_archive(import_job.id, True)

        poll.add_done_callback(f)
    return poll
def civis_to_csv(filename, sql, database, job_name=None, api_key=None,
                 credential_id=None,
                 polling_interval=_DEFAULT_POLLING_INTERVAL,
                 archive=True):
    """Export data from Civis to a local CSV file.

    Parameters
    ----------
    filename : str
        Download exported data into this file.
    sql : str, optional
        The SQL select string to be executed.
    database : str or int
        Export data from this database. Can be the database name or ID.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The ID of the database credential. If ``None``, the default
        credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    archive : bool, optional
        If ``True`` (the default), archive the export job as soon as it
        completes.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> poll = civis_to_csv("file.csv", sql, "my_database")
    >>> poll.result()  # Wait for job to complete

    See Also
    --------
    civis.io.read_civis : Read table contents into memory.
    civis.io.read_civis_sql : Read results of a SQL query into memory.
    """
    client = APIClient(api_key=api_key)
    script_id, run_id = _sql_script(client, sql, database,
                                    job_name, credential_id)
    poll = PollableResult(client.scripts.get_sql_runs,
                          (script_id, run_id),
                          polling_interval)
    # When the script run completes, stream its output into `filename`.
    poll.add_done_callback(
        _download_callback(script_id, run_id, client, filename))
    if archive:
        def f(x):
            return client.scripts.put_sql_archive(script_id, True)

        poll.add_done_callback(f)
    return poll
def read_civis_sql(sql, database, use_pandas=False, job_name=None,
                   api_key=None, credential_id=None,
                   polling_interval=_DEFAULT_POLLING_INTERVAL,
                   archive=True, **kwargs):
    """Read data from Civis using a custom SQL string.

    Parameters
    ----------
    sql : str, optional
        The SQL select string to be executed.
    database : str or int
        Execute the query against this database. Can be the database
        name or ID.
    use_pandas : bool, optional
        If ``True``, return a :class:`pandas:pandas.DataFrame`. Otherwise,
        return a list of results from :func:`python:csv.reader`.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The database credential ID. If ``None``, the default credential
        will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    archive : bool, optional
        If ``True`` (the default), archive the export job as soon as it
        completes.
    **kwargs : kwargs
        Extra keyword arguments are passed into
        :func:`pandas:pandas.read_csv` if `use_pandas` is ``True`` or
        passed into :func:`python:csv.reader` if `use_pandas` is
        ``False``.

    Returns
    -------
    data : :class:`pandas:pandas.DataFrame` or list
        A list of rows (with header as first row) if `use_pandas` is
        ``False``, otherwise a `pandas` `DataFrame`. Note that if
        `use_pandas` is ``False``, no parsing of types is performed and
        each row will be a list of strings.

    Raises
    ------
    ImportError
        If `use_pandas` is ``True`` and `pandas` is not installed.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> df = read_civis_sql(sql, "my_database", use_pandas=True)
    >>> col_a = df["column_a"]

    >>> data = read_civis_sql(sql, "my_database")
    >>> columns = data.pop(0)
    >>> col_a_index = columns.index("column_a")
    >>> col_a = [row[col_a_index] for row in data]

    Notes
    -----
    This reads the data into memory.

    See Also
    --------
    civis.io.read_civis : Read directly into memory without SQL.
    civis.io.civis_to_csv : Write directly to a CSV file.
    """
    # Fail fast before talking to the API if pandas was requested but is
    # unavailable.
    if use_pandas and NO_PANDAS:
        raise ImportError("use_pandas is True but pandas is not installed.")

    client = APIClient(api_key=api_key)
    script_id, run_id = _sql_script(client, sql, database,
                                    job_name, credential_id)
    poll = PollableResult(client.scripts.get_sql_runs,
                          (script_id, run_id),
                          polling_interval)
    if archive:
        def f(x):
            return client.scripts.put_sql_archive(script_id, True)

        poll.add_done_callback(f)

    # Block until the script run finishes, then fetch its output files.
    poll.result()
    outputs = client.scripts.get_sql_runs(script_id, run_id)["output"]
    if not outputs:
        raise EmptyResultError(
            "Query {} returned no output.".format(script_id))

    url = outputs[0]["path"]
    if use_pandas:
        return pd.read_csv(url, **kwargs)
    response = requests.get(url)
    response.raise_for_status()
    return list(csv.reader(StringIO(response.text), **kwargs))