Example 1
def test_repeated_polling():
    # Verify that we poll the expected number of times.
    poll_interval = 0.2
    poller = mock.Mock(return_value=Response({"state": "running"}))
    pollable = PollableResult(poller, (), polling_interval=poll_interval)
    pollable.done()  # Check status once to start the polling thread
    assert poller.call_count == 1, "Poll once on the first status check"
    time.sleep(2.2 * poll_interval)
    assert poller.call_count == 3, "After waiting 2.2x the polling interval"
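The behaviour pinned down here is a background thread that keeps calling the poller at a fixed interval after the first status check. A minimal, hypothetical sketch of such a loop (not the civis implementation; PollingLoop is an invented name):

import threading


class PollingLoop(threading.Thread):
    # Hypothetical sketch: call `poller` once per `interval` seconds
    # until stop() is requested.
    def __init__(self, poller, interval):
        super().__init__(daemon=True)
        self.poller = poller
        self.polling_interval = interval
        self._stop_event = threading.Event()

    def run(self):
        # Event.wait() returns False on timeout, so the loop polls once
        # per interval and exits promptly after stop() is called.
        while not self._stop_event.wait(self.polling_interval):
            self.poller()

    def stop(self):
        self._stop_event.set()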
Example 2
 def test_poll_on_creation(self):
     poller = mock.Mock(return_value=Response({"state": "running"}))
     pollable = PollableResult(poller, (),
                               polling_interval=0.01,
                               poll_on_creation=False)
     pollable.done()  # Check status once to start the polling thread
     assert poller.call_count == 0
     time.sleep(0.015)
     assert poller.call_count == 1
Example 3
 def test_reset_polling_thread(self):
     pollable = PollableResult(
         mock.Mock(return_value=Response({"state": "running"})),
         poller_args=(),
         polling_interval=0.1)
     initial_polling_thread = pollable._polling_thread
     assert pollable.polling_interval == 0.1
     assert pollable._polling_thread.polling_interval == 0.1
     pollable._reset_polling_thread(0.2)
     # Check that the polling interval was updated
     assert pollable.polling_interval == 0.2
     assert pollable._polling_thread.polling_interval == 0.2
     # Check that the _polling_thread is a new thread
     assert pollable._polling_thread != initial_polling_thread
     # Check that the old thread was stopped
     assert not initial_polling_thread.is_alive()
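The contract checked above is narrow: the interval is updated on both the result and its thread, the old thread is stopped, and a fresh thread takes over. A hedged sketch of that pattern, reusing the hypothetical PollingLoop from Example 1 (reset_polling_loop is an invented name, not the civis API):

def reset_polling_loop(pollable_like, new_interval):
    # Stop the old thread, record the new interval, start a replacement.
    old_thread = pollable_like._polling_thread
    old_thread.stop()
    pollable_like.polling_interval = new_interval
    pollable_like._polling_thread = PollingLoop(old_thread.poller, new_interval)
    pollable_like._polling_thread.start()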
Example 4
 def test_poll_on_creation(self):
     poller = mock.Mock(return_value=Response({"state": "running"}))
     pollable = PollableResult(poller, (),
                               polling_interval=0.01,
                               poll_on_creation=False)
     repr(pollable)  # repr() should not trigger an extra poll
     assert poller.call_count == 0
     time.sleep(0.02)
     assert poller.call_count == 1
Example 5
 def test_timeout(self):
     # Note: Something about the test framework seems to prevent the
     # PollableResult from being destroyed while the polling
     # thread is running. The test will hang if the PollableResult
     # never completes. I haven't seen the same problem in
     # the interpreter.
     pollable = PollableResult(
         mock.Mock(side_effect=[Response({"state": "running"}),
                                ValueError()]), (),
         polling_interval=0.1)
     pytest.raises(futures.TimeoutError, pollable.result, timeout=0.05)
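The timeout behaviour is the same outside the test suite. A self-contained sketch; the civis.polling import path is taken from the docstrings below, and civis.response for Response is an assumption:

from concurrent import futures
from unittest import mock

from civis.polling import PollableResult
from civis.response import Response  # assumed import path

pollable = PollableResult(
    mock.Mock(return_value=Response({"state": "running"})), (),
    polling_interval=0.1)
try:
    pollable.result(timeout=0.05)
except futures.TimeoutError:
    pass  # still running; retry later or call result() without a timeout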
Example 6
def query_civis(sql,
                database,
                api_key=None,
                credential_id=None,
                preview_rows=10,
                polling_interval=_DEFAULT_POLLING_INTERVAL):
    """Execute a SQL statement as a Civis query.

    Run a query that may return no results or where only a small
    preview is required. To execute a query that returns a large number
    of rows, see :func:`~civis.io.read_civis_sql`.

    Parameters
    ----------
    sql : str
        The SQL statement to execute.
    database : str or int
        The name or ID of the database.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The ID of the database credential. If ``None``, the default
        credential will be used.
    preview_rows : int, optional
        The maximum number of rows to return. No more than 100 rows can be
        returned at once.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Examples
    --------
    >>> run = query_civis(sql="DELETE FROM schema.table", database='database')
    >>> run.result()  # Wait for query to complete
    """
    client = APIClient(api_key=api_key)
    database_id = client.get_database_id(database)
    cred_id = credential_id or client.default_credential
    resp = client.queries.post(database_id,
                               sql,
                               preview_rows,
                               credential=cred_id)
    return PollableResult(client.queries.get, (resp.id, ), polling_interval)
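Because the returned PollableResult is future-like (later examples in this listing call add_done_callback on it), completion can also be handled with a callback instead of blocking; the SQL and names below are placeholders:

run = query_civis("GRANT SELECT ON schema.table TO GROUP analysts",
                  database="my_database")
run.add_done_callback(lambda fut: print("query finished"))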
Example 7
 def test_timeout(self):
     pollable = PollableResult(
         mock.Mock(return_value=Response({"state": "running"})),
         poller_args=(),
         polling_interval=0.1)
     pytest.raises(futures.TimeoutError, pollable.result, timeout=0.05)
Example 8
 def test_error_setting(self):
     pollable = PollableResult(mock.Mock(side_effect=[ZeroDivisionError()]),
                               (),
                               polling_interval=0.1)
     assert isinstance(pollable.exception(), ZeroDivisionError)
Example 9
 def test_error_passthrough(self):
     pollable = PollableResult(mock.Mock(side_effect=[ZeroDivisionError()]),
                               (),
                               polling_interval=0.1)
     pytest.raises(ZeroDivisionError, pollable.result)
Example 10
def create_pollable_result(state, exception=None, result=None):
    # Build a PollableResult for tests with its exception and result pre-set.
    # `State` is presumably a minimal poller stand-in defined in the test
    # module whose instances expose a matching `.state` attribute.
    f = PollableResult(State, (state, ), polling_interval=0.001)
    f._exception = exception
    f._result = result
    return f
Example 11
def transfer_table(source_db,
                   dest_db,
                   source_table,
                   dest_table,
                   job_name=None,
                   api_key=None,
                   source_credential_id=None,
                   dest_credential_id=None,
                   polling_interval=_DEFAULT_POLLING_INTERVAL,
                   **advanced_options):
    """Transfer a table from one location to another.

    Parameters
    ----------
    source_db : str or int
        The name or ID of the database where the source table is located.
    dest_db : str or int
        The name or ID of the database to which the table will be
        transferred.
    source_table : str
        Full name of the table to transfer, e.g., ``'schema.table'``.
    dest_table : str
        Full name of the table in the destination database, e.g.,
        ``'schema.table'``.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    source_credential_id : str or int, optional
        Optional credential ID for the source database. If ``None``, the
        default credential will be used.
    dest_credential_id : str or int, optional
        Optional credential ID for the destination database. If ``None``,
        the default credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for job completion.
    **advanced_options : kwargs
        Extra keyword arguments will be passed to the import sync job. See
        :func:`~civis.resources._resources.Imports.post_syncs`.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Examples
    --------
    >>> transfer_table(source_db='Cluster A', dest_db='Cluster B',
    ...                source_table='schema.tbl', dest_table='schema.tbl')
    """
    client = APIClient(api_key=api_key)
    source_cred_id = source_credential_id or client.default_credential
    dest_cred_id = dest_credential_id or client.default_credential
    job_name = maybe_get_random_name(job_name)
    source = {
        'remote_host_id': client.get_database_id(source_db),
        'credential_id': source_cred_id
    }
    destination = {
        'remote_host_id': client.get_database_id(dest_db),
        'credential_id': dest_cred_id
    }
    job_id = client.imports.post(job_name,
                                 "Dbsync",
                                 True,
                                 source=source,
                                 destination=destination).id

    client.imports.post_syncs(id=job_id,
                              source={'path': source_table},
                              destination={'path': dest_table},
                              advanced_options=advanced_options)
    run_id = client.imports.post_runs(id=job_id).run_id

    poll = PollableResult(client.imports.get_files_runs, (job_id, run_id),
                          polling_interval)
    return poll
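A usage sketch with placeholder cluster and table names: start the sync and block until the run finishes.

poll = transfer_table(source_db='Cluster A', dest_db='Cluster B',
                      source_table='schema.table', dest_table='schema.table')
poll.result()  # blocks until the import run completes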
Example 12
def csv_to_civis(filename,
                 database,
                 table,
                 api_key=None,
                 max_errors=None,
                 existing_table_rows="fail",
                 distkey=None,
                 sortkey1=None,
                 sortkey2=None,
                 delimiter=",",
                 headers=None,
                 credential_id=None,
                 polling_interval=_DEFAULT_POLLING_INTERVAL,
                 archive=True):
    """Upload the contents of a local CSV file to Civis.

    Parameters
    ----------
    filename : str
        Upload the contents of this file.
    database : str or int
        Upload data into this database. Can be the database name or ID.
    table : str
        The schema and table you want to upload to. E.g.,
        ``'scratch.table'``.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    max_errors : int, optional
        The maximum number of rows with errors to remove from the import
        before failing.
    existing_table_rows : str, optional
        The behaviour if a table with the requested name already exists.
        One of ``'fail'``, ``'truncate'`` or ``'append'``. Defaults to
        ``'fail'``.
    distkey : str, optional
        The column to use as the distkey for the table.
    sortkey1 : str, optional
        The column to use as the sortkey for the table.
    sortkey2 : str, optional
        The second column in a compound sortkey for the table.
    delimiter : str, optional
        The column delimiter. One of ``','``, ``'\\t'`` or ``'|'``.
    headers : bool, optional
        Whether or not the first row of the file should be treated as
        headers. The default, ``None``, attempts to autodetect whether
        or not the first row contains headers.
    credential_id : str or int, optional
        The ID of the database credential.  If ``None``, the default
        credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for job completion.
    archive : bool, optional
        If ``True`` (the default), archive the import job as soon as it
        completes.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Notes
    -----
    This reads the contents of `filename` into memory.

    Examples
    --------
    >>> with open('input_file.csv', 'w') as _input:
    ...     _input.write('a,b,c\\n1,2,3')
    >>> poller = civis.io.csv_to_civis('input_file.csv',
    ...                                'my-database',
    ...                                'scratch.my_data')
    >>> poller.result()
    """
    client = APIClient(api_key=api_key)
    schema, table = table.split(".", 1)
    db_id = client.get_database_id(database)
    cred_id = credential_id or client.default_credential
    delimiter = DELIMITERS.get(delimiter)
    assert delimiter, "delimiter must be one of {}".format(DELIMITERS.keys())

    kwargs = dict(schema=schema,
                  name=table,
                  remote_host_id=db_id,
                  credential_id=cred_id,
                  max_errors=max_errors,
                  existing_table_rows=existing_table_rows,
                  distkey=distkey,
                  sortkey1=sortkey1,
                  sortkey2=sortkey2,
                  column_delimiter=delimiter,
                  first_row_is_header=headers)

    import_job = client.imports.post_files(**kwargs)
    with open(filename, "rb") as data:
        put_response = requests.put(import_job.upload_uri, data)
    put_response.raise_for_status()
    run_job_result = client._session.post(import_job.run_uri)
    run_job_result.raise_for_status()
    run_info = run_job_result.json()
    poll = PollableResult(client.imports.get_files_runs,
                          (run_info['importId'], run_info['id']),
                          polling_interval=polling_interval)
    if archive:

        def f(x):
            return client.imports.put_archive(import_job.id, True)

        poll.add_done_callback(f)
    return poll
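A usage sketch with placeholder names: append a tab-delimited file that has a header row.

poll = csv_to_civis('data.tsv', 'my-database', 'scratch.my_table',
                    delimiter='\t', headers=True,
                    existing_table_rows='append')
poll.result()  # wait for the upload and import to finish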
Example 13
def civis_to_csv(filename,
                 sql,
                 database,
                 job_name=None,
                 api_key=None,
                 credential_id=None,
                 polling_interval=_DEFAULT_POLLING_INTERVAL,
                 archive=True):
    """Export data from Civis to a local CSV file.

    Parameters
    ----------
    filename : str
        Download exported data into this file.
    sql : str
        The SQL select string to be executed.
    database : str or int
        Export data from this database. Can be the database name or ID.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The ID of the database credential.  If ``None``, the default
        credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    archive : bool, optional
        If ``True`` (the default), archive the export job as soon as it
        completes.

    Returns
    -------
    results : :class:`~civis.polling.PollableResult`
        A `PollableResult` object.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> poll = civis_to_csv("file.csv", sql, "my_database")
    >>> poll.result()  # Wait for job to complete

    See Also
    --------
    civis.io.read_civis : Read table contents into memory.
    civis.io.read_civis_sql : Read results of a SQL query into memory.
    """
    client = APIClient(api_key=api_key)
    script_id, run_id = _sql_script(client, sql, database, job_name,
                                    credential_id)
    poll = PollableResult(client.scripts.get_sql_runs, (script_id, run_id),
                          polling_interval)
    download = _download_callback(script_id, run_id, client, filename)
    poll.add_done_callback(download)
    if archive:

        def f(x):
            return client.scripts.put_sql_archive(script_id, True)

        poll.add_done_callback(f)

    return poll
Example 14
def read_civis_sql(sql,
                   database,
                   use_pandas=False,
                   job_name=None,
                   api_key=None,
                   credential_id=None,
                   polling_interval=_DEFAULT_POLLING_INTERVAL,
                   archive=True,
                   **kwargs):
    """Read data from Civis using a custom SQL string.

    Parameters
    ----------
    sql : str
        The SQL select string to be executed.
    database : str or int
        Execute the query against this database. Can be the database name
        or ID.
    use_pandas : bool, optional
        If ``True``, return a :class:`pandas:pandas.DataFrame`. Otherwise,
        return a list of results from :func:`python:csv.reader`.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The database credential ID.  If ``None``, the default credential
        will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    archive : bool, optional
        If ``True`` (the default), archive the export job as soon as it
        completes.
    **kwargs : kwargs
        Extra keyword arguments are passed into
        :func:`pandas:pandas.read_csv` if `use_pandas` is ``True`` or
        passed into :func:`python:csv.reader` if `use_pandas` is
        ``False``.

    Returns
    -------
    data : :class:`pandas:pandas.DataFrame` or list
        A list of rows (with header as first row) if `use_pandas` is
        ``False``, otherwise a `pandas` `DataFrame`. Note that if
        `use_pandas` is ``False``, no parsing of types is performed and
        each row will be a list of strings.

    Raises
    ------
    ImportError
        If `use_pandas` is ``True`` and `pandas` is not installed.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> df = read_civis_sql(sql, "my_database", use_pandas=True)
    >>> col_a = df["column_a"]

    >>> data = read_civis_sql(sql, "my_database")
    >>> columns = data.pop(0)
    >>> col_a_index = columns.index("column_a")
    >>> col_a = [row[col_a_index] for row in data]

    Notes
    -----
    This reads the data into memory.

    See Also
    --------
    civis.io.read_civis : Read directly into memory without SQL.
    civis.io.civis_to_csv : Write directly to a CSV file.
    """
    if use_pandas and NO_PANDAS:
        raise ImportError("use_pandas is True but pandas is not installed.")
    client = APIClient(api_key=api_key)
    script_id, run_id = _sql_script(client, sql, database, job_name,
                                    credential_id)
    poll = PollableResult(client.scripts.get_sql_runs, (script_id, run_id),
                          polling_interval)
    if archive:

        def f(x):
            return client.scripts.put_sql_archive(script_id, True)

        poll.add_done_callback(f)
    poll.result()
    outputs = client.scripts.get_sql_runs(script_id, run_id)["output"]
    if not outputs:
        raise EmptyResultError(
            "Query {} returned no output.".format(script_id))
    url = outputs[0]["path"]
    if use_pandas:
        data = pd.read_csv(url, **kwargs)
    else:
        r = requests.get(url)
        r.raise_for_status()
        data = list(csv.reader(StringIO(r.text), **kwargs))
    return data
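Because extra keyword arguments are forwarded to pandas.read_csv when use_pandas is True, parsing options pass straight through; the column names below are placeholders:

df = read_civis_sql("SELECT id, created_at FROM schema.table",
                    "my_database", use_pandas=True,
                    parse_dates=["created_at"])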