def _retrieve_file(fname, job_id, run_id, local_dir, client=None): """Download a Civis file using a reference on a previous run""" file_id = cio.file_id_from_run_output(fname, job_id, run_id, client=client) fpath = os.path.join(local_dir, fname) # fname may contain a path output_dir = os.path.dirname(fpath) if not os.path.exists(output_dir): os.makedirs(output_dir) with open(fpath, 'wb') as down_file: cio.civis_to_file(file_id, down_file, client=client) return fpath
def civis_to_multifile_csv(sql, database, job_name=None, api_key=None, client=None, credential_id=None, include_header=True, compression='none', delimiter='|', unquoted=False, prefix=None, polling_interval=None, hidden=True): """Unload the result of SQL query and return presigned urls. This function is intended for unloading large queries/tables from redshift as it uses a 'PARALLEL ON' S3 unload. It returns a similar manifest file to conventional S3 UNLOAD statements except the CSV parts are accessible via both files endpoint IDs and presigned S3 urls. Parameters ---------- sql : str, optional The SQL select string to be executed. database : str or int Execute the query against this database. Can be the database name or ID. job_name : str, optional A name to give the job. If omitted, a random job name will be used. api_key : DEPRECATED str, optional Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY` environment variable will be used. client : :class:`civis.APIClient`, optional If not provided, an :class:`civis.APIClient` object will be created from the :envvar:`CIVIS_API_KEY`. credential_id : str or int, optional The database credential ID. If ``None``, the default credential will be used. include_header: bool, optional If ``True`` include a key in the returned dictionary containing a list of column names. Default: ``True``. compression: str, optional Type of compression to use, if any. One of ``'none'``, ``'zip'``, or ``'gzip'``. Default ``'none'``. delimiter, str: optional Which delimiter to use, if any. One of ``','``, ``'\t'``, or ``'|'``. Default: ``'|'``. unquoted: bool, optional Whether or not to quote fields. Default: ``False``. prefix: str, optional A user specified filename prefix for the output file to have. Default: ``None``. polling_interval : int or float, optional Number of seconds to wait between checks for query completion. hidden : bool, optional If ``True`` (the default), this job will not appear in the Civis UI. Returns ------- unload_manifest: dict A dictionary resembling an AWS manifest file. Has the following keys: ``'header'``, ``'query'``, ``'entries'``, respresenting the columns from the query, the query itself, and a list of dictionaries for each unloaded CSV part, each containing its file ``'id'``, ``'name'``, ``'size'``, and unsigned and signed S3 urls, ``'url'`` and ``'url_signed'``, respectively. Examples -------- >>> sql = "SELECT * FROM schema.my_big_table" >>> database = "my_database" >>> delimiter = "|" >>> manifest = civis_multipart_unload(sql, database, delimiter=delimiter) >>> ids = [file['id'] for file in manifest['files']] >>> buf = BytesIO() >>> civis_to_file(ids[0], buf) >>> buf.seek(0) >>> df = pd.read_csv(buf, delimiter=delimiter) See Also -------- civis.APIClient.scripts.post_sql """ if client is None: client = APIClient(api_key=api_key, resources='all') delimiter = DELIMITERS.get(delimiter) assert delimiter, "delimiter must be one of {}".format(DELIMITERS.keys()) csv_settings = dict(include_header=include_header, compression=compression, column_delimiter=delimiter, unquoted=unquoted, filename_prefix=prefix, force_multifile=True) script_id, run_id = _sql_script(client, sql, database, job_name, credential_id, hidden, csv_settings=csv_settings) fut = CivisFuture(client.scripts.get_sql_runs, (script_id, run_id), polling_interval=polling_interval, client=client, poll_on_creation=False) outputs = fut.result()["output"] if not outputs: raise EmptyResultError("Unload query {} returned no manifest." .format(script_id)) buf = io.BytesIO() civis_to_file(outputs[0]['file_id'], buf) txt = io.TextIOWrapper(buf, encoding='utf-8') txt.seek(0) unload_manifest = json.load(txt) return unload_manifest
def files_download_cmd(file_id, path): """Download a Civis File to a specified local path.""" with open(path, 'wb') as f: civis_to_file(file_id, f)