def test_schema():
    client = mock.MagicMock()
    job = models.Job(
        client,
        "job_id",
        "type",
        "query",
        status="status",
        url="url",
        debug="debug",
        start_at="start_at",
        end_at="end_at",
        cpu_time="cpu_time",
        result_size="result_size",
        result="result",
        result_url="result_url",
        hive_result_schema=[["_c1", "string"], ["_c2", "bigint"]],
        priority="UNKNOWN",
        retry_limit="retry_limit",
        org_name="org_name",
        database="database",
    )
    assert job.id == "job_id"
    assert job.job_id == "job_id"
    assert job.type == "type"
    assert job.result_url == "result_url"
    assert job.priority == "UNKNOWN"
    assert job.retry_limit == "retry_limit"
    assert job.org_name == "org_name"
    assert job.database == "database"
    assert job.result_schema == [["_c1", "string"], ["_c2", "bigint"]]
def partial_delete(self, db_name, table_name, to, _from, params=None):
    """Create a job to partially delete the contents of the table within the
    given time range.

    Args:
        db_name (str): Target database name.
        table_name (str): Target table name.
        to (int): Time in Unix Epoch format indicating the end date and time of
            the data to be deleted. Should be set only by the hour. Minutes and
            seconds values will not be accepted.
        _from (int): Time in Unix Epoch format indicating the start date and
            time of the data to be deleted. Should be set only by the hour.
            Minutes and seconds values will not be accepted.
        params (dict, optional): Extra parameters.

            - pool_name (str, optional): Indicates the resource pool to execute
              this job. If not provided, the account's default resource pool
              would be used.
            - domain_key (str, optional): Domain key that will be assigned to
              the partial delete job to be created.

    Returns:
        :class:`tdclient.models.Job`
    """
    params = {} if params is None else params
    job_id = self.api.partial_delete(db_name, table_name, to, _from, params)
    return models.Job(self, job_id, "partialdelete", None)
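# Usage sketch (not from the library docs): partially delete one day of data
# from an existing table. The database/table names, time range, and pool name
# below are illustrative assumptions; timestamps must be hour-aligned.
import tdclient

td = tdclient.Client(apikey="YOUR_API_KEY")
job = td.partial_delete(
    "sample_db",
    "www_access",
    to=1423612800,     # 2015-02-11 00:00:00 UTC
    _from=1423526400,  # 2015-02-10 00:00:00 UTC
    params={"pool_name": "hadoop2"},
)
job.wait()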
def job(self, job_id):
    """Get a job from `job_id`

    Params:
        job_id (str): job id

    Returns:
        :class:`tdclient.models.Job`
    """
    d = self.api.show_job(str(job_id))
    return models.Job(
        self,
        job_id,
        d["type"],
        d["query"],
        status=d.get("status"),
        url=d.get("url"),
        debug=d.get("debug"),
        start_at=d.get("start_at"),
        end_at=d.get("end_at"),
        created_at=d.get("created_at"),
        updated_at=d.get("updated_at"),
        cpu_time=d.get("cpu_time"),
        result_size=d.get("result_size"),
        result=d.get("result"),
        result_url=d.get("result_url"),
        hive_result_schema=d.get("hive_result_schema"),
        priority=d.get("priority"),
        retry_limit=d.get("retry_limit"),
        org_name=d.get("org_name"),
        database=d.get("database"),
        num_records=d.get("num_records"),
        user_name=d.get("user_name"),
        linked_result_export_job_id=d.get("linked_result_export_job_id"),
        result_export_target_job_id=d.get("result_export_target_job_id"),
    )
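# Usage sketch (assumed API key; "12345" is a placeholder job id): re-fetch a
# job by id and inspect a few of the attributes populated from show_job.
import tdclient

td = tdclient.Client(apikey="YOUR_API_KEY")
job = td.job("12345")
print(job.status())         # e.g. "success", "running", "error"
print(job.priority)         # e.g. "NORMAL"
print(job.result_schema)    # e.g. [["cnt", "bigint"]]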
# Nested helper that builds a Job model from one entry of a job-list response;
# `self` is bound in the enclosing client method's scope.
def job(d):
    return models.Job(
        self,
        d["job_id"],
        d["type"],
        d["query"],
        status=d.get("status"),
        url=d.get("url"),
        debug=d.get("debug"),
        start_at=d.get("start_at"),
        end_at=d.get("end_at"),
        created_at=d.get("created_at"),
        updated_at=d.get("updated_at"),
        cpu_time=d.get("cpu_time"),
        result_size=d.get("result_size"),
        result=d.get("result"),
        result_url=d.get("result_url"),
        hive_result_schema=d.get("hive_result_schema"),
        priority=d.get("priority"),
        retry_limit=d.get("retry_limit"),
        org_name=d.get("org_name"),
        database=d.get("database"),
        num_records=d.get("num_records"),
        user_name=d.get("user_name"),
        linked_result_export_job_id=d.get("linked_result_export_job_id"),
        result_export_target_job_id=d.get("result_export_target_job_id"),
    )
def test_job_kill():
    client = mock.MagicMock()
    job = models.Job(client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq")
    job.update = mock.MagicMock()
    job.kill()
    client.kill.assert_called_with("12345")
    assert job.update.called
def test_job_update_status():
    client = mock.MagicMock()
    client.api.show_job = mock.MagicMock(
        return_value={
            "job_id": "67890",
            "type": "hive",
            "url": "http://console.example.com/jobs/67890",
            "query": "SELECT COUNT(1) FROM nasdaq",
            "status": "success",
            "debug": None,
            "start_at": datetime.datetime(2015, 2, 10, 0, 2, 14, tzinfo=dateutil.tz.tzutc()),
            "end_at": datetime.datetime(2015, 2, 10, 0, 2, 27, tzinfo=dateutil.tz.tzutc()),
            "created_at": datetime.datetime(2015, 2, 10, 0, 2, 13, tzinfo=dateutil.tz.tzutc()),
            "updated_at": datetime.datetime(2015, 2, 10, 0, 2, 15, tzinfo=dateutil.tz.tzutc()),
            "cpu_time": None,
            "result_size": 22,
            "result": None,
            "result_url": None,
            "hive_result_schema": [["cnt", "bigint"]],
            "priority": 1,
            "retry_limit": 0,
            "org_name": None,
            "database": "sample_datasets",
            "num_records": 1,
            "user_name": "Treasure Data",
            "linked_result_export_job_id": None,
            "result_export_target_job_id": None,
        }
    )
    job = models.Job(client, "67890", "hive", "SELECT COUNT(1) FROM nasdaq")
    job.finished = mock.MagicMock(return_value=False)
    assert job.status() == "success"
    client.api.show_job.assert_called_with("67890")
def query(
    self,
    db_name,
    q,
    result_url=None,
    priority=None,
    retry_limit=None,
    type="hive",
    **kwargs
):
    """Run a query on specified database table.

    Params:
        db_name (str): name of a database
        q (str): a query string
        result_url (str): result output URL
        priority (int or str): priority (e.g. "NORMAL", "HIGH", etc.)
        retry_limit (int): retry limit
        type (str): name of a query engine

    Returns:
        :class:`tdclient.models.Job`

    Raises:
        ValueError: if unknown query type has been specified
    """
    # for compatibility, assume type is hive unless specifically specified
    if type not in ["hive", "pig", "impala", "presto"]:
        raise ValueError("The specified query type is not supported: %s" % (type))
    job_id = self.api.query(
        q,
        type=type,
        db=db_name,
        result_url=result_url,
        priority=priority,
        retry_limit=retry_limit,
        **kwargs
    )
    return models.Job(self, job_id, type, q)
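# Usage sketch (assumed API key; database and query are illustrative): issue a
# Presto query, wait for completion, then stream the result rows.
import tdclient

td = tdclient.Client(apikey="YOUR_API_KEY")
job = td.query("sample_datasets", "SELECT COUNT(1) FROM nasdaq", type="presto")
job.wait()                    # blocks until the job finishes
if job.success():
    for row in job.result():  # rows are shaped by job.result_schema
        print(row)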
def job(client, status):
    stub = models.Job(
        client, "1", "hive", "SELECT COUNT(1) FROM nasdaq", status=status
    )
    stub._update_progress = mock.MagicMock()
    return stub
def run(client, job_id, status):
    job = models.Job(
        client, job_id, "hive", "SELECT COUNT(1) FROM nasdaq", status=status
    )
    client.job_status.reset_mock()
    job._update_progress()
def test_job_result_format_failure():
    client = mock.MagicMock()
    job = models.Job(client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq")
    job.success = mock.MagicMock(return_value=False)
    job.update = mock.MagicMock()
    with pytest.raises(ValueError) as error:
        for row in job.result_format("msgpack.gz"):
            pass
    assert not job.update.called
def partial_delete(self, db_name, table_name, to, _from, params=None):
    """Create a job to partially delete the contents of the table within the
    given time range.

    Returns:
        :class:`tdclient.models.Job`
    """
    params = {} if params is None else params
    job_id = self.api.partial_delete(db_name, table_name, to, _from, params)
    return models.Job(self, job_id, "partialdelete", None)
def perform_bulk_import(self, name):
    """Perform a bulk import session

    Params:
        name (str): name of a bulk import session

    Returns:
        :class:`tdclient.models.Job`
    """
    job_id = self.api.perform_bulk_import(name)
    return models.Job(self, job_id, "bulk_import", None)
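# Usage sketch: start the import job for a bulk import session (typically
# after its parts have been uploaded and the session frozen), then wait for
# it. The session name is a placeholder.
import tdclient

td = tdclient.Client(apikey="YOUR_API_KEY")
job = td.perform_bulk_import("session_www_access_2015_02_10")
job.wait()
print(job.status())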
def test_job_priority():
    client = mock.MagicMock()
    assert (
        models.Job(client, "1", "hive", "SELECT COUNT(1) FROM nasdaq", priority=-2).priority
        == "VERY LOW"
    )
    assert (
        models.Job(client, "2", "hive", "SELECT COUNT(1) FROM nasdaq", priority=-1).priority
        == "LOW"
    )
    assert (
        models.Job(client, "3", "hive", "SELECT COUNT(1) FROM nasdaq", priority=0).priority
        == "NORMAL"
    )
    assert (
        models.Job(client, "4", "hive", "SELECT COUNT(1) FROM nasdaq", priority=1).priority
        == "HIGH"
    )
    assert (
        models.Job(client, "5", "hive", "SELECT COUNT(1) FROM nasdaq", priority=2).priority
        == "VERY HIGH"
    )
    # priority values outside the known range are simply stringified
    assert (
        models.Job(client, "42", "hive", "SELECT COUNT(1) FROM nasdaq", priority=42).priority
        == "42"
    )
def test_job_result_format_list():
    client = mock.MagicMock()
    result = [["foo", 123], ["bar", 456], ["baz", 789]]
    job = models.Job(
        client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq", result=result
    )
    job.success = mock.MagicMock(return_value=True)
    job.update = mock.MagicMock()
    rows = []
    for row in job.result_format("msgpack.gz"):
        rows.append(row)
    assert rows == [["foo", 123], ["bar", 456], ["baz", 789]]
    assert job.update.called
def export_data(self, db_name, table_name, storage_type, params=None):
    """Export data from Treasure Data Service

    Params:
        db_name (str): name of a database
        table_name (str): name of a table
        storage_type (str): type of the storage
        params (dict): optional parameters

    Returns:
        :class:`tdclient.models.Job`
    """
    params = {} if params is None else params
    job_id = self.api.export_data(db_name, table_name, storage_type, params)
    return models.Job(self, job_id, "export", None)
def test_job_wait_success():
    client = mock.MagicMock()
    job = models.Job(client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq")
    job.finished = mock.MagicMock(side_effect=[False, True])
    job.update = mock.MagicMock()
    with mock.patch("time.time") as t_time:
        t_time.side_effect = [
            1423570800.0,
            1423570860.0,
            1423570920.0,
            1423570980.0,
        ]
        with mock.patch("time.sleep") as t_sleep:
            job.wait(timeout=120)
    assert t_sleep.called
    assert t_time.called
    assert job.finished.called
    assert job.update.called
def test_job_result_generator():
    client = mock.MagicMock()

    def job_result_each(job_id):
        assert job_id == "12345"
        yield ["foo", 123]
        yield ["bar", 456]
        yield ["baz", 789]

    client.job_result_each = job_result_each
    job = models.Job(client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq")
    job.success = mock.MagicMock(return_value=True)
    job.update = mock.MagicMock()
    rows = []
    for row in job.result():
        rows.append(row)
    assert rows == [["foo", 123], ["bar", 456], ["baz", 789]]
    assert job.update.called
def test_job_wait_failure():
    client = mock.MagicMock()
    job = models.Job(client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq")
    job.finished = mock.MagicMock(return_value=False)
    job.update = mock.MagicMock()
    with mock.patch("time.time") as t_time:
        t_time.side_effect = [
            1423570800.0,
            1423570860.0,
            1423570920.0,
            1423570980.0,
        ]
        with mock.patch("time.sleep") as t_sleep:
            with pytest.raises(RuntimeError) as error:
                job.wait(timeout=120)
    assert t_sleep.called
    assert t_time.called
    assert job.finished.called
    assert not job.update.called
def export_data(self, db_name, table_name, storage_type, params=None):
    """Export data from Treasure Data Service

    Args:
        db_name (str): name of a database
        table_name (str): name of a table
        storage_type (str): type of the storage
        params (dict): optional parameters. Assuming the following keys:

            - access_key_id (str): ID to access the information to be exported.
            - secret_access_key (str): Password for the `access_key_id`.
            - file_prefix (str, optional): Filename of exported file.
              Default: "<database_name>/<table_name>"
            - file_format (str, optional): File format of the information to be
              exported. {"jsonl.gz", "tsv.gz", "json.gz"}
            - from (int, optional): Start time of the data to be exported, in
              Unix epoch format.
            - to (int, optional): End time of the data to be exported, in Unix
              epoch format.
            - assume_role (str, optional): Assume role.
            - bucket (str): Name of bucket to be used.
            - domain_key (str, optional): Job domain key.
            - pool_name (str, optional): For Presto only. Pool name to be used;
              if not specified, the default pool is used.

    Returns:
        :class:`tdclient.models.Job`
    """
    params = {} if params is None else params
    job_id = self.api.export_data(db_name, table_name, storage_type, params)
    return models.Job(self, job_id, "export", None)
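# Usage sketch (all credentials, the bucket, and the time range below are
# illustrative placeholders; "s3" is assumed here as the storage type):
# export one day of a table as gzipped JSON lines.
import tdclient

td = tdclient.Client(apikey="YOUR_API_KEY")
job = td.export_data(
    "sample_db",
    "www_access",
    "s3",
    {
        "access_key_id": "YOUR_AWS_ACCESS_KEY_ID",
        "secret_access_key": "YOUR_AWS_SECRET_ACCESS_KEY",
        "bucket": "my-export-bucket",
        "file_format": "jsonl.gz",
        "from": 1423526400,  # 2015-02-10 00:00:00 UTC
        "to": 1423612800,    # 2015-02-11 00:00:00 UTC
    },
)
job.wait()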