def test_schema():
    """Check that values given to the ``Job`` constructor can be read back.

    ``hive_result_schema`` is expected to be exposed as ``result_schema`` and
    the job id under both ``id`` and ``job_id``.
    """
    client = mock.MagicMock()
    optional_fields = {
        "status": "status",
        "url": "url",
        "debug": "debug",
        "start_at": "start_at",
        "end_at": "end_at",
        "cpu_time": "cpu_time",
        "result_size": "result_size",
        "result": "result",
        "result_url": "result_url",
        "hive_result_schema": [["_c1", "string"], ["_c2", "bigint"]],
        "priority": "UNKNOWN",
        "retry_limit": "retry_limit",
        "org_name": "org_name",
        "database": "database",
    }
    job = models.Job(client, "job_id", "type", "query", **optional_fields)

    # (attribute name, expected value) pairs, checked in this order.
    expectations = (
        ("id", "job_id"),
        ("job_id", "job_id"),
        ("type", "type"),
        ("result_url", "result_url"),
        ("priority", "UNKNOWN"),
        ("retry_limit", "retry_limit"),
        ("org_name", "org_name"),
        ("database", "database"),
        ("result_schema", [["_c1", "string"], ["_c2", "bigint"]]),
    )
    for attribute, expected in expectations:
        assert getattr(job, attribute) == expected
Beispiel #2
0
    def partial_delete(self, db_name, table_name, to, _from, params=None):
        """Start a job that removes the rows of a table within a time range.

        Args:
            db_name (str): Name of the database that holds the table.
            table_name (str): Name of the table to delete from.
            to (int): End of the range to delete, as a Unix epoch timestamp.
                It has to be aligned to the hour; values carrying minutes or
                seconds are not accepted.
            _from (int): Start of the range to delete, as a Unix epoch
                timestamp. It has to be aligned to the hour; values carrying
                minutes or seconds are not accepted.
            params (dict, optional): Additional options. An empty dict is
                used when omitted.

                - pool_name (str, optional):
                    Resource pool in which the job is executed. The account's
                    default resource pool is used when it is not given.
                - domain_key (str, optional):
                    Domain key to assign to the partial delete job that is
                    created.

        Returns:
             :class:`tdclient.models.Job`
        """
        if params is None:
            params = {}
        new_job_id = self.api.partial_delete(
            db_name, table_name, to, _from, params
        )
        return models.Job(self, new_job_id, "partialdelete", None)
Beispiel #3
0
    def job(self, job_id):
        """Fetch one job from the API and wrap the response in a model.

        Args:
            job_id (str): job id; it is passed through ``str()`` before being
                sent to ``self.api.show_job``.

        Returns: :class:`tdclient.models.Job`
        """
        # Fields copied from the response when present (``None`` otherwise).
        # The order is the order in which they are handed to the constructor.
        optional_keys = (
            "status",
            "url",
            "debug",
            "start_at",
            "end_at",
            "created_at",
            "updated_at",
            "cpu_time",
            "result_size",
            "result",
            "result_url",
            "hive_result_schema",
            "priority",
            "retry_limit",
            "org_name",
            "database",
            "num_records",
            "user_name",
            "linked_result_export_job_id",
            "result_export_target_job_id",
        )
        info = self.api.show_job(str(job_id))
        return models.Job(
            self,
            job_id,
            info["type"],
            info["query"],
            **{key: info.get(key) for key in optional_keys}
        )
Beispiel #4
0
 def job(d):
     """Build a ``models.Job`` from one job description mapping ``d``.

     ``job_id``, ``type`` and ``query`` are required keys; every other field
     is looked up with ``d.get`` and therefore defaults to ``None``.
     NOTE(review): ``self`` is not a parameter here, so it is presumably
     captured from an enclosing scope that is not visible in this snippet.
     """
     # Optional fields, in the order they are handed to the constructor.
     optional_keys = (
         "status",
         "url",
         "debug",
         "start_at",
         "end_at",
         "created_at",
         "updated_at",
         "cpu_time",
         "result_size",
         "result",
         "result_url",
         "hive_result_schema",
         "priority",
         "retry_limit",
         "org_name",
         "database",
         "num_records",
         "user_name",
         "linked_result_export_job_id",
         "result_export_target_job_id",
     )
     return models.Job(
         self,
         d["job_id"],
         d["type"],
         d["query"],
         **{key: d.get(key) for key in optional_keys}
     )
def test_job_kill():
    """``Job.kill`` must call ``client.kill`` with the job id and ``update``."""
    fake_client = mock.MagicMock()
    target = models.Job(
        fake_client,
        "12345",
        "presto",
        "SELECT COUNT(1) FROM nasdaq",
    )
    # Replace update() so that the test can observe whether kill() invokes it.
    target.update = mock.MagicMock()

    target.kill()

    fake_client.kill.assert_called_with("12345")
    assert target.update.called
def test_job_update_status():
    """``Job.status()`` on an unfinished job must reflect ``show_job`` data.

    ``finished()`` is stubbed to return ``False``; the job is created without
    a status, so the ``"success"`` value has to come from the stubbed
    ``client.api.show_job`` response.
    """
    def utc(*fields):
        # Timezone-aware UTC timestamp built from (year, month, day, h, m, s).
        return datetime.datetime(*fields, tzinfo=dateutil.tz.tzutc())

    show_job_response = {
        "job_id": "67890",
        "type": "hive",
        "url": "http://console.example.com/jobs/67890",
        "query": "SELECT COUNT(1) FROM nasdaq",
        "status": "success",
        "debug": None,
        "start_at": utc(2015, 2, 10, 0, 2, 14),
        "end_at": utc(2015, 2, 10, 0, 2, 27),
        "created_at": utc(2015, 2, 10, 0, 2, 13),
        "updated_at": utc(2015, 2, 10, 0, 2, 15),
        "cpu_time": None,
        "result_size": 22,
        "result": None,
        "result_url": None,
        "hive_result_schema": [["cnt", "bigint"]],
        "priority": 1,
        "retry_limit": 0,
        "org_name": None,
        "database": "sample_datasets",
        "num_records": 1,
        "user_name": "Treasure Data",
        "linked_result_export_job_id": None,
        "result_export_target_job_id": None,
    }
    client = mock.MagicMock()
    client.api.show_job = mock.MagicMock(return_value=show_job_response)

    job = models.Job(client, "67890", "hive", "SELECT COUNT(1) FROM nasdaq")
    job.finished = mock.MagicMock(return_value=False)

    assert job.status() == "success"
    client.api.show_job.assert_called_with("67890")
Beispiel #7
0
    def query(self,
              db_name,
              q,
              result_url=None,
              priority=None,
              retry_limit=None,
              type="hive",
              **kwargs):
        """Run a query on the specified database.

        Args:
            db_name (str): name of a database
            q (str): a query string
            result_url (str): result output URL
            priority (int or str): priority (e.g. "NORMAL", "HIGH", etc.)
            retry_limit (int): retry limit
            type (str): name of a query engine; one of "hive", "pig",
                "impala" or "presto"
            **kwargs: extra keyword arguments, forwarded unchanged to
                ``self.api.query``

        Returns: :class:`tdclient.models.Job`

        Raises:
            ValueError: if unknown query type has been specified
        """
        # for compatibility, assume type is hive unless specifically specified
        supported_types = ("hive", "pig", "impala", "presto")
        if type not in supported_types:
            # The value is wrapped in a one-element tuple on purpose: with a
            # bare ``% (type)`` a tuple passed as `type` would be unpacked as
            # the format arguments and raise TypeError (or be mis-rendered)
            # instead of producing the documented ValueError.
            raise ValueError(
                "The specified query type is not supported: %s" % (type,))
        job_id = self.api.query(q,
                                type=type,
                                db=db_name,
                                result_url=result_url,
                                priority=priority,
                                retry_limit=retry_limit,
                                **kwargs)
        return models.Job(self, job_id, type, q)
 def job(client, status):
     """Return a hive ``Job`` in `status` whose ``_update_progress`` is mocked.

     The job always has id ``"1"`` and a fixed sample query.
     """
     sample_query = "SELECT COUNT(1) FROM nasdaq"
     fake_job = models.Job(client, "1", "hive", sample_query, status=status)
     # Replaced with a mock so callers can inspect (or avoid) real updates.
     fake_job._update_progress = mock.MagicMock()
     return fake_job
 def run(client, job_id, status):
     """Create a hive ``Job`` in `status` and call its ``_update_progress``.

     ``client.job_status`` is reset after the job has been constructed, so
     only calls made during ``_update_progress`` remain recorded on it.
     """
     sample_query = "SELECT COUNT(1) FROM nasdaq"
     subject = models.Job(client, job_id, "hive", sample_query, status=status)
     client.job_status.reset_mock()
     subject._update_progress()
def test_job_result_format_failure():
    """Iterating ``result_format`` of an unsuccessful job raises ``ValueError``.

    ``success()`` is stubbed to return ``False``; ``update()`` must not have
    been called by the time the error is raised.
    """
    fake_client = mock.MagicMock()
    failed_job = models.Job(
        fake_client,
        "12345",
        "presto",
        "SELECT COUNT(1) FROM nasdaq",
    )
    failed_job.success = mock.MagicMock(return_value=False)
    failed_job.update = mock.MagicMock()

    with pytest.raises(ValueError):
        # Exhaust the iterable; the rows themselves are irrelevant here.
        for _ in failed_job.result_format("msgpack.gz"):
            pass

    assert not failed_job.update.called
Beispiel #11
0
 def partial_delete(self, db_name, table_name, to, _from, params=None):
     """Request a partial delete through the API and return the job handle.

     All arguments are forwarded to ``self.api.partial_delete``; `params`
     defaults to an empty dict when it is ``None``. The job id returned by
     the API is wrapped in a job of type ``"partialdelete"`` with no query.

     => :class:`tdclient.models.Job`
     """
     if params is None:
         params = {}
     new_job_id = self.api.partial_delete(
         db_name, table_name, to, _from, params
     )
     return models.Job(self, new_job_id, "partialdelete", None)
Beispiel #12
0
    def perform_bulk_import(self, name):
        """Start performing a bulk import session and return its job.

        Args:
            name (str): name of a bulk import session

        Returns: :class:`tdclient.models.Job`
        """
        # The API call yields the id of the job created for the session.
        import_job_id = self.api.perform_bulk_import(name)
        return models.Job(
            self,
            import_job_id,
            "bulk_import",
            None,
        )
def test_job_priority():
    """Known numeric priorities map to names; other values become strings."""
    client = mock.MagicMock()
    # (job id, priority passed to the constructor, expected ``priority``)
    cases = (
        ("1", -2, "VERY LOW"),
        ("2", -1, "LOW"),
        ("3", 0, "NORMAL"),
        ("4", 1, "HIGH"),
        ("5", 2, "VERY HIGH"),
        ("42", 42, "42"),
    )
    for job_id, level, expected_label in cases:
        job = models.Job(
            client,
            job_id,
            "hive",
            "SELECT COUNT(1) FROM nasdaq",
            priority=level,
        )
        assert job.priority == expected_label
def test_job_result_format_list():
    """``result_format`` yields the rows given at construction time.

    The job is created with ``result`` already set and ``success()`` stubbed
    to ``True``; ``update()`` is expected to have been called.
    """
    fake_client = mock.MagicMock()
    preset_rows = [["foo", 123], ["bar", 456], ["baz", 789]]
    job = models.Job(
        fake_client,
        "12345",
        "presto",
        "SELECT COUNT(1) FROM nasdaq",
        result=preset_rows,
    )
    job.success = mock.MagicMock(return_value=True)
    job.update = mock.MagicMock()

    collected = list(job.result_format("msgpack.gz"))

    assert collected == [["foo", 123], ["bar", 456], ["baz", 789]]
    assert job.update.called
Beispiel #15
0
    def export_data(self, db_name, table_name, storage_type, params=None):
        """Start a job exporting a table from Treasure Data Service.

        Args:
            db_name (str): name of a database
            table_name (str): name of a table
            storage_type (str): type of the storage
            params (dict): optional parameters; an empty dict is used when
                omitted

        Returns: :class:`tdclient.models.Job`
        """
        if params is None:
            params = {}
        export_job_id = self.api.export_data(
            db_name, table_name, storage_type, params
        )
        return models.Job(self, export_job_id, "export", None)
def test_job_wait_success():
    """``Job.wait`` returns once ``finished()`` turns true within the timeout.

    ``finished()`` reports ``False`` and then ``True``, and ``time.time`` is
    fed timestamps 60 seconds apart; ``wait`` is expected to sleep and to
    call ``update()``.
    """
    fake_client = mock.MagicMock()
    job = models.Job(
        fake_client,
        "12345",
        "presto",
        "SELECT COUNT(1) FROM nasdaq",
    )
    job.finished = mock.MagicMock(side_effect=[False, True])
    job.update = mock.MagicMock()

    clock_patch = mock.patch("time.time")
    sleep_patch = mock.patch("time.sleep")
    with clock_patch as t_time:
        t_time.side_effect = [
            1423570800.0,
            1423570860.0,
            1423570920.0,
            1423570980.0,
        ]
        with sleep_patch as t_sleep:
            job.wait(timeout=120)
            assert t_sleep.called
        assert t_time.called

    assert job.finished.called
    assert job.update.called
def test_job_result_generator():
    """``Job.result()`` yields the rows produced by ``client.job_result_each``.

    ``success()`` is stubbed to ``True``; ``update()`` is expected to have
    been called.
    """
    fake_client = mock.MagicMock()

    def fake_job_result_each(job_id):
        # Stand-in for the client call; also verifies the id it receives.
        assert job_id == "12345"
        for record in (["foo", 123], ["bar", 456], ["baz", 789]):
            yield record

    fake_client.job_result_each = fake_job_result_each
    job = models.Job(
        fake_client,
        "12345",
        "presto",
        "SELECT COUNT(1) FROM nasdaq",
    )
    job.success = mock.MagicMock(return_value=True)
    job.update = mock.MagicMock()

    collected = list(job.result())

    assert collected == [["foo", 123], ["bar", 456], ["baz", 789]]
    assert job.update.called
def test_job_wait_failure():
    """``Job.wait`` must raise ``RuntimeError`` when the job never finishes.

    ``finished()`` is stubbed to always return ``False`` and ``time.time`` is
    fed timestamps 60 seconds apart, so the 120 second timeout is expected to
    elapse. ``update()`` must not be called in that case.
    """
    client = mock.MagicMock()
    job = models.Job(client, "12345", "presto", "SELECT COUNT(1) FROM nasdaq")
    job.finished = mock.MagicMock(return_value=False)
    job.update = mock.MagicMock()
    with mock.patch("time.time") as t_time:
        t_time.side_effect = [
            1423570800.0, 1423570860.0, 1423570920.0, 1423570980.0
        ]
        with mock.patch("time.sleep") as t_sleep:
            with pytest.raises(RuntimeError):
                job.wait(timeout=120)
            # Checked outside the ``raises`` block: written after the raising
            # call inside that block, this assertion would never execute.
            assert t_sleep.called
        assert t_time.called
    assert job.finished.called
    assert not job.update.called
Beispiel #19
0
    def export_data(self, db_name, table_name, storage_type, params=None):
        """Start a job exporting a table from Treasure Data Service.

        Args:
            db_name (str): name of a database
            table_name (str): name of a table
            storage_type (str): type of the storage
            params (dict): optional parameters; an empty dict is used when
                omitted. The following keys are assumed:

                - access_key_id (str):
                     ID used to access the information to be exported.
                - secret_access_key (str):
                     Password belonging to `access_key_id`.
                - file_prefix (str, optional):
                     Filename of the exported file.
                     Default: "<database_name>/<table_name>"
                - file_format (str, optional):
                     Format of the exported file.
                     {"jsonl.gz", "tsv.gz", "json.gz"}
                - from (int, optional):
                     Start time of the data to export, in Unix epoch format.
                - to (int, optional):
                     End time of the data to export, in Unix epoch format.
                - assume_role (str, optional): Assume role.
                - bucket (str):
                     Name of the bucket to be used.
                - domain_key (str, optional):
                     Job domain key.
                - pool_name (str, optional):
                     For Presto only. Name of the pool to use; the default
                     pool is used when it is not specified.

        Returns:
             :class:`tdclient.models.Job`
        """
        if params is None:
            params = {}
        export_job_id = self.api.export_data(
            db_name, table_name, storage_type, params
        )
        return models.Job(self, export_job_id, "export", None)