Exemple #1
0
    def run(self):
        """Fetch one task by id and print it as indented, key-sorted JSON.

        The task id comes from ``self.options.task_id``. When the query raises
        ``exceptions.ApiException`` its message is printed and the process
        exits with status 1.
        """
        self.logger = log.get_logger('Task')
        self.task_api = TaskApi()

        try:
            task = self.task_api.query({'task_id': self.options.task_id})
        except exceptions.ApiException as err:
            # Single-argument print(...) behaves the same as the print statement.
            print(err.message)
            sys.exit(1)

        # SuperEnum elements are replaced by their str() form before dumping.
        printable = {}
        for key, value in task.items():
            if isinstance(value, SuperEnum.Element):
                value = str(value)
            printable[key] = value

        print(json.dumps(printable, sort_keys=True, indent=4 * ' '))
Exemple #2
0
    def _save(self, finished=None, steps=None, result=None, data=None):
        """Persist this task's state to its queue row, then sync instance fields.

        Each argument left as ``None`` is written using the value currently
        stored on the instance. The UPDATE only matches the row whose id and
        execution id equal this task's and whose ``last_contact`` is newer
        than ``execution_ttl`` seconds before now; the same predicate is then
        re-queried to find out whether such a row exists.

        Raises:
            TaskDoesNotExist: if the follow-up SELECT finds no matching row.
        """
        if finished is None:
            finished = self.finished

        with self.storage.transaction() as cursor:
            update_sql = '''
                UPDATE %s
                SET
                    last_contact=datetime(:now, 'unixepoch'),
                    update_count=update_count + 1,
                    steps=:steps,
                    finished=datetime(:finished, 'unixepoch'),
                    result=:result,
                    bytes_downloaded=:bytes_downloaded,
                    download_rate=:download_rate,
                    data=:data
                WHERE
                    id = :task_id
                    AND execution_id = :execution_id
                    AND last_contact > datetime(:now, 'unixepoch', '-%s second')
            ''' % (self._queue.table_name, self._queue.execution_ttl)
            update_args = dict(
                now=unix_timestamp(datetime.utcnow()),
                task_id=self.task_id,
                execution_id=self.execution_id,
                steps=json.dumps(steps if steps is not None else self.steps),
                finished=unix_timestamp(finished) if finished else None,
                result=result if result is not None else self.result,
                bytes_downloaded=self.bytes_downloaded,
                download_rate=self.download_rate,
                data=json.dumps(data if data is not None else self.data),
            )
            apsw_helpers.query(cursor, update_sql, **update_args)

            # Re-run the same predicate to learn whether a matching row exists.
            select_sql = '''
                SELECT * from %s
                WHERE
                    id = :task_id
                    AND execution_id = :execution_id
                    AND last_contact > datetime(:now, 'unixepoch', '-%s second')
            ''' % (self._queue.table_name, self._queue.execution_ttl)
            select_args = dict(
                now=unix_timestamp(datetime.utcnow()),
                task_id=self.task_id,
                execution_id=self.execution_id,
            )
            affected_row = apsw_helpers.get(cursor, select_sql, **select_args)

        if not affected_row:
            raise TaskDoesNotExist()

        # Copy back only the values that are not None. ``finished`` was already
        # defaulted to ``self.finished`` above, so it only changes the instance
        # when the caller supplied a value.
        if steps is not None:
            self.steps = steps
        if finished is not None:
            self.finished = finished
        if result is not None:
            self.result = result
        if data is not None:
            self.data = data
Exemple #3
0
    def run(self):
        """Query a single task and print the result as pretty-printed JSON.

        Uses ``self.options.task_id`` as the lookup key. If the API call
        raises ``exceptions.ApiException`` the exception's message is printed
        and the process exits with status 1.
        """
        self.logger = log.get_logger('Task')
        self.task_api = TaskApi()

        try:
            task = self.task_api.query({'task_id': self.options.task_id})
        except exceptions.ApiException as api_error:
            # print(x) with one argument matches the print statement's output.
            print(api_error.message)
            sys.exit(1)

        # Convert SuperEnum elements to str; every other value is kept as-is.
        printable = dict(
            (key, str(value) if isinstance(value, SuperEnum.Element) else value)
            for key, value in task.items()
        )
        print(json.dumps(printable, sort_keys=True, indent=4 * ' '))
Exemple #4
0
    def format(self):
        """Render ``self.data`` in the format selected by ``self.tablefmt``.

        Returns the UTF-8 encoded output: a JSON document for
        ``TableFormat.JSON``, or the PrettyTable string / HTML rendering for
        ``TableFormat.TABLE`` / ``TableFormat.HTML``. Any other ``tablefmt``
        value yields ``None``.
        """
        if self.tablefmt == TableFormat.JSON:
            # TODO(cary) Patch clark.super_enum to support JSON serialization
            printable_rows = []
            for row in self.data:
                converted = {}
                for key, value in row.iteritems():
                    if isinstance(value, SuperEnum.Element):
                        value = str(value)
                    converted[key] = value
                printable_rows.append(converted)
            serialized = json.dumps(printable_rows, sort_keys=True,
                                    indent=4 * ' ')
            return serialized.encode('utf-8')

        table = PrettyTable(self.columns)
        for column, alignment in self.align.iteritems():
            table.align[column] = alignment

        for row in self.data:
            cells = [row[name] for name in self.columns]
            table.add_row(cells)

        # Pick the renderer; sort options are only read once one is chosen.
        if self.tablefmt == TableFormat.TABLE:
            render = table.get_string
        elif self.tablefmt == TableFormat.HTML:
            render = table.get_html_string
        else:
            return None

        rendered = render(sortby=self.sort_by, reversesort=self.reverse_sort)
        return rendered.encode('utf-8')
Exemple #5
0
 def enqueue(self, data, job_id=None, file_id=None, md5=None,
             bytes_total=None):
     """Insert a new task row carrying ``data`` serialized as JSON.

     The ``created`` column is derived from ``datetime.utcnow()``;
     ``job_id``, ``file_id``, ``md5`` and ``bytes_total`` are bound as given.
     Always returns 1.
     """
     payload = json.dumps(data)
     with self.storage.transaction() as cursor:
         insert_sql = '''
             INSERT INTO %s
                 (created,
                  data,
                  job_id,
                  file_id,
                  md5,
                  bytes_total)
             VALUES
                 (datetime(:now, "unixepoch"),
                  :data,
                  :job_id,
                  :file_id,
                  :md5,
                  :bytes_total)
         ''' % self.table_name
         bindings = dict(
             now=unix_timestamp(datetime.utcnow()),
             data=payload,
             job_id=job_id,
             file_id=file_id,
             md5=md5,
             bytes_total=bytes_total,
         )
         apsw_helpers.query(cursor, insert_sql, **bindings)
         # One row is written per call, so the reported insert count is 1.
         return 1
Exemple #6
0
 def enqueue(self, data, job_id=None, file_id=None, md5=None,
             bytes_total=None):
     """Add one task to the queue table with ``data`` stored as a JSON string.

     The optional ``job_id``, ``file_id``, ``md5`` and ``bytes_total`` values
     are written alongside it, and ``created`` is taken from
     ``datetime.utcnow()``. The return value is the constant 1.
     """
     serialized = json.dumps(data)
     with self.storage.transaction() as cursor:
         statement = '''
             INSERT INTO %s
                 (created,
                  data,
                  job_id,
                  file_id,
                  md5,
                  bytes_total)
             VALUES
                 (datetime(:now, "unixepoch"),
                  :data,
                  :job_id,
                  :file_id,
                  :md5,
                  :bytes_total)
         ''' % self.table_name
         params = {
             'now': unix_timestamp(datetime.utcnow()),
             'data': serialized,
             'job_id': job_id,
             'file_id': file_id,
             'md5': md5,
             'bytes_total': bytes_total,
         }
         apsw_helpers.query(cursor, statement, **params)
         # Exactly one INSERT is issued, hence a row count of 1.
         return 1
Exemple #7
0
    def requeue(self):
        """Reset this task's row so it is detached from the current execution.

        The row's ``last_contact``, ``started``, ``steps``, ``execution_id``,
        ``finished`` and ``result`` columns are set to NULL, ``update_count``
        is incremented, and ``data`` is rewritten from a copy of ``self.data``
        with the ``time_left`` key removed.

        Raises:
            StepRunning: if ``self._running_steps()`` is not 0.
            AlreadyFinished: if ``self.finished`` is not ``None``.
            TaskDoesNotExist: if no row matches this task's id and execution
                id with a ``last_contact`` newer than ``execution_ttl``
                seconds before now.
        """
        if self._running_steps() != 0:
            raise StepRunning()
        if self.finished is not None:
            raise AlreadyFinished()

        requeued_data = copy.deepcopy(self.data)
        # NOTE(review): these fields are cleared on the instance before the row
        # check below, so they stay cleared even when TaskDoesNotExist is
        # raised - confirm that is intended.
        self.bytes_downloaded = None
        self.download_rate = None
        requeued_data.pop('time_left', None)

        with self._queue.storage.transaction() as cursor:
            lookup_sql = '''
                SELECT * from %s
                WHERE
                    id = :task_id
                    AND execution_id = :execution_id
                    AND last_contact > datetime(:now, 'unixepoch', '-%s second')
            ''' % (self._queue.table_name, self._queue.execution_ttl)
            current_row = apsw_helpers.get(
                cursor,
                lookup_sql,
                now=unix_timestamp(datetime.utcnow()),
                task_id=self.task_id,
                execution_id=self.execution_id)

            if not current_row:
                raise TaskDoesNotExist()

            reset_sql = '''
                UPDATE %s
                SET
                    last_contact=NULL,
                    update_count=update_count + 1,
                    started=NULL,
                    steps=NULL,
                    execution_id=NULL,
                    finished=NULL,
                    data=:data,
                    result=NULL
                WHERE
                    id = :task_id
                    AND execution_id = :execution_id
                    AND last_contact > datetime(:now, 'unixepoch', '-%s second')
            ''' % (self._queue.table_name, self._queue.execution_ttl)
            reset_args = dict(
                data=json.dumps(requeued_data),
                now=unix_timestamp(datetime.utcnow()),
                task_id=self.task_id,
                execution_id=self.execution_id,
            )
            apsw_helpers.query(cursor, reset_sql, **reset_args)
    def format(self):
        """Return ``self.data`` rendered as UTF-8 bytes per ``self.tablefmt``.

        ``TableFormat.JSON`` produces a key-sorted, indented JSON list of the
        rows. ``TableFormat.TABLE`` and ``TableFormat.HTML`` build a
        PrettyTable over ``self.columns`` and return its text or HTML form,
        sorted according to ``self.sort_by`` / ``self.reverse_sort``. For any
        other ``tablefmt`` nothing is returned.
        """
        def _jsonable(value):
            # SuperEnum elements are dumped via their str() form.
            return str(value) if isinstance(value, SuperEnum.Element) else value

        if self.tablefmt == TableFormat.JSON:
            # TODO(cary) Patch clark.super_enum to support JSON serialization
            printable_data = [
                dict((key, _jsonable(value)) for key, value in row.iteritems())
                for row in self.data
            ]
            dumped = json.dumps(printable_data, sort_keys=True, indent=4 * ' ')
            return dumped.encode('utf-8')

        ptable = PrettyTable(self.columns)
        for column, alignment in self.align.iteritems():
            ptable.align[column] = alignment

        for row in self.data:
            ptable.add_row([row[column] for column in self.columns])

        if self.tablefmt == TableFormat.TABLE:
            text = ptable.get_string(sortby=self.sort_by,
                                     reversesort=self.reverse_sort)
            return text.encode('utf-8')
        if self.tablefmt == TableFormat.HTML:
            html = ptable.get_html_string(sortby=self.sort_by,
                                          reversesort=self.reverse_sort)
            return html.encode('utf-8')
Exemple #9
0
 def json_spec(self):
     """Return ``self.spec`` serialized to a JSON string with ``json.dumps``."""
     spec = self.spec
     return json.dumps(spec)
Exemple #10
0
    def run(self):
        """Print a job's spec, or a JSON summary of the job with load stats.

        The job is looked up by ``self.options.job_id``. When
        ``self.options.spec`` is truthy only the job's spec is printed.
        Otherwise the job's tasks in state ``SUCCESS`` are queried and a
        ``stats`` entry is added to the result:

        * ``files_loaded``: number of successful tasks.
        * ``rows_loaded``: sum of each task's ``data['row_count']``
          (missing values count as 0).
        * ``avg_rows_per_file``: present when at least one file was loaded.
        * ``avg_rows_per_second``: rows divided by the span between the
          earliest ``start`` and latest ``stop`` of the tasks' ``download``
          steps. Omitted when no download step exists or that span is not
          positive, instead of raising ``ZeroDivisionError`` or reporting a
          negative rate.
        * ``success_rate`` / ``error_rate``: present when ``tasks_total > 0``.

        The spec is replaced in the output by its target ``database`` and
        ``table``. Any ``exceptions.ApiException`` is reported by printing its
        message and exiting with status 1.
        """
        self.logger = log.get_logger('Job')
        self.job_api = JobApi()
        self.tasks_api = TasksApi()

        try:
            result = self.job_api.query({'job_id': self.options.job_id})
        except exceptions.ApiException as e:
            # Single-argument print(...) behaves the same as the print statement.
            print(e.message)
            sys.exit(1)

        if self.options.spec:
            print(json.dumps(result.spec, sort_keys=True, indent=4 * ' '))
            return

        try:
            finished_tasks = self.tasks_api.query({
                'job_id': self.options.job_id,
                'state': 'SUCCESS'
            })
        except exceptions.ApiException as e:
            print(e.message)
            sys.exit(1)

        files_loaded = len(finished_tasks)
        rows_loaded = sum(
            task.get('data', {}).get('row_count', 0) for task in finished_tasks)
        avg_rows_per_file = None
        avg_rows_per_second = None

        if files_loaded > 0:
            # NOTE(review): this is integer division under Python 2 when both
            # operands are ints - confirm truncation is intended.
            avg_rows_per_file = rows_loaded / files_loaded

            # Only the first 'download' step of each task contributes.
            download_steps = []
            for task in finished_tasks:
                for step in task.steps:
                    if step['name'] == 'download':
                        download_steps.append(step)
                        break

            if download_steps:
                first_start = min(step['start'] for step in download_steps)
                last_stop = max(step['stop'] for step in download_steps)
                elapsed = (last_stop - first_start).total_seconds()
                # A zero or negative window cannot yield a meaningful rate.
                if elapsed > 0:
                    avg_rows_per_second = rows_loaded / elapsed

        # Statistics that could not be computed (None) are left out.
        candidate_stats = {
            'files_loaded': files_loaded,
            'rows_loaded': rows_loaded,
            'avg_rows_per_file': avg_rows_per_file,
            'avg_rows_per_second': avg_rows_per_second
        }
        result['stats'] = {
            k: v for k, v in candidate_stats.items() if v is not None
        }

        if result.tasks_total > 0:
            result['stats'].update({
                'success_rate': result.tasks_succeeded * 1.0 / result.tasks_total,
                'error_rate': result.tasks_errored * 1.0 / result.tasks_total
            })

        result["database"] = result.spec["target"]["database"]
        result["table"] = result.spec["target"]["table"]

        # Work on a plain dict copy so the spec can be dropped from the output.
        result = dict(result)
        del result['spec']

        result = {
            k: str(v) if isinstance(v, SuperEnum.Element) else v
            for k, v in result.items()
        }
        print(json.dumps(result, sort_keys=True, indent=4 * ' '))