Beispiel #1
0
    def get_ps_str(self):
        """Return a formatted table of the currently active jobs or tasks.

        With ``self.options.jobs`` set, queued and running jobs are queried
        through ``JobsApi``; otherwise running tasks are read from ``Tasks``.

        If the API query fails, or nothing is active, ``self.error`` is set to
        True and a message string is returned instead of a table.
        """
        showing_jobs = self.options.jobs

        if showing_jobs:
            try:
                active_rows = JobsApi().query({
                    'state': [shared.JobState.QUEUED, shared.JobState.RUNNING],
                })
            except exceptions.ApiException as e:
                self.error = True
                return e.message

            # Give each job row a 'data' entry shaped like the one task rows
            # carry; 'time_left' is only included when it has a value.
            for job_row in active_rows:
                remaining = job_row.time_left
                if remaining is None:
                    job_row['data'] = {}
                else:
                    job_row['data'] = {'time_left': remaining}
        else:
            active_rows = Tasks().get_tasks_in_state(
                [shared.TaskState.RUNNING])

        if not len(active_rows):
            self.error = True
            kind = 'jobs' if showing_jobs else 'tasks'
            return 'No currently running %s' % kind

        # Apply the requested ordering, if any
        if self.options.order_by is not None:
            self._sort(active_rows)

        # The tasks column must be wide enough for the summed task count
        if showing_jobs:
            self.max_tasks_digits = len(str(
                sum(job_row.tasks_total for job_row in active_rows)))

        formatted_tasks = []
        for row in active_rows:
            formatted_row = {
                key: fn(row, for_display=True)
                for key, fn in self.KEY_FN.iteritems()
            }

            # Job rows get a specially formatted tasks column
            if showing_jobs:
                formatted_row.update(self._format_tasks_col(row))

            formatted_row.update(self._make_progress(row, 50))
            formatted_tasks.append(formatted_row)

        # A totals row is only appended when there are several rows
        if len(active_rows) > 1:
            formatted_tasks.append(
                self._create_formatted_totals_row(active_rows))

        printer = PrettyPrinter(formatted_tasks,
                                columns=self.KEY_FN.keys(),
                                format=TableFormat.TABLE)
        return printer.format()
Beispiel #2
0
    def run(self):
        self.logger = log.get_logger('CancelTask')

        self.tasks = Tasks()
        rows_affected = self.tasks.bulk_finish(
            extra_predicate=('id = :task_id', {
                'task_id': self.options.task_id
            }))

        plural = not rows_affected == 1
        print 'Cancelled', rows_affected, 'task%s.' % ('s' if plural else '')
Beispiel #3
0
    def run(self):
        self.logger = log.get_logger('CancelJob')

        self.tasks = Tasks()

        rows_affected = 0
        if self.options.multiple:
            rows_affected = self.tasks.bulk_finish(
                extra_predicate=("job_id LIKE :job_id", {
                    'job_id': self.options.job_id + '%%'
                }))
        else:
            loader_storage = LoaderStorage()
            with loader_storage.transaction() as cursor:
                jobs = apsw_helpers.query(cursor,
                                          '''
                    SELECT id FROM jobs WHERE id LIKE :job_id
                ''',
                                          job_id=self.options.job_id + '%')

            if len(jobs) > 1:
                print len(jobs), 'jobs match this job ID:'
                print '\n'.join([row.id for row in jobs])
                print 'Please use a more specific prefix or specify the `--multiple` flag if you'
                print 'would like to cancel more than one job.'
                sys.exit(1)
            elif len(jobs) == 0:
                print '0 jobs match this job ID.'
                sys.exit(1)
            else:
                rows_affected = self.tasks.bulk_finish(
                    extra_predicate=("job_id = :job_id", {
                        'job_id': jobs[0].id
                    }))

        job_suffix = '(s)' if self.options.multiple else ''
        task_suffix = 's' if not rows_affected == 1 else ''
        print CANCEL_JOB_MESSAGE % (job_suffix, self.options.job_id,
                                    rows_affected, task_suffix)
Beispiel #4
0
    def queue_job(self):
        all_keys = list(self.job.get_files(s3_conn=self.s3_conn))

        paths = self.job.spec.source.paths

        if self.options.dry_run:
            print "DRY RUN SUMMARY:"
            print "----------------"
            if len(all_keys) == 0:
                print "Paths %s matched no files" % ([str(p) for p in paths])
            else:
                print "List of files to load:"
                for key in all_keys:
                    print key.name
                print "Example LOAD DATA statement to execute:"
                file_id = self.job.get_file_id(all_keys[0])
                print load_data.build_example_query(self.job, file_id)
            sys.exit(0)
        elif len(all_keys) == 0:
            self.logger.warning(
                "Paths %s matched no files. Please check your path specification (be careful with relative paths)."
                % ([str(p) for p in paths]))

        self.jobs = None
        spec = self.job.spec
        try:
            self.logger.info('Creating job')
            self.jobs = Jobs()
            self.jobs.save(self.job)

            self.tasks = Tasks()

            etags = []
            for key in all_keys:
                if key.scheme in ['s3', 'hdfs']:
                    etags.append(key.etag)

            if etags and not self.options.force:
                database, table = spec.target.database, spec.target.table
                host, port = spec.connection.host, spec.connection.port
                competing_job_ids = [
                    j.id for j in self.jobs.query_target(
                        host, port, database, table)
                ]
                md5_map = self.get_current_tasks_md5_map(
                    etags, competing_job_ids)
            else:
                # For files loading on the filesystem, we are not going to MD5 files
                # for performance reasons. We are also basing this on the assumption
                # that filesystem loads are generally a one-time operation.
                md5_map = None
                if self.options.force:
                    self.logger.info(
                        'Loading all files in this job, regardless of identical files that are currently loading or were previously loaded (because of the --force flag)'
                    )
                if self.job.spec.options.file_id_column is not None:
                    self.logger.info(
                        'Since you\'re using file_id_column, duplicate records will be checked and avoided'
                    )

            count = self.submit_files(all_keys, md5_map, self.job,
                                      self.options.force)

            if count == 0:
                self.logger.info('Deleting the job, it has no child tasks')
                try:
                    self.jobs.delete(self.job)
                except:
                    self.logger.error("Rollback failed for job: %s",
                                      self.job.id)
            else:
                self.logger.info("Successfully queued job with id: %s",
                                 self.job.id)

                if not servers.is_server_running():
                    self.start_server()

                if self.options.sync:
                    self.wait_for_job()

        except (Exception, AssertionError):
            self.logger.error(
                'Failed to submit files, attempting to roll back job creation...'
            )
            exc_info = sys.exc_info()
            if self.jobs is not None:
                try:
                    self.jobs.delete(self.job)
                except:
                    self.logger.error("Rollback failed for job: %s",
                                      self.job.id)
            # Have to use this old-style raise because raise just throws
            # the last exception that occured, which could be the one in
            # the above try/except block and not the original exception.
            raise exc_info[0], exc_info[1], exc_info[2]
Beispiel #5
0
    def run(self):
        """Worker loop: start tasks and process them until ``self.exiting()``.

        SIGINT and SIGQUIT are given a handler that does nothing. Each
        iteration sleeps for a random interval below half a second, asks
        ``self.tasks`` for a task and, if one is returned, processes it over a
        fresh non-pooled connection. Outcomes per task:

        * ``RequeueTask`` / ``ConnectionException``: the task is requeued.
        * ``TaskDoesNotExist``: logged only.
        * ``WorkerException``: the message is recorded via ``task.error``.
        * any other ``Exception``: traceback is logged at debug level and the
          exception is re-raised.

        When the loop ends an ``ExitingException`` is raised and handled here:
        if the last task exists and ``task.valid()`` is false, a requeue is
        attempted and ``APSWSQLStepQueueException`` is ignored.
        """
        self.jobs = Jobs()
        self.tasks = Tasks()
        task = None

        def ignore(*args, **kwargs):
            return None

        signal.signal(signal.SIGINT, ignore)
        signal.signal(signal.SIGQUIT, ignore)

        try:
            while not self.exiting():
                time.sleep(random.random() * 0.5)
                task = self.tasks.start()

                # worker_working is 1 while a task is held, 0 otherwise
                if task is None:
                    self.worker_working.value = 0
                    continue
                self.worker_working.value = 1

                job = self.jobs.get(task.job_id)

                # A connection id recorded on the task is passed to
                # kill_query_if_exists before the task is processed again
                stale_conn_id = task.data.get('conn_id', None)
                if stale_conn_id is not None:
                    self.kill_query_if_exists(job.spec.connection,
                                              stale_conn_id)

                self.logger.info('Task %d: starting' % task.task_id)

                try:
                    # can't use a pooled connection due to transactions staying open in the
                    # pool on failure
                    with pool.get_connection(
                            database=job.spec.target.database,
                            pooled=False,
                            **job.spec.connection) as conn:
                        conn.execute("BEGIN")
                        self._process_task(task, conn)
                    self.logger.info('Task %d: finished with success',
                                     task.task_id)
                except (RequeueTask, ConnectionException):
                    self.logger.info(
                        'Task %d: download failed, requeueing',
                        task.task_id)
                    self.logger.debug("Traceback: %s" %
                                      (traceback.format_exc()))
                    task.requeue()
                except TaskDoesNotExist:
                    self.logger.info(
                        'Task %d: finished with error, the task was either cancelled or deleted',
                        task.task_id)
                    self.logger.debug("Traceback: %s" %
                                      (traceback.format_exc()))
                except WorkerException as e:
                    task.error(str(e))
                    self.logger.info('Task %d: finished with error',
                                     task.task_id)
                except Exception:
                    self.logger.debug("Traceback: %s" %
                                      (traceback.format_exc()))
                    raise

            # Leaving the loop funnels into the same shutdown path below
            raise ExitingException()

        except ExitingException:
            self.logger.debug('Worker exiting')
            if task is not None and not task.valid():
                try:
                    task.requeue()
                except APSWSQLStepQueueException:
                    pass