Example #1
            map(lambda r: r.run(self, checks_to_run), self.job_template.rules)
            [source.close_connection() for source in self.job_template.data_sources]
            # Dedupe checks_to_run. Entries are tuples of the format (DataSource, table_name_string, Check).
            seen = set()
            seen_add = seen.add
            checks_to_run = [c for c in checks_to_run if not ((c[0].id, c[1], c[2].id) in seen or seen_add((c[0].id, c[1], c[2].id)))]
            if len(checks_to_run) > 0:
                # Bucketize checks based on parallelization chosen. Each bucket runs sequentially.
                checks_by_parallelization = self.get_checks_by_parallelization(checks_to_run) 

                # Run each bucket of checks in a separate celery worker: turn each subarray into an array of
                # run_check job signatures, splat each array into a chain (forcing the checks in a chain to run
                # one at a time), then group all the chains so they run in parallel. Each chain is one worker.
                # Finally, register_finished is called once everything is done.
                separate_queues = [map(lambda c: celery_jobs.job_runs.run_check.si(c[0].id, c[1], c[2].id, self.id), chks) for chks in checks_by_parallelization]
                sep_chains = [chain(*queue) for queue in separate_queues]
                print(sep_chains)
                group_of_chains = (group(*sep_chains) | celery_jobs.job_runs.register_finished.s(self.id)).apply_async()
            else:
                self.set_finished()

        except Exception:
            self.set_failed()

        db_session.add(log)
        db_session.commit()


timestamps_triggers(JobRun)
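
The bucket-of-chains layout above is Celery's chord pattern: each chain runs its checks one at a time, the group runs the chains in parallel, and the callback fires once every chain has finished. A minimal, self-contained sketch of the same wiring (the broker URL, task bodies, and check ids below are assumptions, not code from this project):

from celery import Celery, chain, group

app = Celery("sketch", broker="redis://localhost:6379/0")  # assumed broker

@app.task
def run_check(check_id):
    pass  # placeholder: run a single check

@app.task
def register_finished(results, job_run_id):
    pass  # placeholder: chord callback; receives all chain results first

buckets = [[1, 2], [3, 4]]  # hypothetical check ids, bucketed by parallelization
chains = [chain(*(run_check.si(cid) for cid in bucket)) for bucket in buckets]
# group(...) | callback builds a chord: the chains run in parallel, checks
# within a chain run sequentially, register_finished runs when all are done.
(group(*chains) | register_finished.s(42)).apply_async()
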
Example #2
            and all its checks and rules cloned as well.
        """

        self.parent_job_template_id = self.id
        self.read_only = True
        rules = self.rules
        ds = self.data_sources  # Grab before expunging.
        db_session.expunge(self)
        make_transient(self)

        [r.become_read_only_clone() for r in rules]
        [d.become_read_only_clone() for d in ds]

        self.id = None

        db_session.add(self)
        db_session.commit()

        self.rules = rules
        self.data_sources = ds

    def checks(self):
        seen = set()
        seen_add = seen.add
        all_checks = np.array(map(lambda r: r.checks, self.rules)).flatten()
        checks = [c for c in all_checks if not (c.id in seen or seen_add(c.id))]
        return checks


timestamps_triggers(JobTemplate)
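
become_read_only_clone leans on SQLAlchemy's expunge/make_transient trick: detach the instance from the session, erase its persistent identity, clear the primary key, and re-add it so the next flush INSERTs a copy. A minimal sketch of just that trick, assuming a hypothetical Widget model and an existing db_session:

from sqlalchemy.orm import make_transient

widget = db_session.query(Widget).get(1)  # Widget and db_session are assumed
db_session.expunge(widget)  # detach the instance from the session
make_transient(widget)      # forget its persistent (database) identity
widget.id = None            # let the database assign a fresh primary key
db_session.add(widget)      # re-adding now INSERTs a copy
db_session.commit()
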
Example #3
                            {"traceback": traceback.format_exc()})

    def add_log(self, event, message, metadata=None):
        if metadata is None:
            metadata = {}  # avoid the shared mutable default argument pitfall
        if not self.log:
            self.log = []
        self.log.append(self.__class__.new_event(event, message, metadata))

        # http://stackoverflow.com/questions/30088089/sqlalchemy-json-typedecorator-not-saving-correctly-issues-with-session-commit
        flag_modified(self, "log")


@event.listens_for(HasLogs, "mapper_configured", propagate=True)
def setup_listener(mapper, class_):
    name = class_.__name__
    loggable_type = name.lower()
    class_.logs = relationship(
        Log,
        primaryjoin=and_(class_.id == foreign(remote(Log.loggable_id)),
                         Log.loggable_type == loggable_type),
        backref=backref("parent_%s" % loggable_type,
                        primaryjoin=remote(class_.id) == foreign(
                            Log.loggable_id)),
        cascade="all, delete-orphan")

    @event.listens_for(class_.logs, "append")
    def append_log(target, value, initiator):
        value.loggable_type = loggable_type


timestamps_triggers(Log)
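
The flag_modified call in add_log addresses a real SQLAlchemy gotcha: in-place mutation of a JSON-typed column is invisible to attribute change tracking, so the commit silently skips the UPDATE. A runnable sketch with a hypothetical Report model (only flag_modified itself comes from the snippet above):

from sqlalchemy import JSON, Column, Integer, create_engine
from sqlalchemy.orm import Session, declarative_base
from sqlalchemy.orm.attributes import flag_modified

Base = declarative_base()

class Report(Base):
    __tablename__ = "reports"
    id = Column(Integer, primary_key=True)
    log = Column(JSON, default=list)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    report = Report(log=[])
    session.add(report)
    session.commit()

    report.log.append({"event": "started"})  # in-place mutation: not tracked
    flag_modified(report, "log")             # explicitly mark the attribute dirty
    session.commit()                         # now the UPDATE is actually emitted
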
Example #4
        return host

    def config(self):
        return {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
            "dbname": self.dbname,
            "data_source_type": self.data_source_type.value
        }

    def open_connection(self):
        klazz = eval(self.config()['data_source_type'].title() + "Connection")
        self.db = klazz(**self.config())

    def close_connection(self):
        self.db.close()

    def tables(self):
        return self.db.tables(self.schemas)

    def col_present(self, table, column):
        return self.db.col_present(table, column)

    def count(self, table):
        return self.db.count(table)


timestamps_triggers(DataSource)
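
open_connection resolves the connection class by eval'ing a string built from the enum value ("postgres" becoming PostgresConnection, say). A registry dict gives the same string-to-class dispatch without eval; the class names below are assumptions for illustration:

# Hypothetical registry; PostgresConnection/MysqlConnection are placeholders.
CONNECTION_CLASSES = {
    "postgres": PostgresConnection,
    "mysql": MysqlConnection,
}

def open_connection(self):
    config = self.config()
    klazz = CONNECTION_CLASSES[config["data_source_type"]]
    self.db = klazz(**config)
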
Example #5
                metadata["log_metadata"])
            db_session.add(job_run)

            if (job_run.status in [
                    JobRunStatus.failed, JobRunStatus.cancelled,
                    JobRunStatus.rejected
            ]):
                log.add_log(
                    "cancelled",
                    "Check cancelled due to Job Run Status of %s caused by some other worker."
                    % (job_run.status))
            else:
                chk_class = eval(
                    camelize(str(self.check_type.value)) + "Check")

                check = chk_class(metadata)
                check.run()
                log.add_log("finished", "Check Ended",
                            metadata["log_metadata"])
        except Exception as e:
            print(str(sys.exc_info()))
            log.new_error_event("Check Failed due to Error",
                                metadata["log_metadata"])
            db_session.commit()
            raise

        db_session.commit()


timestamps_triggers(Check)
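
The check runner does the same eval dispatch with camelize, mapping a check type value like "uniqueness" to a UniquenessCheck class. If camelize comes from the inflection package (the snippet's imports are not shown), a getattr lookup on the defining module is an eval-free equivalent; check_class_for and the example value are assumptions:

import sys
from inflection import camelize  # assumed source of camelize

def check_class_for(check_type_value):
    # e.g. "uniqueness" -> "UniquenessCheck", resolved on this module.
    name = camelize(str(check_type_value)) + "Check"
    return getattr(sys.modules[__name__], name)
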
Example #6
        return [[source, source.tables()] for source in job_run.job_template.data_sources]


    def run(self, job_run, checks_to_run, tables_and_sources=None):
        tables_and_sources = self.all_tables_with_source(job_run) if tables_and_sources is None else tables_and_sources
        log = self.get_log(job_run=job_run)
        log.add_log("creation", "Begin Rule Check")

        try:
            log.add_log("check", "Checking %s condition for conditional %s" % (self.condition.value, self.conditional))

            # Whittle down each [source, [table1, table2, ...]] entry to the same structure, keeping only the tables that match the condition.
            tables_and_sources_matching = [getattr(self, self.condition.value)(self.conditional, source_and_tables[0], source_and_tables[1], job_run) for source_and_tables in tables_and_sources]

            # Add tuples like (source, table, checkObj) to checks_to_run array by iterating over every source,
            # every table in that source, every check for that table. Triple generator magic.
            [[[checks_to_run.append((source_and_tables[0], table, check)) for check in self.checks] for table in source_and_tables[1]] for source_and_tables in tables_and_sources_matching]

            # Now allow any children rules to apply to tables that have been matched by this rule:
            [child.run(job_run, checks_to_run, tables_and_sources_matching) for child in self.children]
            log.add_log("finished", "Rule Check Ended")
        except Exception:
            print(str(sys.exc_info()))
            log.new_error_event()
            raise


timestamps_triggers(Rule)
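
The triple-nested comprehension in run is pure side effect: it flattens sources x tables x checks into (source, table, check) tuples. Written as explicit loops, the same step reads:

# Equivalent to the "triple generator magic" comprehension above.
for source, tables in tables_and_sources_matching:
    for table in tables:
        for check in self.checks:
            checks_to_run.append((source, table, check))
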