Example #1
0
 def run(self, events_table):
     """Validate the configured events table.

     Args:
         events_table: Name of the events table. Must be truthy.

     Raises:
         ValueError: If ``events_table`` is empty or missing. The checks
             called afterwards are defined elsewhere; presumably they
             raise their own errors when a check fails (confirm against
             their definitions).
     """
     if not events_table:
         # The line continuation right after the opening quotes keeps every
         # message line on the same margin, so dedent() can actually strip
         # it. Previously the first line had no margin and dedent() was a
         # no-op, leaving stray indentation inside the error message.
         raise ValueError(
             dedent('''\
                 Section: events_table -
                 Section not found. You must define an events table.'''))

     # Each check receives the table name, an optional column name and the
     # engine stored on this instance.
     table_should_have_data(events_table, self.db_engine)
     column_should_be_intlike(events_table, 'entity_id', self.db_engine)
     column_should_be_timelike(events_table, 'outcome_date', self.db_engine)
     column_should_be_booleanlike(events_table, 'outcome', self.db_engine)
Example #2
0
 def validate(self, db_engine):
     """Run the table-level checks for this object's materialized table.

     The steps run in order: the table exists, it has an ``entity_id``
     column, it has the configured knowledge date column, and that column
     passes the time-like check. Progress is logged at INFO level.

     Args:
         db_engine: Engine handed through unchanged to every check helper.
     """
     table = self.materialized_table

     # Step 1: the table itself.
     logging.info('Validating from_obj %s', table)
     table_should_exist(table, db_engine)
     logging.info('Table %s successfully found', table)

     # Step 2: the entity identifier column.
     table_should_have_column(table, 'entity_id', db_engine)
     logging.info('Successfully found entity_id column in %s', table)

     # Step 3: the knowledge date column (present and time-like).
     date_column = self.knowledge_date_column
     table_should_have_column(table, date_column, db_engine)
     column_should_be_timelike(table, date_column, db_engine)
     logging.info(
         'Successfully found configured knowledge date column in %s',
         table,
     )
Example #3
0
 def validate(self, db_engine):
     """Run the table-level checks for this object's materialized table.

     The steps run in order: the table exists, it has an ``entity_id``
     column, it has the configured knowledge date column, and that column
     passes the time-like check. Progress is reported through
     ``logger.spam``.

     Args:
         db_engine: Engine handed through unchanged to every check helper.
     """
     table = self.materialized_table

     # Step 1: the table itself.
     logger.spam(f"Validating from_obj {table}")
     table_should_exist(table, db_engine)
     logger.spam(f"Table {table} successfully found")

     # Step 2: the entity identifier column.
     table_should_have_column(table, 'entity_id', db_engine)
     logger.spam(f"Successfully found entity_id column in {table}")

     # Step 3: the knowledge date column (present and time-like).
     date_column = self.knowledge_date_column
     table_should_have_column(table, date_column, db_engine)
     column_should_be_timelike(table, date_column, db_engine)
     logger.spam(
         f"Successfully found configured knowledge date column in {table}"
     )
Example #4
0
 def run(self, state_config):
     """Validate the ``state_config`` section.

     When no ``table_name`` key is present, only a warning is logged and
     nothing else is checked. Otherwise the named table is handed to the
     data/column check helpers and at least one state filter is required.

     Args:
         state_config: Mapping holding the state configuration.

     Raises:
         ValueError: If ``table_name`` is given but ``state_filters`` is
             missing or empty.
     """
     if 'table_name' not in state_config:
         # Adjacent literals are joined at compile time; note the explicit
         # space after the first sentence, which the message used to lack.
         logging.warning(
             'No table_name found in state_config. '
             'The provided events table will be used, which '
             'may result in unnecessarily large matrices')
         return

     dense_state_table = state_config['table_name']
     table_should_have_data(dense_state_table, self.db_engine)
     column_should_be_intlike(dense_state_table, 'entity_id',
                              self.db_engine)
     column_should_be_stringlike(dense_state_table, 'state',
                                 self.db_engine)
     column_should_be_timelike(dense_state_table, 'start_time',
                               self.db_engine)
     column_should_be_timelike(dense_state_table, 'end_time',
                               self.db_engine)

     # Truthiness also covers an explicit ``None``, which previously made
     # ``len()`` fail with a TypeError instead of the intended ValueError.
     if ('state_filters' not in state_config
             or not state_config['state_filters']):
         # The line continuation keeps all message lines on one margin so
         # dedent() really strips it.
         raise ValueError(
             dedent('''\
                 Section: state_config -
                 If a table_name is given in state_config,
                 at least one state filter must be present'''))
Example #5
0
    def _run(self, cohort_config):
        mutex_keys = set(['dense_states', 'entities_table', 'query'])
        available_keys = mutex_keys | set(['name'])
        used_keys = set(cohort_config.keys())
        bad_keys = used_keys - available_keys
        if bad_keys:
            raise ValueError(
                dedent('''
                Section: cohort_config -
                The following given keys: '{}'
                are invalid. Available keys are: '{}'
                '''.format(bad_keys, available_keys)))
        used_mutex_keys = mutex_keys & used_keys
        if len(used_mutex_keys) > 1:
            raise ValueError(
                dedent('''
                Section: cohort_config -
                Only one of the following keys can be sent: '{}'
                Found '{}'
                '''.format(mutex_keys, used_mutex_keys)))

        if 'dense_states' in cohort_config:
            state_config = cohort_config['dense_states']
            if 'table_name' not in state_config:
                raise ValueError(
                    dedent('''
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                a table name must be present'''))
            dense_state_table = state_config['table_name']
            table_should_have_data(dense_state_table, self.db_engine)
            column_should_be_intlike(dense_state_table, 'entity_id',
                                     self.db_engine)
            column_should_be_stringlike(dense_state_table, 'state',
                                        self.db_engine)
            column_should_be_timelike(dense_state_table, 'start_time',
                                      self.db_engine)
            column_should_be_timelike(dense_state_table, 'end_time',
                                      self.db_engine)
            if 'state_filters' not in state_config or len(
                    state_config['state_filters']) < 1:
                raise ValueError(
                    dedent('''
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                at least one state filter must be present'''))
        elif 'entities_table' in cohort_config:
            entities_table = cohort_config['entities_table']
            table_should_have_data(entities_table, self.db_engine)
            column_should_be_intlike(entities_table, 'entity_id',
                                     self.db_engine)
        elif 'query' in cohort_config:
            query = cohort_config['query']
            if '{as_of_date}' not in query:
                raise ValueError(
                    dedent('''
                Section: cohort_config -
                If 'query' is used as cohort_config,
                {as_of_date} must be present'''))
            dated_query = query.replace('{as_of_date}',
                                        '2016-01-01::timestamp')
            conn = self.db_engine.connect()
            logging.info('Validating cohort query')
            try:
                conn.execute('explain {}'.format(dated_query))
            except Exception as e:
                raise ValueError(
                    dedent('''
                    Section: cohort_config -
                    given query can not be run with a sample as_of_date .
                    query: "{}"
                    Full error: {}'''.format(query, e)))
Example #6
0
    def _run(self, cohort_config):
        mutex_keys = set(["dense_states", "entities_table", "query"])
        available_keys = mutex_keys | set(["name"])
        used_keys = set(cohort_config.keys())
        bad_keys = used_keys - available_keys
        if bad_keys:
            raise ValueError(
                dedent("""
                Section: cohort_config -
                The following given keys: '{}'
                are invalid. Available keys are: '{}'
                """.format(bad_keys, available_keys)))
        used_mutex_keys = mutex_keys & used_keys
        if len(used_mutex_keys) > 1:
            raise ValueError(
                dedent("""
                Section: cohort_config -
                Only one of the following keys can be sent: '{}'
                Found '{}'
                """.format(mutex_keys, used_mutex_keys)))

        if "dense_states" in cohort_config:
            state_config = cohort_config["dense_states"]
            if "table_name" not in state_config:
                raise ValueError(
                    dedent("""
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                a table name must be present"""))
            dense_state_table = state_config["table_name"]
            table_should_have_data(dense_state_table, self.db_engine)
            column_should_be_intlike(dense_state_table, "entity_id",
                                     self.db_engine)
            column_should_be_stringlike(dense_state_table, "state",
                                        self.db_engine)
            column_should_be_timelike(dense_state_table, "start_time",
                                      self.db_engine)
            column_should_be_timelike(dense_state_table, "end_time",
                                      self.db_engine)
            if ("state_filters" not in state_config
                    or len(state_config["state_filters"]) < 1):
                raise ValueError(
                    dedent("""
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                at least one state filter must be present"""))
        elif "entities_table" in cohort_config:
            entities_table = cohort_config["entities_table"]
            table_should_have_data(entities_table, self.db_engine)
            column_should_be_intlike(entities_table, "entity_id",
                                     self.db_engine)
        elif "query" in cohort_config:
            query = cohort_config["query"]
            if "{as_of_date}" not in query:
                raise ValueError(
                    dedent("""
                Section: cohort_config -
                If 'query' is used as cohort_config,
                {as_of_date} must be present"""))
            dated_query = query.replace("{as_of_date}", "2016-01-01")
            conn = self.db_engine.connect()
            logging.info("Validating cohort query")
            try:
                conn.execute(f"explain {dated_query}")
            except Exception as e:
                raise ValueError(
                    dedent(f"""
                    Section: cohort_config -
                    given query can not be run with a sample as_of_date .
                    query: "{query}"
                    Full error: {e}"""))