def __init__(self, config, db_session=None, fill_date=None):
    """Load the generation config, obtain a DB session and pick the fill to use.

    Args:
        config: Passed to ``read_config_file`` together with this module's
            ``__file__``; the result must contain a ``'generation'`` key.
        db_session: Session to reuse. A new one is created with
            ``session_factory()`` when this is falsy.
        fill_date: Optional date string. When ``None`` the latest fill is
            looked up; otherwise the string is parsed with
            ``make_dump_date_from_str`` and the exact matching fill is
            looked up.
    """
    self.config = read_config_file(config, __file__)
    self.config_generation = self.config['generation']
    self.db_session = db_session or session_factory()

    # Both lookups return a pair that unpacks to (fill id, fill date).
    if fill_date is not None:
        requested_date = make_dump_date_from_str(fill_date)
        fill_info = get_exact_fill_id(self.db_session, requested_date)
    else:
        fill_info = get_latest_fill_id(self.db_session)
    self.curr_fill, self.curr_fill_date = fill_info

    # Placeholders; nothing is assigned to these during construction.
    self.metric_combinations = None
    self.metric_creator = None
    self.metric_job = None

    # Remember the process id of the process that built this object.
    self.pid = os.getpid()
def __init__(self, config):
    """Configure the orchestrator from a config file and HUMANIKI_* env vars.

    Args:
        config: Passed to ``read_config_file`` together with this module's
            ``__file__``; the result must contain an ``'insertion'`` key.

    Environment variables read:
        HUMANIKI_BACKFILL: stored as ``frontfill_backfill`` (default
            ``"front"``).
        HUMANIKI_OVERRIDE_DATE: when set, parsed with
            ``make_dump_date_from_str`` into ``working_fill_date``.
        HUMANIKI_NUM_PROCS: integer stored as ``num_procs`` (default 4).
        HUMANIKI_DRY_RUN: integer flag stored as the bool ``dry_run``
            (default ``'0'``, i.e. ``False``).

    Raises:
        ValueError: if HUMANIKI_NUM_PROCS or HUMANIKI_DRY_RUN is set to
            something ``int()`` cannot parse.
    """
    self.config = read_config_file(config, __file__)
    self.config_insertion = self.config['insertion']
    self.overwrite = self.config_insertion.get('overwrite', False)
    self.frontfill_backfill = os.getenv("HUMANIKI_BACKFILL", "front")
    self.db_session = db.session_factory()

    self.working_fill_date = None
    self.humaniki_override_date = os.getenv("HUMANIKI_OVERRIDE_DATE", None)
    if self.humaniki_override_date is not None:
        self.working_fill_date = make_dump_date_from_str(
            self.humaniki_override_date)

    self.metrics_factory = None
    # os.getenv returns a str whenever the variable is set, while the
    # fallback is the int 4; coerce so num_procs is always an int.
    self.num_procs = int(os.getenv("HUMANIKI_NUM_PROCS", 4))
    self.fill_id = None
    self.dry_run = bool(int(os.getenv('HUMANIKI_DRY_RUN', '0')))
    log.info("Humaniki Orchestrator intialized")
def _create_metric_creators(self):
    """Build a ``MetricCreator`` from ``self.metric_job`` and store it.

    When ``self.metric_job`` is ``None`` there is nothing to build: the
    situation is logged and the process exits with status 29.
    """
    job = self.metric_job
    if job is None:
        log.info(f'PID:{self.pid} No metrics creator to hydrate')
        sys.exit(29)  # special signal to calling bash.

    detail = job.detail
    population = PopulationDefinition(detail["population_definition"])
    bias = Properties(detail['bias_property'])
    dimensions = [Properties(d) for d in detail['dimension_properties']]

    self.metric_creator = MetricCreator(
        population_definition=population,
        bias_property=bias,
        dimension_properties=dimensions,
        threshold=detail['threshold'],
        fill_id=job.fill_id,
        properties_id=detail['properties_id'],
        # The creator gets its own freshly created session.
        db_session=session_factory(),
    )
    log.info("hydrate metric creator")
def __init__(self, config, dump_date=None, dump_subset=None, insert_strategy=None):
    """Set up insertion settings, a DB session and the per-table column layout.

    Args:
        config: Passed to ``hs_utils.read_config_file`` together with this
            module's ``__file__``; the result must contain an
            ``'insertion'`` key.
        dump_date: Optional date string. When truthy it is parsed with
            ``hs_utils.make_dump_date_from_str``; otherwise ``dump_date``
            is stored as ``None``.
        dump_subset: Stored unchanged as ``self.dump_subset``.
        insert_strategy: Stored as given, or ``"infile"`` when ``None``.
    """

    def _table_spec(insert_columns, extra_const_columns=None, escaping_options=""):
        # Every table entry carries the same three keys; build a fresh dict
        # (and a fresh empty const-column dict) for each table.
        return {
            "insert_columns": insert_columns,
            "extra_const_columns": {} if extra_const_columns is None else extra_const_columns,
            "escaping_options": escaping_options,
        }

    self.config = hs_utils.read_config_file(config, __file__)
    self.config_insertion = self.config['insertion']
    self.overwrite = self.config_insertion.get('overwrite', False)
    self.only_files = self.config_insertion.get('only_files')

    if insert_strategy is None:
        insert_strategy = "infile"
    self.insert_strategy = insert_strategy

    self.dump_date = None
    if dump_date:
        self.dump_date = hs_utils.make_dump_date_from_str(dump_date)
    self.dump_subset = dump_subset
    self.dump_date_str = None
    self.fill_id = None
    self.detection_type = None
    self.db_session = session_factory()

    # order is important because of foreign key constraint
    self.csvs = None
    self.CSV_NA_VALUE = r'\N'
    self.table_column_map = {
        'human': _table_spec(['qid', 'gender', 'year_of_birth', 'sitelink_count']),
        'human_country': _table_spec(['human_id', 'country']),
        'human_occupation': _table_spec(['human_id', 'occupation']),
        'human_sitelink': _table_spec(['human_id', 'sitelink']),
        'label': _table_spec(
            ['qid', 'label'],
            extra_const_columns={'lang': 'en'},
            escaping_options="""OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\\\'""",
        ),
        'occupation_parent': _table_spec(['occupation', 'parent']),
    }
from humaniki_backend import app
from humaniki_schema.schema import metric
from humaniki_schema.utils import read_config_file
from unittest import TestCase

tc = TestCase()
config = read_config_file(os.environ['HUMANIKI_YAML_CONFIG'], __file__)

# TODO. If you generate the data separately and then run the tests they pass.
# But if you ask the data to be generated here, sometimes there are no metrics
# created, despite metrics count showing nonzero. A mystery.
skip_generation = config['test'].get('skip_gen', False)
if not skip_generation:
    generated = generate_example_data.generate_all(config=config)
    print(f'generated: {generated}')

# Sanity check at import time: the tests below need at least one metric row.
session = db.session_factory()
metrics_count = session.query(func.count(metric.fill_id)).scalar()
print(f'number of metrics: {metrics_count}')
assert metrics_count>0


@pytest.fixture
def test_jsons():
    """Load every ``*.json`` file in the configured test data directory.

    Returns:
        dict mapping each JSON file name (not the full path) found in
        ``config['test']['test_datadir']`` to its parsed content.
    """
    test_files = {}
    test_datadir = config['test']['test_datadir']
    files = os.listdir(test_datadir)
    json_fs = [f for f in files if f.endswith('.json')]
    for json_f in json_fs:
        # Use a context manager so each file handle is closed right after
        # parsing instead of being left for the garbage collector.
        with open(os.path.join(test_datadir, json_f)) as json_fh:
            test_files[json_f] = json.load(json_fh)
    return test_files
import sqlalchemy from sqlalchemy import func, and_, or_, cast, String from sqlalchemy.dialects.mysql import JSON from sqlalchemy.orm import aliased from sqlalchemy.orm.attributes import flag_modified from humaniki_schema import utils as hs_utils from humaniki_schema.schema import fill, metric_properties_j, metric_properties_n, metric_aggregations_j, \ metric_aggregations_n, \ project, metric import humaniki_schema.utils as hs_utils from humaniki_schema.db import session_factory db_session = session_factory() def get_properties_obj(bias_property, dimension_properties, session=None, table=metric_properties_j, as_subquery=False, create_if_no_exist=False): """ the main entry point, get a property id, by the bias property (like gender) and the other dimensions the order we are storing the properties, either in json or normalized is 0: bias_value, 1..n: properties sorted by their PID (and sitelink is faked as PID 0). overview get_property