def __init__(self, config, db_session=None, fill_date=None):
    """Load the config, obtain a DB session and resolve the fill to work on.

    Args:
        config: Handed to ``read_config_file`` together with ``__file__``.
            The loaded config must provide a ``'generation'`` entry.
        db_session: Session to reuse. When falsy, a new one is obtained
            from ``session_factory()``.
        fill_date: Optional date string. When ``None`` the fill is looked up
            with ``get_latest_fill_id``; otherwise the string is converted
            with ``make_dump_date_from_str`` and looked up with
            ``get_exact_fill_id``.
    """
    self.config = read_config_file(config, __file__)
    self.config_generation = self.config['generation']
    self.db_session = db_session or session_factory()

    # Both lookup helpers are unpacked into (fill id, fill date).
    if fill_date is not None:
        requested_date = make_dump_date_from_str(fill_date)
        fill_info = get_exact_fill_id(self.db_session, requested_date)
    else:
        fill_info = get_latest_fill_id(self.db_session)
    self.curr_fill, self.curr_fill_date = fill_info

    # Not populated here; start out empty.
    self.metric_combinations = self.metric_creator = self.metric_job = None
    self.pid = os.getpid()
def __init__(self, config):
    """Set up the orchestrator from a config file and environment variables.

    Args:
        config: Handed to ``read_config_file`` together with ``__file__``.
            The loaded config must provide an ``'insertion'`` section; its
            optional ``'overwrite'`` key defaults to ``False``.

    Environment variables read:
        HUMANIKI_BACKFILL: stored unchanged in ``frontfill_backfill``
            (default ``"front"``).
        HUMANIKI_OVERRIDE_DATE: when set, converted with
            ``make_dump_date_from_str`` and stored in ``working_fill_date``.
        HUMANIKI_NUM_PROCS: integer stored in ``num_procs`` (default 4).
        HUMANIKI_DRY_RUN: integer flag; any nonzero value enables
            ``dry_run`` (default ``'0'``).

    Raises:
        ValueError: if ``HUMANIKI_NUM_PROCS`` or ``HUMANIKI_DRY_RUN`` is set
            to something ``int()`` cannot parse.
    """
    self.config = read_config_file(config, __file__)
    self.config_insertion = self.config['insertion']
    self.overwrite = self.config_insertion.get('overwrite', False)
    self.frontfill_backfill = os.getenv("HUMANIKI_BACKFILL", "front")
    self.db_session = db.session_factory()

    self.working_fill_date = None
    self.humaniki_override_date = os.getenv("HUMANIKI_OVERRIDE_DATE", None)
    if self.humaniki_override_date is not None:
        self.working_fill_date = make_dump_date_from_str(
            self.humaniki_override_date)

    self.metrics_factory = None
    # os.getenv returns a str whenever the variable is set, so coerce to int
    # to keep num_procs the same type as the integer default.
    self.num_procs = int(os.getenv("HUMANIKI_NUM_PROCS", 4))
    self.fill_id = None
    self.dry_run = bool(int(os.getenv('HUMANIKI_DRY_RUN', '0')))
    log.info("Humaniki Orchestrator intialized")
def __init__(self, config, dump_date=None, dump_subset=None, insert_strategy=None):
    """Read the insertion config and prepare per-table insert metadata.

    Args:
        config: Handed to ``hs_utils.read_config_file`` together with
            ``__file__``. The loaded config must provide an ``'insertion'``
            section; its optional keys ``'overwrite'`` and ``'only_files'``
            default to ``False`` and ``None`` respectively.
        dump_date: Optional date string. When truthy it is converted with
            ``hs_utils.make_dump_date_from_str``; otherwise ``dump_date``
            is ``None``.
        dump_subset: Stored unchanged on the instance.
        insert_strategy: Stored unchanged; ``None`` selects ``"infile"``.
    """
    self.config = hs_utils.read_config_file(config, __file__)
    insertion_cfg = self.config['insertion']
    self.config_insertion = insertion_cfg

    if 'overwrite' in insertion_cfg:
        self.overwrite = insertion_cfg['overwrite']
    else:
        self.overwrite = False

    if 'only_files' in insertion_cfg:
        self.only_files = insertion_cfg['only_files']
    else:
        self.only_files = None

    self.insert_strategy = "infile" if insert_strategy is None else insert_strategy

    if dump_date:
        self.dump_date = hs_utils.make_dump_date_from_str(dump_date)
    else:
        self.dump_date = None
    self.dump_subset = dump_subset

    # Filled in later; nothing is known about the dump at construction time.
    self.dump_date_str = None
    self.fill_id = None
    self.detection_type = None

    self.db_session = session_factory()

    # NOTE: order is important because of the foreign key constraints
    # (presumably this refers to the order of the csvs/tables -- confirm).
    self.csvs = None
    self.CSV_NA_VALUE = r'\N'

    # (table, columns to insert, constant columns added to every row,
    #  extra escaping clause) -- one row per table.
    table_layout = (
        ('human', ['qid', 'gender', 'year_of_birth', 'sitelink_count'], {}, ""),
        ('human_country', ['human_id', 'country'], {}, ""),
        ('human_occupation', ['human_id', 'occupation'], {}, ""),
        ('human_sitelink', ['human_id', 'sitelink'], {}, ""),
        ('label', ['qid', 'label'], {'lang': 'en'},
         """OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\\\'"""),
        ('occupation_parent', ['occupation', 'parent'], {}, ""),
    )
    self.table_column_map = {
        table: {
            "insert_columns": columns,
            "extra_const_columns": const_columns,
            "escaping_options": escaping,
        }
        for table, columns, const_columns, escaping in table_layout
    }
import json
import os
import time
import pytest
from sqlalchemy import func
from humaniki_schema import generate_example_data, db
from humaniki_backend import app
from humaniki_schema.schema import metric
from humaniki_schema.utils import read_config_file
from unittest import TestCase

# Module-level test setup: everything below runs once, at import time.

# Bare TestCase instance kept at module level.
tc = TestCase()
# The config path comes from the HUMANIKI_YAML_CONFIG environment variable;
# a missing variable raises KeyError here.
config = read_config_file(os.environ['HUMANIKI_YAML_CONFIG'], __file__)
# TODO: If you generate the data separately and then run the tests, they pass.
# But if you ask for the data to be generated here, sometimes no metrics are
# created, despite the metrics count showing nonzero. A mystery.
# Example data is generated unless config['test']['skip_gen'] is truthy.
skip_generation = config['test']['skip_gen'] if 'skip_gen' in config['test'] else False
if not skip_generation:
    generated = generate_example_data.generate_all(config=config)
    print(f'generated: {generated}')

# Sanity check: count metric.fill_id values and fail the import if there are none.
session = db.session_factory()
metrics_count = session.query(func.count(metric.fill_id)).scalar()
print(f'number of metrics: {metrics_count}')
assert metrics_count>0


@pytest.fixture
def test_jsons():
    test_files = {}