예제 #1
0
 def __init__(self, config, db_session=None, fill_date=None):
     """Load the config, obtain a DB session and resolve which fill to work on.

     Args:
         config: handed to ``read_config_file`` together with this file's path;
             the loaded config must contain a 'generation' entry.
         db_session: session to reuse; when falsy a new one is created with
             ``session_factory()``.
         fill_date: when None the latest fill is looked up; otherwise the value
             is converted with ``make_dump_date_from_str`` and the exact
             matching fill is looked up.
     """
     self.config = read_config_file(config, __file__)
     self.config_generation = self.config['generation']
     self.db_session = db_session or session_factory()

     # Both lookup helpers return a pair that is unpacked into
     # (curr_fill, curr_fill_date).
     if fill_date is not None:
         requested_dt = make_dump_date_from_str(fill_date)
         fill_info = get_exact_fill_id(self.db_session, requested_dt)
     else:
         fill_info = get_latest_fill_id(self.db_session)
     self.curr_fill, self.curr_fill_date = fill_info

     # Not set here; initialised to None as placeholders.
     self.metric_combinations = None
     self.metric_creator = None
     self.metric_job = None

     # Process id of the process that constructed this object.
     self.pid = os.getpid()
예제 #2
0
 def __init__(self, config):
     """Configure the orchestrator from a config file and HUMANIKI_* env vars.

     Args:
         config: handed to ``read_config_file`` together with this file's path;
             the loaded config must contain an 'insertion' entry.

     Environment variables read:
         HUMANIKI_BACKFILL: stored verbatim in ``frontfill_backfill``
             (default "front").
         HUMANIKI_OVERRIDE_DATE: when set, converted with
             ``make_dump_date_from_str`` and stored in ``working_fill_date``.
         HUMANIKI_NUM_PROCS: stored in ``num_procs`` as an int (default 4).
         HUMANIKI_DRY_RUN: integer flag converted to bool (default '0').

     Raises:
         ValueError: if HUMANIKI_NUM_PROCS or HUMANIKI_DRY_RUN is set to a
             value that ``int()`` cannot parse.
     """
     self.config = read_config_file(config, __file__)
     self.config_insertion = self.config['insertion']
     # 'overwrite' is optional in the insertion config and defaults to False.
     self.overwrite = self.config_insertion[
         'overwrite'] if 'overwrite' in self.config_insertion else False
     self.frontfill_backfill = os.getenv("HUMANIKI_BACKFILL", "front")
     self.db_session = db.session_factory()
     self.working_fill_date = None
     self.humaniki_override_date = os.getenv("HUMANIKI_OVERRIDE_DATE", None)
     if self.humaniki_override_date is not None:
         self.working_fill_date = make_dump_date_from_str(
             self.humaniki_override_date)
     self.metrics_factory = None
     # os.getenv returns a str whenever the variable is set, while the fallback
     # is the int 4; normalise so num_procs is always an int.
     self.num_procs = int(os.getenv("HUMANIKI_NUM_PROCS", 4))
     self.fill_id = None
     self.dry_run = bool(int(os.getenv('HUMANIKI_DRY_RUN', '0')))
     log.info("Humaniki Orchestrator intialized")
예제 #3
0
 def __init__(self, config, dump_date=None, dump_subset=None, insert_strategy=None):
     """Load the insertion config and prepare per-table insert metadata.

     Args:
         config: handed to ``hs_utils.read_config_file`` together with this
             file's path; the loaded config must contain an 'insertion' entry.
         dump_date: when truthy, converted with
             ``hs_utils.make_dump_date_from_str``; otherwise ``dump_date`` is None.
         dump_subset: stored unchanged.
         insert_strategy: stored unchanged; "infile" is used when None.
     """
     self.config = hs_utils.read_config_file(config, __file__)
     self.config_insertion = self.config['insertion']

     # Optional insertion settings and their fallbacks.
     if 'overwrite' in self.config_insertion:
         self.overwrite = self.config_insertion['overwrite']
     else:
         self.overwrite = False
     if 'only_files' in self.config_insertion:
         self.only_files = self.config_insertion['only_files']
     else:
         self.only_files = None

     if insert_strategy is None:
         self.insert_strategy = "infile"
     else:
         self.insert_strategy = insert_strategy

     if dump_date:
         self.dump_date = hs_utils.make_dump_date_from_str(dump_date)
     else:
         self.dump_date = None
     self.dump_subset = dump_subset

     # Not set here; initialised to None as placeholders.
     self.dump_date_str = None
     self.fill_id = None
     self.detection_type = None

     self.db_session = session_factory()

     # Order is important because of the foreign key constraint.
     # NOTE(review): presumably this refers to the order in which the
     # csvs/tables are inserted - confirm against the code that fills `csvs`.
     self.csvs = None
     self.CSV_NA_VALUE = r'\N'

     # (table, insert columns, constant extra columns, escaping clause),
     # kept in the same order as the resulting mapping.
     label_escaping = """OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\\\'"""
     table_specs = (
         ('human', ['qid', 'gender', 'year_of_birth', 'sitelink_count'], {}, ""),
         ('human_country', ['human_id', 'country'], {}, ""),
         ('human_occupation', ['human_id', 'occupation'], {}, ""),
         ('human_sitelink', ['human_id', 'sitelink'], {}, ""),
         ('label', ['qid', 'label'], {'lang': 'en'}, label_escaping),
         ('occupation_parent', ['occupation', 'parent'], {}, ""),
     )
     self.table_column_map = {
         table_name: {
             "insert_columns": columns,
             "extra_const_columns": const_columns,
             "escaping_options": escaping,
         }
         for table_name, columns, const_columns, escaping in table_specs
     }
import json
import os
import time

import pytest
from sqlalchemy import func

from humaniki_schema import generate_example_data, db
from humaniki_backend import app
from humaniki_schema.schema import metric
from humaniki_schema.utils import read_config_file
from unittest import TestCase
# Bare TestCase instance kept at module level.
tc = TestCase()

# The config path comes from the HUMANIKI_YAML_CONFIG environment variable
# (KeyError at import time if it is unset).
config = read_config_file(os.environ['HUMANIKI_YAML_CONFIG'], __file__)

# TODO: If the data is generated separately and the tests are run afterwards,
# they pass. But when generation is requested here, sometimes no metrics are
# created even though the metrics count shows nonzero. A mystery.
if 'skip_gen' in config['test']:
    skip_generation = config['test']['skip_gen']
else:
    skip_generation = False

if not skip_generation:
    # Generate the example data, then check that at least one metric row exists.
    generated = generate_example_data.generate_all(config=config)
    print(f'generated: {generated}')
    session = db.session_factory()
    metrics_count = session.query(func.count(metric.fill_id)).scalar()
    print(f'number of metrics: {metrics_count}')
    assert metrics_count > 0

@pytest.fixture
def test_jsons():
    test_files = {}