def setUpClass(self):
    """Load the UDL2 configuration and keep it on ``self.conf``.

    The ini file path is taken from the ``UDL2_CONF`` environment variable;
    ``UDL2_DEFAULT_CONFIG_PATH_FILE`` is used when that variable is unset.
    Only the first element returned by ``read_ini_file`` is stored.
    """
    ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
    self.conf = read_ini_file(ini_path)[0]
 def setUpClass(self):
     """Read the UDL2 ini file and store its first result on ``self.conf``.

     ``UDL2_CONF`` from the environment selects the ini file; without it the
     default path ``UDL2_DEFAULT_CONFIG_PATH_FILE`` is read instead.
     """
     if 'UDL2_CONF' in os.environ:
         ini_location = os.environ['UDL2_CONF']
     else:
         ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE
     self.conf = read_ini_file(ini_location)[0]
    def setUp(self):
        """Initialise the UDL database and insert one reference row per rule.

        The configuration path comes from ``UDL2_CONF`` (falling back to
        ``UDL2_DEFAULT_CONFIG_PATH_FILE``). Every generated transformation
        rule name is inserted into the assessment reference mapping table
        with placeholder source/target columns and ``phase`` set to -999.
        """
        ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
        udl2_conf = read_ini_file(ini_path)[0]
        initialize_db_udl(udl2_conf)
        self.ref_schema = udl2_conf['udl2_db_conn']['db_schema']
        self.ref_table_name = Constants.UDL2_REF_MAPPING_TABLE(Constants.LOAD_TYPE_ASSESSMENT)

        # Testable Rules: the first element of each generated entry is used as the rule name.
        self.rule_names = transform_rules.keys()
        self.rule_conf = transform_rules
        self.rule_list = transformation_code_generator.generate_transformations(
            self.rule_names, rule_conf=self.rule_conf)
        self.testable_rules = [generated[0] for generated in self.rule_list]

        # One placeholder mapping row per rule.
        rows_to_insert = [
            {
                'phase': -999,
                'source_table': 'ftest_table',
                'source_column': 'ftest_column',
                'target_table': 'ftest_table1',
                'target_column': 'ftest_column1',
                'transformation_rule': rule_name,
            }
            for rule_name in self.testable_rules
        ]
        with get_udl_connection() as conn:
            self.ref_table = conn.get_table(self.ref_table_name)
            conn.execute(self.ref_table.insert(rows_to_insert))
def main():
    """Initialise the production database from the UDL2 config and load 'cat'.

    The ini file is the one named by the ``UDL2_CONF`` environment variable,
    or ``UDL2_DEFAULT_CONFIG_PATH_FILE`` when that variable is not set.
    """
    ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
    # read_ini_file yields a (nested, flat) pair; only the nested one is needed here.
    nested_conf, _flat_conf = read_ini_file(ini_path)
    initialize_db_prod(nested_conf)
    load_data('cat')
 def setUp(self):
     """Load the UDL2 configuration and create a temporary output directory.

     ``self.conf`` receives the first element returned by ``read_ini_file``;
     ``self.output_dir`` is a fresh directory from ``tempfile.mkdtemp``.
     """
     ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
     self.conf = read_ini_file(ini_path)[0]
     self.output_dir = tempfile.mkdtemp()
 def setUp(self):
     """Prepare ``self.conf`` and a scratch directory for the test.

     The ini path is read from ``UDL2_CONF`` when present in the environment,
     otherwise ``UDL2_DEFAULT_CONFIG_PATH_FILE`` is used.
     """
     if 'UDL2_CONF' in os.environ:
         ini_location = os.environ['UDL2_CONF']
     else:
         ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE
     loaded = read_ini_file(ini_location)
     self.conf = loaded[0]
     # Fresh temporary directory for anything the test writes out.
     self.output_dir = tempfile.mkdtemp()
 def setUp(self):
     """Load the UDL2 configuration, initialise all databases, set the data dir.

     ``self.conf`` is the first element returned by ``read_ini_file`` for the
     path in ``UDL2_CONF`` (or ``UDL2_DEFAULT_CONFIG_PATH_FILE`` when unset).
     ``self.base_data_dir`` points at ``../data`` relative to this file.
     """
     ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
     self.conf = read_ini_file(ini_path)[0]
     # NOTE(review): udl2_conf and udl2_flat_conf are not assigned in this
     # method - presumably module-level names; confirm they exist at import time.
     initialize_all_db(udl2_conf, udl2_flat_conf)
     here = os.path.dirname(__file__)
     self.base_data_dir = os.path.join(here, "..", "data")
 def setUp(self):
     """Set up ``self.conf``, initialise all databases and locate test data.

     The ini file named by ``UDL2_CONF`` is read when that environment
     variable exists; otherwise ``UDL2_DEFAULT_CONFIG_PATH_FILE`` is read.
     """
     if 'UDL2_CONF' in os.environ:
         ini_location = os.environ['UDL2_CONF']
     else:
         ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE
     loaded = read_ini_file(ini_location)
     self.conf = loaded[0]
     # NOTE(review): udl2_conf / udl2_flat_conf are not defined inside this
     # method - presumably they come from module scope; verify.
     initialize_all_db(udl2_conf, udl2_flat_conf)
     module_dir = os.path.dirname(__file__)
     self.base_data_dir = os.path.join(module_dir, "..", "data")
 def setUp(self):
     """Load the UDL2 configuration and create a temporary expansion directory.

     ``read_ini_file`` may return either a tuple or a single object; when it
     is a tuple only its first element is kept on ``self.conf``.
     """
     ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
     loaded = read_ini_file(ini_path)
     self.conf = loaded[0] if isinstance(loaded, tuple) else loaded
     self.test_expanded_dir = tempfile.mkdtemp()
 def setUp(self):
     """Load the UDL2 configuration onto ``self.conf`` and lift the diff limit.

     ``self.maxDiff`` is set to ``None``.
     """
     # TODO: don't rely on env. var
     # TODO: mock the data instead of using ini file
     ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
     self.conf = read_ini_file(ini_path)[0]
     self.maxDiff = None
 def setUp(self):
     """Read the UDL2 ini file into ``self.conf`` and set ``self.maxDiff`` to None.

     The ini path is ``UDL2_CONF`` from the environment when available,
     otherwise ``UDL2_DEFAULT_CONFIG_PATH_FILE``.
     """
     # TODO: don't rely on env. var
     # TODO: mock the data instead of using ini file
     if 'UDL2_CONF' in os.environ:
         ini_location = os.environ['UDL2_CONF']
     else:
         ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE
     nested_conf = read_ini_file(ini_location)[0]
     self.maxDiff = None
     self.conf = nested_conf
Beispiel #12
0
 def setUp(self):
     """Prepare ``self.conf`` and a temporary directory for expanded files.

     The value returned by ``read_ini_file`` is stored directly unless it is
     a tuple, in which case its first element is stored.
     """
     if 'UDL2_CONF' in os.environ:
         ini_location = os.environ['UDL2_CONF']
     else:
         ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE
     loaded = read_ini_file(ini_location)
     if not isinstance(loaded, tuple):
         self.conf = loaded
     else:
         self.conf = loaded[0]
     self.test_expanded_dir = tempfile.mkdtemp()
    def setUpClass(cls, data_dir):
        """Load configuration, build the encrypt helper and prepare the GPG home.

        :param data_dir: stored unchanged on ``cls.data_dir``.

        When the ``gpg_home`` setting is present and that path does not exist
        yet, the ``config/gpg`` directory (relative to this file) is copied
        there.
        """
        ini_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)

        cls.udl2_conf, cls.settings = read_ini_file(ini_path)
        cls.encrypt_helper = EncryptHelper(cls.settings)
        cls.data_dir = data_dir
        cls.gpg_home = cls.settings.get('gpg_home', None)
        # prepare gpg keys for tests; nothing to do if unset or already present
        if not cls.gpg_home or os.path.exists(cls.gpg_home):
            return
        gpg_source = os.path.join(os.path.dirname(__file__), "../../../../config/gpg")
        shutil.copytree(gpg_source, cls.gpg_home)
Beispiel #14
0
    def setUpClass(cls, data_dir):
        """Set class-level configuration, encrypt helper, data dir and GPG home.

        :param data_dir: kept as ``cls.data_dir``.

        The GPG key directory is copied from ``config/gpg`` (relative to this
        file) only if ``gpg_home`` is configured and does not exist yet.
        """
        if 'UDL2_CONF' in os.environ:
            ini_location = os.environ['UDL2_CONF']
        else:
            ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE

        cls.udl2_conf, cls.settings = read_ini_file(ini_location)
        cls.encrypt_helper = EncryptHelper(cls.settings)
        cls.data_dir = data_dir
        cls.gpg_home = cls.settings.get('gpg_home', None)
        # prepare gpg keys for tests
        needs_keys = cls.gpg_home and not os.path.exists(cls.gpg_home)
        if needs_keys:
            module_dir = os.path.dirname(__file__)
            shutil.copytree(os.path.join(module_dir, "../../../../config/gpg"), cls.gpg_home)
    def setUp(self):
        """Initialise the UDL database and create every generated rule function.

        The ini path comes from the ``UDL2_CONF`` environment variable, with
        ``UDL2_DEFAULT_CONFIG_PATH_FILE`` as the fallback. The third element
        of each generated rule entry is executed inside one transaction.

        :raises AssertionError: when executing a rule raises
            ``ProgrammingError``; the transaction is rolled back first.
        """
        config_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
        conf_tup = read_ini_file(config_path)
        udl2_conf = conf_tup[0]
        initialize_db_udl(udl2_conf)
        self.rule_names = transform_rules.keys()
        self.rule_conf = transform_rules
        self.rule_list = transformation_code_generator.generate_transformations(self.rule_names, rule_conf=self.rule_conf)

        with get_udl_connection() as conn:
            trans = conn.get_transaction()
            for rule in self.rule_list:
                try:
                    conn.execute(rule[2])
                except ProgrammingError as e:
                    # Roll back BEFORE raising: a statement placed after
                    # ``raise`` is unreachable and would never undo the work.
                    trans.rollback()
                    raise AssertionError('UNABLE TO CREATE FUNCTION: %s, Error: "%s"' % (rule[0], e))
            trans.commit()
def main():
    '''
    Create or drop the udl2 database objects according to the command line.

    The ini file is ``args.config_file`` when given, otherwise
    ``UDL2_DEFAULT_CONFIG_PATH_FILE``. With no action the parser help is
    printed; ``setup`` and ``teardown`` call the matching schema routine with
    the first element returned by ``read_ini_file``.
    '''
    parser, args = _parse_args()
    chosen_ini = args.config_file if args.config_file is not None else UDL2_DEFAULT_CONFIG_PATH_FILE
    # The configuration is read before the action is inspected.
    udl2_conf = read_ini_file(chosen_ini)[0]

    action = args.action
    if action is None:
        parser.print_help()
    elif action == 'setup':
        setup_udl2_schema(udl2_conf)
    elif action == 'teardown':
        teardown_udl2_schema(udl2_conf)
    def setUp(self):
        """Initialise the UDL database and create the generated rule functions.

        Configuration is read from the ini file named by ``UDL2_CONF`` (or
        ``UDL2_DEFAULT_CONFIG_PATH_FILE`` if unset). Each generated rule's
        third element is executed within a single transaction that is
        committed once all rules succeeded.

        :raises AssertionError: if a rule fails with ``ProgrammingError``;
            the transaction is rolled back before the error is raised.
        """
        config_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
        conf_tup = read_ini_file(config_path)
        udl2_conf = conf_tup[0]
        initialize_db_udl(udl2_conf)
        self.rule_names = transform_rules.keys()
        self.rule_conf = transform_rules
        self.rule_list = transformation_code_generator.generate_transformations(
            self.rule_names, rule_conf=self.rule_conf)

        with get_udl_connection() as conn:
            trans = conn.get_transaction()
            for rule in self.rule_list:
                try:
                    conn.execute(rule[2])
                except ProgrammingError as e:
                    # The rollback must precede ``raise``; placed after it,
                    # the call could never execute.
                    trans.rollback()
                    raise AssertionError(
                        'UNABLE TO CREATE FUNCTION: %s, Error: "%s"' %
                        (rule[0], e))
            trans.commit()
    def setUp(self):
        """Initialise the UDL database and seed the reference mapping table.

        For each generated transformation rule, a placeholder row (``phase``
        -999, ``ftest_*`` table/column names) naming that rule is inserted
        into the assessment reference mapping table.
        """
        if 'UDL2_CONF' in os.environ:
            ini_location = os.environ['UDL2_CONF']
        else:
            ini_location = UDL2_DEFAULT_CONFIG_PATH_FILE

        udl2_conf = read_ini_file(ini_location)[0]
        initialize_db_udl(udl2_conf)
        self.ref_schema = udl2_conf['udl2_db_conn']['db_schema']
        self.ref_table_name = Constants.UDL2_REF_MAPPING_TABLE(
            Constants.LOAD_TYPE_ASSESSMENT)

        # Testable Rules
        self.rule_names = transform_rules.keys()
        self.rule_conf = transform_rules
        self.rule_list = transformation_code_generator.generate_transformations(
            self.rule_names, rule_conf=self.rule_conf)
        # The rule name is the first element of every generated entry.
        self.testable_rules = [entry[0] for entry in self.rule_list]

        seed_rows = [
            dict(phase=-999,
                 source_table='ftest_table',
                 source_column='ftest_column',
                 target_table='ftest_table1',
                 target_column='ftest_column1',
                 transformation_rule=name)
            for name in self.testable_rules
        ]
        with get_udl_connection() as conn:
            self.ref_table = conn.get_table(self.ref_table_name)
            insert_stmt = self.ref_table.insert(seed_rows)
            conn.execute(insert_stmt)
Beispiel #19
0

# Benchmark archive file names, one entry per record-count tier.
FILES = [
    'BENCHMARK_RECORDS_10K.tar.gz.asc',
    'BENCHMARK_RECORDS_50K.tar.gz.asc',
    'BENCHMARK_RECORDS_100K.tar.gz.asc',
    'BENCHMARK_RECORDS_500K.tar.gz.asc',
    'BENCHMARK_RECORDS_2M.tar.gz.asc',
    'BENCHMARK_RECORDS_5M.tar.gz.asc',
]
#CSV_FILES = ['REALDATA_RECORDS_10K.csv', 'REALDATA_RECORDS_50K.csv', 'REALDATA_RECORDS_100K.csv',
#             'REALDATA_RECORDS_500K.csv', 'REALDATA_RECORDS_2M.csv', 'REALDATA_RECORDS_5M.csv']
#JSON_FILES = ['METADATA_RECORDS_10K.json', 'METADATA_RECORDS_50K.json', 'METADATA_RECORDS_100K.json',
#              'METADATA_RECORDS_500K.json', 'METADATA_RECORDS_2M.json', 'METADATA_RECORDS_5M.json']

# Table names used by the benchmarking code.
HISTORY_TABLE = 'HISTORY_TABLE'
BATCH_TABLE = 'UDL_BATCH'

# Resolved directory of this module, anchored on the current working directory.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))
logger = get_task_logger(__name__)

# The default UDL2 ini file is read once at import time.
config_path_file = UDL2_DEFAULT_CONFIG_PATH_FILE

conf_tup = read_ini_file(config_path_file)
udl2_conf = conf_tup[0]


def start_test(directory_path, memory, cpu, hist_db, hist_schema, port, user, passwd, host):
    '''
    Kick off the tests by scheduling the ``run_pipeline`` task with celery.

    All arguments are packed into a single message dict that is passed as the
    task's only positional argument.
    '''
    message = {
        'directory': directory_path,
        'memory': memory,
        'cpu': cpu,
        'hist_db': hist_db,
        'hist_schema': hist_schema,
        'port': port,
        'user': user,
        'passwd': passwd,
        'host': host,
    }
    task_args = (message,)
    run_pipeline.apply_async(task_args)


@celery.task(name='benchmarking.run_benchmarking.run_pipeline')
def run_pipeline(msg):
    '''
    Run the pipeline for the current file and use this task as a callback for future tasks
Beispiel #20
0
def main():
    """Initialise the target database from the default config, then clean up.

    Reads ``UDL2_DEFAULT_CONFIG_PATH_FILE``, passes the first of the two
    returned configurations to ``initialize_db_target`` and finally calls
    ``clean_up_unused_schemas``.
    """
    # read_ini_file returns a pair; the second element is not used here.
    nested_conf, _flat_conf = read_ini_file(UDL2_DEFAULT_CONFIG_PATH_FILE)
    initialize_db_target(nested_conf)
    clean_up_unused_schemas()
    parser.add_argument('-r', dest='report_hour', help='report hour, e.g. yesterday 10am to today 10am', default='10')
    parser.add_argument('-p', dest='pidfile', default='/opt/edware/run/edudl2-report.pid',
                        help="pid file for edudl2 trigger daemon")
    parser.add_argument('-d', dest='daemon', action='store_true', default=False,
                        help="daemon mode for udl report")
    args = parser.parse_args()

    config_path_file = args.ini_file
    hour = args.hour
    report_hour = args.report_hour
    daemon_mode = args.daemon
    pid_file = args.pidfile
    if daemon_mode:
        create_daemon(pid_file)
    # get udl2 configuration as nested and flat dictionary
    udl2_conf, udl2_flat_conf = read_ini_file(config_path_file)
    initialize_all_db(udl2_conf, udl2_flat_conf)

    email_to = udl2_flat_conf.get(UDL_REPORT_MAIL_TO)
    subject = udl2_flat_conf.get(UDL_REPORT_SUBJECT)
    email_from = udl2_flat_conf.get(UDL_REPORT_MAIL_FROM)
    enabled = udl2_flat_conf.get(UDL_REPORT_ENABLED)
    start_date = today = datetime.datetime.today().strftime('%Y-%m-%d %H:00:00')
    if enabled is not None and enabled.lower() == 'true':
        generate_report_settings = {'report.enable': 'True',
                                    'report.schedule.cron.hour': hour,
                                    'report.schedule.cron.minute': '0',
                                    'report.schedule.cron.second': '0',
                                    'hour': report_hour,
                                    'mail_to': email_to,
                                    'subject': subject,
    parser.add_argument('-d',
                        dest='daemon',
                        action='store_true',
                        default=False,
                        help="daemon mode for udl report")
    args = parser.parse_args()

    config_path_file = args.ini_file
    hour = args.hour
    report_hour = args.report_hour
    daemon_mode = args.daemon
    pid_file = args.pidfile
    if daemon_mode:
        create_daemon(pid_file)
    # get udl2 configuration as nested and flat dictionary
    udl2_conf, udl2_flat_conf = read_ini_file(config_path_file)
    initialize_all_db(udl2_conf, udl2_flat_conf)

    email_to = udl2_flat_conf.get(UDL_REPORT_MAIL_TO)
    subject = udl2_flat_conf.get(UDL_REPORT_SUBJECT)
    email_from = udl2_flat_conf.get(UDL_REPORT_MAIL_FROM)
    enabled = udl2_flat_conf.get(UDL_REPORT_ENABLED)
    start_date = today = datetime.datetime.today().strftime(
        '%Y-%m-%d %H:00:00')
    if enabled is not None and enabled.lower() == 'true':
        generate_report_settings = {
            'report.enable': 'True',
            'report.schedule.cron.hour': hour,
            'report.schedule.cron.minute': '0',
            'report.schedule.cron.second': '0',
            'hour': report_hour,