Esempio n. 1
0
 def test_configuration_failed(self):
     """Check lookups of a key that is absent from the "conf" section.

     Without a fallback the lookup must compare equal to ``None``; with a
     fallback the supplied value must come back, whatever its type.
     """
     section, missing_key = "conf", "non_existing_keys"

     # No fallback supplied.
     self.assertEqual(configuration.get(section, missing_key), None)

     # Fallbacks of several types: str, int and dict.
     for fallback in ("NEW_VALUE", 100, {"key": "value"}):
         self.assertEqual(
             configuration.get(section, missing_key, fallback), fallback)
Esempio n. 2
0
 def append_plugin_parameter_from_configuration(init_params, plugin_parameters):
     """Copy configured plugin parameters into ``init_params``.

     For every name in ``plugin_parameters`` the value stored under the
     "plugin_parameter" configuration section is looked up.  Falsy values
     (missing or empty) are skipped; anything else is converted with the
     callable found under that parameter's ``"type"`` entry and stored in
     ``init_params`` under the same name.

     :param init_params: mapping that receives the converted values
         (modified in place)
     :param plugin_parameters: mapping of parameter name to a description
         mapping that has a ``"type"`` callable
     :return: the same ``init_params`` object
     """
     for name, spec in plugin_parameters.items():
         raw_value = configuration.get("plugin_parameter", name)
         if not raw_value:
             # Nothing configured for this parameter; leave init_params alone.
             continue
         init_params[name] = spec["type"](raw_value)
     return init_params
Esempio n. 3
0
    def test_postgres_operator_operator_run_test_full_load_only_split_1(self):
        """Run a full-load-only job with ``split_file_size_limit`` set to "1".

        Expects the ``public.employee`` export to be written as two files
        (``x00_…`` and ``x01_…``) holding one row each, and expects no
        ``cdc`` folder to appear under the output location.

        The ``split_file_size_limit`` override is always restored, even when
        an assertion fails, so it cannot leak into other tests.
        """
        split_file_original_value \
            = configuration.get("plugin_parameter", "split_file_size_limit")
        configuration.set("plugin_parameter", "split_file_size_limit", "1")
        try:
            self.insert_ref_data()
            database_operator_params = {
                "connection_string": self.postgres_connection_string,
                "load_type": LoadType.Full_Load,
                "table_names": ['public.employee'],
                "full_load_plugin_name": "PgDefaultFullLoadPlugin",
                "cdc_plugin_name": None,
                "output_location": self.output_folder,
            }
            b = PostgresOperator(**database_operator_params)

            # start a thread to terminate the process
            def thread_to_terminate_processes(
                    postgres_operator_object: PostgresOperator):
                time.sleep(5)
                postgres_operator_object.terminate_process_signal = True

            # inserts an extra row one second after the job has been started
            def insert_ref_cdc_data(connection_string: str):
                time.sleep(1)
                with psycopg2.connect(connection_string) as conn:
                    with conn.cursor() as cursor:
                        cursor.execute(
                            "INSERT INTO employee VALUES (3, 'User3')")

            # NOTE(review): neither helper thread is joined, so they may
            # outlive this test method - confirm that is acceptable.
            threading.Thread(target=thread_to_terminate_processes,
                             args=(b, )).start()
            threading.Thread(target=insert_ref_cdc_data,
                             args=(self.postgres_connection_string, )).start()
            b.execute()

            full_load_folder = os.path.join(self.output_folder, "full_load",
                                            "public_employee")
            full_load_output_path_0 = os.path.join(
                full_load_folder, "x00_public.employee.csv")
            full_load_output_path_1 = os.path.join(
                full_load_folder, "x01_public.employee.csv")
            cdc_output_path = os.path.join(self.output_folder, "cdc")

            with open(full_load_output_path_0, 'r') as full_load_file:
                full_load_output_content_0 = full_load_file.read()
            with open(full_load_output_path_1, 'r') as full_load_file:
                full_load_output_content_1 = full_load_file.read()
            self.assertEqual(full_load_output_content_0, "1\tUser1\n")
            self.assertEqual(full_load_output_content_1, "2\tUser2\n")

            # Assert on the real condition and pass the text as the failure
            # message, instead of asserting on the message string itself.
            self.assertFalse(os.path.exists(cdc_output_path),
                             "CDC output created for Full Load only job")
        finally:
            # Put the original limit back no matter how the test ended.
            configuration.set("plugin_parameter", "split_file_size_limit",
                              split_file_original_value)
Esempio n. 4
0
def initialize():
    """Read the "conf" settings, build the database operator and run it."""
    # register interrupt signal
    register_ctrl_c_signal()

    # create the logger
    create_rotating_log()

    operator_name = configuration.get("conf", "database_operator")
    operator_type = configuration.get("conf", "database_operator_type")
    conn_string = configuration.get("conf", "connection_string")
    load_type_name = configuration.get("conf", "load_type")
    full_load_plugin = configuration.get("conf", "full_load_plugin_name")
    cdc_plugin = configuration.get("conf", "cdc_plugin_name")

    # "table_names" is a comma separated string; blank means no tables.
    raw_table_names = configuration.get("conf", "table_names", default="")
    if raw_table_names.strip() != "":
        table_list = raw_table_names.split(",")
    else:
        table_list = []
    output_dir = configuration.get("conf", "output_location")

    # Load the derived classes of each base before asking for an operator.
    BaseDataBaseOperator.load_derived_classes()
    FullLoadBase.load_derived_classes()
    CDCBase.load_derived_classes()

    print(operator_name, operator_type, load_type_name, full_load_plugin,
          cdc_plugin, raw_table_names, table_list, output_dir)

    operator = BaseDataBaseOperator.get_object(operator_type, operator_name)

    operator_kwargs = {
        "connection_string": conn_string,
        # Look up the LoadType member by its configured name.
        "load_type": LoadType[load_type_name],
        "table_names": table_list,
        "full_load_plugin_name": full_load_plugin,
        "cdc_plugin_name": cdc_plugin,
        "output_location": output_dir,
    }

    # Falls back to 0 when "retry_times" is not configured.
    retries = int(configuration.get("conf", "retry_times", 0))
    run_database_operator(operator, operator_kwargs, retries)
Esempio n. 5
0
 def test_configuration(self):
     """Check the expected "conf" values and a set/get round trip."""
     expected_conf = (
         ("database_operator", "Postgres-Default"),
         ("database_operator_type", "Postgres"),
         ("connection_string",
          "host=127.0.0.1 dbname=repl user=ubuntu password=ubuntu"),
         ("load_type", "Full_Load_And_CDC"),
         ("full_load_plugin_name", "PgDefaultFullLoadPlugin"),
         ("cdc_plugin_name", "PgDefaultCDCPlugin"),
         ("table_names", "public.employees"),
         ("output_location", "/mnt/d/delete_me"),
     )
     for key, expected_value in expected_conf:
         self.assertEqual(configuration.get("conf", key), expected_value)

     # A value written with set() must be readable with get().
     new_key, new_value = "new_configuration_param", "new value"
     configuration.set("conf", new_key, new_value)
     self.assertEqual(configuration.get("conf", new_key), new_value)
Esempio n. 6
0
def create_rotating_log(relative_path: str = "",
                        file_name: str = "siirto.log",
                        logger_name: str = "siirto",
                        path: str = None):
    """
    Attach a log handler to the named logger and call logging.basicConfig.

    When the "logs"/"print_only_logs" setting equals "False" (the fallback),
    a RotatingFileHandler writing to ``<path>/<relative_path>/<file_name>``
    is attached; otherwise a plain StreamHandler is attached.

    :param relative_path: relative path for the logger from the path
    :type relative_path: str
    :param file_name: name of the log file
    :type file_name: str
    :param logger_name: name of the logger that receives the handler
    :type logger_name: str
    :param path: base directory; when falsy, the "logs"/"log_file_path"
        setting is used (fallback "/var/log/siirto")
    :type path: str
    """
    target_logger = logging.getLogger(logger_name)

    # NOTE(review): the doubled "%%" looks like configparser interpolation
    # escaping - confirm the fallback is un-escaped before it reaches
    # logging.Formatter.
    format_string = configuration.get(
        "logs", "log_formatter", "[%%(asctime)s] {%%(filename)s:%%(lineno)d} "
        "%%(levelname)s - %%(message)s")
    record_formatter = logging.Formatter(fmt=format_string,
                                         datefmt='%Y-%m-%d %H:%M:%S')

    write_to_file = \
        configuration.get("logs", "print_only_logs", "False") == "False"
    if not write_to_file:
        log_handler = logging.StreamHandler()
    else:
        base_dir = path or configuration.get("logs", "log_file_path",
                                             "/var/log/siirto")
        log_dir = os.path.join(base_dir, relative_path)
        _create_path(log_dir)
        log_file = os.path.join(log_dir, file_name)

        max_bytes = int(
            configuration.get("logs", "log_file_max_bytes", "10485760"))
        backup_count = int(
            configuration.get("logs", "log_file_backup_count", "10"))
        # add a rotating handler
        log_handler = RotatingFileHandler(log_file,
                                          maxBytes=max_bytes,
                                          backupCount=backup_count)

    # Both handler kinds share the same formatter.
    log_handler.setFormatter(record_formatter)
    target_logger.addHandler(log_handler)

    # The configured level name is resolved to the logging module constant.
    level_name = configuration.get("logs", "log_file_log_level", "DEBUG")
    logging.basicConfig(level=getattr(logging, level_name),
                        format=format_string)