Example 1
0
    def run(self):
        """Entry point: collect schemas, harvest field rules and build tables.

        Gathers fields/schemas for all configured tables, scans the script
        paths for ``.hql`` files to extract value-generation rules, then
        creates tables in priority order, skipping tables that already have
        data or that re-appear at the next priority level.
        """
        log.info(f"Fakeme starts at {datetime.now()}")
        # get all fields and schemas for tables
        self.schemas, fields = MultiTableRunner(
            self.tables,
            rls=self.rls).get_fields_and_schemas(dump_schema=self.dump_schema)

        walk_list = []

        for path in self.paths_with_scripts:
            walker = Walker(path_to_dir=path, extension="hql", recursive=True)
            # extend() instead of a side-effect list comprehension
            walk_list.extend(walker.walk())

        field_extractor = FieldRulesExtractor(fields, walk_list)
        # generate "value_rules.json" with rules for fields
        field_extractor.generate_rules()

        self.priority_dict = self.create_tables_priority_graph()
        for key, value in self.priority_dict.items():
            for table in value:
                # NOTE(review): `created` is not defined in this method —
                # presumably a registry of already-built tables maintained
                # elsewhere (by create_table()?); confirm before refactoring.
                if (table not in created and table not in self.with_data
                        and table not in self.priority_dict.get(key + 1, [])):
                    self.create_table(table)

        log.info(
            f"Fakeme finished data generation successful \n {datetime.now()}")
Example 2
0
    def column_generator(self, column_cfg: Dict) -> list:
        """Create a full column of generated values for *column_cfg*.

        Values are collected from up to three sources, in order: values taken
        from a chained (dependent) table, freshly generated values up to the
        required unique count, then repeated copies of the collected values
        to pad the column out to ``self.row_numbers`` rows.
        """
        log.info("Generate column {}".format(column_cfg.name))
        # unique_values - count of unique values in column in this table
        # unique - flag, must be all values unique in this table or not
        column = []
        unique_values = self.row_numbers
        matches_k = self.cfg.matches
        unique = None
        percent_of_nulls = self.cfg.percent_of_nulls

        # per-table settings override the global config defaults
        if self.table_settings:
            (
                unique_values,
                unique,
                matches_k,
                percent_of_nulls,
            ) = self.__process_table_settings(
                column_cfg, unique_values, unique, matches_k,
                percent_of_nulls)  # todo: refactor this
        # get field rule
        generating_rule = self.get_column_generating_rule(column_cfg.name)

        # when this table is chained to another, reuse that table's values
        if self.table_id in self.chains:
            df_column = self.get_column_from_chained(column_cfg.name,
                                                     matches_k)
        else:
            df_column = None

        # a falsy per-table setting means "no limit": fall back to row count
        if not unique_values:
            unique_values = self.row_numbers
        if df_column:
            # seed the column with values taken from the chained table
            column = self.__process_df_column(df_column, column, column_cfg,
                                              unique_values, unique)

        # from here on unique_values means "values still to generate"
        if len(column) < unique_values:
            unique_values = unique_values - len(column)
        else:
            column = column[:unique_values]
            unique_values = 0
        # cap the generated value length at the column's declared length when
        # the rule has no length (NaN) or a longer one
        if (column_cfg.len and math.isnan(generating_rule["len"])
                or column_cfg.len and generating_rule["len"] > column_cfg.len):
            generating_rule["len"] = column_cfg.len
        while unique_values:
            value = values_generator(generating_rule, unique)
            column.append(value)
            unique_values -= 1
        # pad the column up to self.row_numbers rows by repeating the
        # collected values: whole copies first, then a fractional tail.
        # NOTE(review): rel_size divides by len(column) — assumes at least
        # one value was produced above; confirm row_numbers is always > 0.
        total_rows = self.row_numbers - len(column)
        rel_size = total_rows / len(column)
        num_copy = int(rel_size)
        base_column = copy.deepcopy(column)
        for _ in range(num_copy):
            column += base_column
        float_adding = rel_size - num_copy

        column += base_column[:int(len(base_column) * float_adding)]
        # final pass: output-mode processing and null injection per config
        column = self.__config_mode_processing(column, column_cfg,
                                               percent_of_nulls)
        return column
Example 3
0
 def table_prefix_in_column_name(self,
                                 column_name: Text) -> Union[Text, None]:
     """Return the first table whose de-pluralized name occurs inside
     *column_name* (used for automatic aliasing), or ``None`` when no
     table name is embedded in the column name."""
     matched = next(
         (name for name in self.schemas
          if self._remove_plural_from_table_name(name) in column_name),
         None,
     )
     if matched is not None:
         log.info(f"Found alias with {matched}")
         return matched
Example 4
0
 def generate_rules(self, remove_existed=True):
     """Write the extracted field value rules to ``self.file_name`` as JSON.

     When *remove_existed* is False and the rules file already exists, the
     existing file is left untouched. Always returns ``True``.
     """
     keep_existing = not remove_existed and os.path.isfile(self.file_name)
     if keep_existing:
         log.info("{} with rules founded in {}".format(self.file_name, os.getcwd()))
         return True
     rules = self.rules_extracts()
     with open(self.file_name, "w+") as target:
         json.dump(rules, target, indent=2)
     log.info("{} with rules for fields was created".format(self.file_name))
     return True
Example 5
0
 def _prepare_path(file_path, remove_old):
     """prepare folder and check target file"""
     if os.path.isfile(file_path):
         log.info("Founded old file {}".format(file_path))
         if remove_old:
             os.remove(file_path)
             log.info("File {} was removed".format(file_path))
         else:
             raise Exception(
                 "Impossible to generate data into file {}. "
                 "File already exist. Please delete file or set "
                 "'remove_old'=True".format(file_path))
     else:
         if not os.path.isdir(os.path.abspath(os.path.dirname(file_path))):
             os.makedirs(os.path.dirname(file_path))
     return file_path
Example 6
0
 def get_depend_on_file(self):
     """Find the data file of a table this table depends on.

     Returns the path (prefix-joined) of the first dependency data file
     found, or an empty list when the table depends on nothing.
     TODO: add support for multiple depends on files
     """
     dir_files = []
     if self.chains and self.table_id in self.chains:
         dir_files = self.__chain_tables(dir_files)
     else:
         for item in self.schema:
             if item["name"] in self.chained:
                 chained_tables = [
                     table for table in self.chained[item["name"]]
                     if table != self.table_id
                 ]
                 # collect the data files produced by the chained tables
                 for table in chained_tables:
                     for file_name in os.listdir(self.prefix):
                         if file_name.startswith(table):
                             dir_files.append(file_name)
             # NOTE(review): this tests the *prefix path string* for the
             # substring "all"; it looks like it was meant to be
             # `"all" in self.chains` — confirm against callers before
             # changing, since self.chains["all"] below assumes the key.
             elif "all" in self.prefix and item["name"] in self.chains[
                     "all"]:
                 table_chain = self.chains["all"]
                 # dict views are not subscriptable in Python 3; take the
                 # first key via an iterator (was: table_chain.keys()[0],
                 # a guaranteed TypeError on this branch).
                 key = next(iter(table_chain))
                 if table_chain[key]["table"] != self.table_id:
                     src_file = os.path.join(
                         self.prefix,
                         "{}.{}".format(table_chain[key]["table"],
                                        self.file_format),
                     )
                     if not os.path.isfile(src_file):
                         raise ValueError(
                             "Dependency file {} does not exist".format(
                                 src_file))
                     dir_files.append(src_file)
     if dir_files:
         log.info("Depend on: {}".format(dir_files))
         return os.path.join(self.prefix, dir_files[0])
     else:
         return []