Ejemplo n.º 1
0
def create_dir_structure(directory_path, create_sub_dir):
    """

        Creates required directory structures inside the parent
        directory figures.

    Args:
        directory_path: string
            Given path that already exists.

        create_sub_dir: string
            Sub directory to create a given folder path.

    Returns:
        Returns back the created directory.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    # Create each path segment one level at a time so intermediate
    # directories are materialized in order.
    for directory in create_sub_dir.split("/"):
        directory_path += "/" + directory
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test followed by os.makedirs().
        os.makedirs(directory_path, exist_ok=True)

    return correct_directory_path(directory_path)
Ejemplo n.º 2
0
def get_all_files_from_path(directory_path, file_extension=None):
    """

        Gets all filenames with the provided path.

    Args:
        directory_path: string
            Given path that already exists.

        file_extension: string
            Only return files that have a given extension.

    Returns:
        Returns back a set a filenames with the provided path.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    # Only the top level of the directory is inspected (no recursion).
    _, _, filenames = next(os.walk(directory_path), ("", [], []))

    if file_extension:
        # Accept the extension with or without a leading dot.
        suffix = "." + file_extension.replace(".", "")
        return {name for name in filenames if name.endswith(suffix)}

    return set(filenames)
Ejemplo n.º 3
0
def dict_to_json_file(dict_obj,
                      directory_path,
                      filename,
                      remove_file_extension=True):
    """

        Writes a dict to a json file.

    Args:
        dict_obj: dict
            Dictionary object.

        directory_path: string
            Given path that already exists.

        filename: string
            Json file's name.

        remove_file_extension: bool
            Strip any extension from 'filename' before appending '.json'.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    filename = convert_to_filename(filename,
                                   remove_file_extension=remove_file_extension)

    # Use the sanitized filename in the output path (the original wrote a
    # fixed literal and never used the computed 'filename').
    with open(f'{directory_path}{filename}.json', 'w',
              encoding='utf-8') as outfile:
        json.dump(dict_obj, outfile, ensure_ascii=False, indent=2)
Ejemplo n.º 4
0
def write_object_text_to_file(obj,
                              directory_path,
                              filename,
                              remove_file_extension=True):
    """

        Writes the object's string representation to a text file.

    Args:
        obj: any
            Any object that has a string 'repr'.

        directory_path: string
            Given path that already exists.

        filename: string
            Text file's name.

        remove_file_extension: bool
            Strip any extension from 'filename' before appending '.txt'.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    filename = convert_to_filename(filename,
                                   remove_file_extension=remove_file_extension)
    # Use the sanitized filename in the output path (the original wrote a
    # fixed literal and never used the computed 'filename').
    file_dir = f'{directory_path}{filename}.txt'

    # 'with' guarantees the handle is closed even if the write raises.
    with open(file_dir, 'w', encoding='utf-8') as f:
        f.write('obj = ' + repr(obj) + '\n')
Ejemplo n.º 5
0
def pickle_object_to_file(obj,
                          directory_path,
                          filename,
                          remove_file_extension=True):
    """

        Writes the object to a pickle file.

    Args:
        obj: any object
            Any python object that can be pickled.

        directory_path: string
            Given path that already exists.

        filename: string
             Pickle file's name.

        remove_file_extension: bool
            Strip any extension from 'filename' before appending '.pkl'.

    Returns:
        The full path of the written pickle file.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    # Ensures no file extensions in filename
    filename = convert_to_filename(
        filename, remove_file_extension=remove_file_extension)
    # Use the sanitized filename in the output path (the original wrote a
    # fixed literal and never used the computed 'filename').
    file_dir = f'{directory_path}{filename}.pkl'

    # 'with' replaces the original try/finally, which raised NameError
    # (list_pickle unbound) whenever an exception occurred before open().
    with open(file_dir, 'wb') as pickle_file:
        pickle.dump(obj, pickle_file)

    return file_dir
Ejemplo n.º 6
0
    def __init__(self,
                 dataset_name,
                 overwrite_full_path=None):
        """
        Args:
            dataset_name: string
                Sub directory to create on top of the directory
                'PARENT_OUTPUT_FOLDER_NAME'.

            overwrite_full_path: string
                The passed directory path must already exist. Will completely
                ignore the project name and attempt to point to this already
                created directory.
        """
        if overwrite_full_path:
            # Trusting the user that this path must already exist
            overwrite_full_path = correct_directory_path(overwrite_full_path)

            # Path doesn't contain eflow's main output
            if f"/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/" not in overwrite_full_path:
                raise UnsatisfiedRequirments(f"Directory path must have {SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME} "
                                             f"as a directory name or this program will not work correctly.")

            # Unknown path found
            if not os.path.exists(overwrite_full_path):
                raise SystemError("The path must already be defined in full on "
                                  "your system to use a different directory "
                                  "structure than orginally intended.")

            output_path = overwrite_full_path
        else:
            # Setup project structure under the current working directory.
            relative_structure = ("/" + SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME
                                  + "/" + dataset_name + "/")

            create_dir_structure(os.getcwd(), relative_structure)
            output_path = correct_directory_path(os.getcwd()
                                                 + relative_structure)

        from eflow._hidden.general_objects import enum
        self.__PROJECT = enum(PATH_TO_OUTPUT_FOLDER=output_path,
                              RELATIVE_PATH_TO_OUTPUT_FOLDER=output_path.split(f"/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/")[1])
Ejemplo n.º 7
0
    def __create_dataframe_snapshot_json_file(self, df, output_folder_path):
        """

            Creates a json file based on the dataframe's generated snapshot dict.

        Args:
            df: pd.Dataframe
                Pandas Dataframe object

            output_folder_path: string
                Output path the json object will move to.
        """
        snapshot_dict = self.__generate_dataframe_snapshot_dict(df)
        dict_to_json_file(snapshot_dict,
                          correct_directory_path(output_folder_path),
                          "Dataframe Snapshot")
Ejemplo n.º 8
0
def create_plt_png(directory_path,
                   sub_dir,
                   filename,
                   sharpness=1.7):

    """

        Saves the plt based image in the correct directory.

    Args:
        directory_path:
            Already existing directory path.

        sub_dir:
            Directory structure to create on top of the already generated path of
            'directory_path'.

        filename:
            Filename to save into the full path of 'directory_path' + 'sub_dir'.

        sharpness:
            Changes the image's sharpness to look better.
    """
    directory_path = correct_directory_path(directory_path)

    # Ensure directory structure is init correctly
    abs_path = create_dir_structure(directory_path,
                                    sub_dir)

    # Ensure file ext is on the file.
    if not filename.endswith(".png"):
        filename += ".png"

    full_path = abs_path + "/" + filename

    plt.savefig(full_path, bbox_inches='tight')

    if sharpness:
        # Re-use the exact path savefig wrote to; the original rebuilt the
        # path from directory_path + sub_dir, which could diverge from the
        # normalized 'abs_path' and point sharpness at a missing file.
        adjust_sharpness(full_path,
                         full_path,
                         sharpness)
Ejemplo n.º 9
0
def get_unique_directory_path(directory_path, folder_name):
    """

        Iterate through directory structure until a unique folder name can be
        found.

        Note:
            Keeps changing the folder name by appending 1 each iteration.

    Args:
        directory_path: string
            Given path that already exists.

        folder_name: string
             Folder name to compare against other directories that exist in the
             directory_path.

    Returns:
        Returns back a directory path with a unique folder name.
    """

    # -----
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    create_dir_structure(directory_path=directory_path, create_sub_dir="")

    # First try the bare folder name; then "name 1", "name 2", ... until a
    # path that does not yet exist is found.
    candidate_path = f'{directory_path}{folder_name}'
    counter = 0
    while os.path.exists(candidate_path):
        counter += 1
        candidate_path = f'{directory_path}{folder_name} {counter}'

    return candidate_path
Ejemplo n.º 10
0
def get_all_directories_from_path(directory_path):
    """

       Gets directories names with the provided path.

    Args:
        directory_path: string
            Given path that already exists.

    Returns:
        Returns back a set a directories with the provided path.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    # Only the top level of the directory is inspected (no recursion).
    _, dirnames, _ = next(os.walk(directory_path), ("", [], []))

    return set(dirnames)
Ejemplo n.º 11
0
    def __init__(self,
                 pipeline_name,
                 df,
                 df_features=None,
                 pipeline_modify_id=None,
                 remove_past_contents=False):
        """
        Sets up the pipeline's directory/json-file structure, resolves the
        feature-type definitions, and reconciles 'df' columns against them.

        Args:
            pipeline_name (str):
                Points to/generates a folder based on the pipeline's name.

            df (pd.DataFrame):
                Dataframe to reconcile against 'df_features'; columns not
                shared by both are dropped in place (see note at the end).

            df_features (DataFrameTypes,NoneType):
                Feature-type definitions. If a 'df_features.json' already
                exists in the pipeline folder, it is loaded and REPLACES the
                passed value.

            pipeline_modify_id (str,NoneType):
                If set to 'None' then will point the 'root' or the main template
                of the pipeline.

            remove_past_contents:
                If an already existing folder exists for this then move to
                eflow's personal garbage.
        """
        # Set up directory structure
        dir_path_to_pipeline = correct_directory_path(
            f"{os.getcwd()}/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/_Extras/Pipeline Structure/Data Pipeline/{pipeline_name}"
        )
        configure_existing_file = False

        # Get json proper file name
        if pipeline_modify_id:
            json_file = f"{pipeline_modify_id.split('.')[0]}.json"
        else:
            json_file = "root_pipeline.json"

        self.__df_features = None

        # Check if folder/file exist for the pipeline
        if os.path.exists(dir_path_to_pipeline):
            if os.path.exists(dir_path_to_pipeline + json_file):
                print(f"The file '{json_file}' exist!")
                FileOutput.__init__(
                    self,
                    f'_Extras/Pipeline Structure/Data Pipeline/{pipeline_name}'
                )
                configure_existing_file = True
            else:
                # NOTE(review): reached when the pipeline folder exists but the
                # expected json file does not — presumably a fresh pipeline
                # should be creatable here instead of raising; confirm flow.
                raise PipelineError(f"The file '{json_file}' does not exist!")

            # Create/Load in df_features to given object.
            if os.path.exists(dir_path_to_pipeline + "df_features.json"):
                # Existing on-disk definitions win over the passed argument.
                df_features = DataFrameTypes(None)
                df_features.init_on_json_file(dir_path_to_pipeline +
                                              "df_features.json")
            else:
                if df_features is None:
                    raise PipelineError(
                        "When initializing a data pipeline structure "
                        "you must pass a DataFrameTypes object with "
                        "the correctly defined types!")

                # Create file representation
                else:
                    df_features.create_json_file_representation(
                        dir_path_to_pipeline, "df_features")

        # -----
        if df_features is None:
            raise PipelineError("When initializing a data pipeline structure "
                                "you must pass a DataFrameTypes object with "
                                "the correctly defined types!")
        self.__df_features = copy.deepcopy(df_features)

        # Check if root file exist or if pipeline modify id
        self.__pipeline_name = copy.deepcopy(pipeline_name)
        self.__pipeline_segment_deque = deque()
        self.__pipeline_segment_names = set()
        self.__pipeline_segment_path_id = set()
        self.__pipeline_modify_id = copy.deepcopy(pipeline_modify_id)

        self.__json_file_name = json_file

        # Json file does exist; init DataPipeline object correctly
        if configure_existing_file:
            if remove_past_contents:
                print("Moving past contents to eFlow's garbage.")
                move_folder_to_eflow_garbage(dir_path_to_pipeline,
                                             "Data Pipelines")
            else:
                print(
                    "Now configuring object with proper pipeline segments...")
                self.__configure_pipeline_with_existing_file()

        # Symmetric difference: any column present in only one of
        # df / df_features gets dropped from df.
        # NOTE(review): a feature present in df_features but absent from
        # df.columns would make df.drop raise KeyError — confirm callers
        # guarantee df_features is a subset of df's columns.
        for removal_feature in set(df.columns) ^ set(
                df_features.all_features()):
            print(f"Removing the feature: \"{removal_feature}\"")
            df.drop(columns=removal_feature, inplace=True)
Ejemplo n.º 12
0
def generate_meta_data(df,
                       output_folder_path,
                       sub_dir):
    """

        Creates files representing the shape and feature types of the dataframe.

    Args:
        df: pd.Dataframe
            Pandas DataFrame object

        output_folder_path: str
            Pre defined path to already existing directory to output file(s).

        sub_dir: str
            Path to be possibly generated.

    Returns:
        Creates meta data on the passed datafrane.
    """
    create_dir_structure(output_folder_path,
                         correct_directory_path(sub_dir + "/Meta Data"))

    output_folder_path = correct_directory_path(output_folder_path)
    table_dir = f"{output_folder_path}/{sub_dir}"

    # --- Dataframe shape ---
    shape_frame = pd.DataFrame.from_dict({'Rows': [df.shape[0]],
                                          'Columns': [df.shape[1]]})

    if shape_frame.shape[0]:
        df_to_image(shape_frame,
                    table_dir,
                    "Meta Data",
                    "Dataframe Shape Table",
                    show_index=False)

    write_object_text_to_file(shape_frame.to_dict('records'),
                              f"{table_dir}/Meta Data",
                              "Dataframe Shape Text")

    # --- Dataframe dtypes ---
    types_frame = data_types_table(df)
    if types_frame.shape[0]:
        df_to_image(types_frame,
                    table_dir,
                    "Meta Data",
                    "Dataframe Types Table",
                    show_index=True)

    plt.close("all")

    # --- Missing values ---
    missing_frame = missing_values_table(df)
    if missing_frame.shape[0]:
        df_to_image(missing_frame,
                    table_dir,
                    "Meta Data",
                    "Missing Data Table",
                    show_index=True)

    plt.close("all")
Ejemplo n.º 13
0
def df_to_image(df,
                directory_path,
                sub_dir,
                filename,
                sharpness=1.7,
                col_width=8,
                row_height=0.625,
                font_size=14,
                header_color='#40466e',
                row_colors=('#f1f1f2', 'w'),
                edge_color='w',
                bbox=(0, 0, 1, 1),
                header_columns=0,
                ax=None,
                show_index=False,
                index_color="#add8e6",
                format_float_pos=None,
                show_plot=False,
                **kwargs):
    """

        Renders a dataframe as a matplotlib table and saves it as a png via
        'create_plt_png'.

    Args:
        df: pd.Dataframe
            Pandas Dataframe object (never mutated; a deep copy is used).

        directory_path: string
            Already existing directory path.

        sub_dir: string
            Directory structure to create on top of 'directory_path'.

        filename: string
            Name of the png to create.

        sharpness: float
            Forwarded to 'create_plt_png' to sharpen the saved image.

        col_width, row_height, font_size: numeric
            Table sizing/typography used when no 'ax' is supplied.

        header_color, row_colors, edge_color, index_color: colors
            Cell styling; 'row_colors' alternates by row.

        bbox: tuple
            Bounding box forwarded to 'ax.table'.

        header_columns: int
            Columns left of this index are styled like the header row.

        ax: matplotlib axis or None
            Existing axis to draw on; a new figure is created when None.

        show_index: bool
            Render the dataframe index as the first column.

        format_float_pos: int or None
            If >= 1, float columns are comma-formatted to this precision.

        show_plot: bool
            Call plt.show() after saving.

        **kwargs:
            Forwarded to 'ax.table'.
    """
    directory_path = correct_directory_path(directory_path)
    # Deep copy so the caller's dataframe is never mutated below.
    df = copy.deepcopy(df)

    if format_float_pos and format_float_pos >= 1:
        float_format = '{:,.' + str(format_float_pos) + 'f}'
        for col_feature in set(df.select_dtypes(include=["float"]).columns):
            df[col_feature] = df[col_feature].map(float_format.format)

    if ax is None:
        # Figure size derived from the table's (cols, rows) footprint.
        size = (np.array(df.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
        fig, ax = plt.subplots(figsize=size)
        ax.axis('off')

    if show_index:
        # Safe: operates on the local deep copy only.
        df.reset_index(inplace=True)

    mpl_table = ax.table(cellText=df.values,
                         bbox=bbox,
                         colLabels=df.columns,
                         **kwargs)

    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)

    # Plain dict iteration replaces six.iteritems: this file already uses
    # f-strings, so python 3 is guaranteed and six is unnecessary here.
    for k, cell in mpl_table._cells.items():
        cell.set_edgecolor(edge_color)
        if k[0] == 0 or k[1] < header_columns:
            # Header row / leading header columns: bold white on header color.
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            if index_color and show_index and k[1] == 0:
                cell.set_facecolor(index_color)
            else:
                cell.set_facecolor(row_colors[k[0] % len(row_colors)])

    if not sub_dir:
        sub_dir = ""

    create_plt_png(directory_path,
                   sub_dir,
                   filename,
                   sharpness)
    if show_plot:
        plt.show()

    plt.close("all")