Esempio n. 1
0
    def reset_segment_file(self):
        """

            Resets the segment's json file by handing an empty dict to
            'dict_to_json_file' for the segment's folder and file name.

        Raises:
            PipelineSegmentError
                Raised when the segment's folder, or the json file expected
                inside of it, does not exist on disk.
        """
        segment_folder = self.folder_path

        # Nothing can be reset without the segment's folder...
        folder_found = os.path.exists(segment_folder)
        if not folder_found:
            raise PipelineSegmentError(
                "Couldn't find the pipeline segment's folder when trying to configure this object with the provided json file."
            )

        # ...nor without the json file that lives inside of it.
        json_file_path = segment_folder + self.__json_file_name
        if not os.path.exists(json_file_path):
            raise PipelineSegmentError(
                f"Couldn't find the pipeline segment's file named '{self.__json_file_name}' in the pipeline's directory when trying to configure this object with the provided json file."
            )

        dict_to_json_file({}, segment_folder, self.file_name)
Esempio n. 2
0
    def file_name(self):
        """

            Gives back the name of the segment's json file.

        Returns:
            A copy of the stored json file name.

        Raises:
            PipelineSegmentError
                Raised when the function pipe is still empty, or when the
                segment was set up to not create a file.
        """
        # Guard clauses; the empty pipe check takes priority over the
        # 'no saved file' check.
        if not self.__function_pipe:
            raise PipelineSegmentError(
                "The pipeline segment has not performed any actions yet."
                " Please perform some methods with this object.")

        if not self.__create_file:
            raise PipelineSegmentError(
                "This pipeline segment does not have saved "
                "file and thus can not have a file path.")

        stored_name = self.__json_file_name
        return copy.deepcopy(stored_name)
Esempio n. 3
0
    def __add_function_to_que(self, function_name, parameters, params_dict):
        """

            Adds the function's info to the function que. If the segment has
            no json file name yet then one is generated for it in the
            segment's directory.

        Args:
            function_name: string
                Function's name

            parameters: collection of strings
                Names of the parameters to keep; any key of 'params_dict'
                that is not found in here is dropped.

            params_dict: dict
                Parameter's name to their associated values. This dict is
                filtered in place and the same object is stored in the que.

        Raises:
            PipelineSegmentError
                Raised when the segment has been locked down.

        Note:
            This function should only ever be called by children of
            this object.
        """
        if self.__lock_interaction:
            raise PipelineSegmentError(
                "This pipeline has be locked down and "
                "will prevent futher changes to the generated flat file.")

        # Bookkeeping keys are never stored with the function's parameters
        for delete_key in ("self", "df", "df_features", "_add_to_que",
                           "params_dict"):
            params_dict.pop(delete_key, None)

        # Iterate over a snapshot because keys are removed along the way
        for k, v in list(params_dict.items()):
            if k not in parameters:
                del params_dict[k]
            elif isinstance(v, set):
                # Sets become lists (presumably so the values can be written
                # to json -- confirm against the json writer)
                params_dict[k] = list(v)

        self.__function_pipe.append((function_name, params_dict))

        # Generate new json file name with proper file/folder output attributes
        if len(self.__function_pipe) == 1 and not self.__json_file_name:
            FileOutput.__init__(
                self,
                f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
            )
            all_json_files = get_all_files_from_path(self.folder_path, ".json")

            # The listing is filtered by the '.json' extension, so the bare
            # id alone may never match an existing file; compare both the
            # bare id and the full file name to really avoid collisions.
            while True:
                random_file_name = create_hex_decimal_string().upper()
                if (random_file_name not in all_json_files
                        and random_file_name + ".json" not in all_json_files):
                    break

            self.__segment_id = random_file_name
            self.__json_file_name = random_file_name + ".json"

        # Update json file
        if self.__create_file:
            self.__create_json_pipeline_segment_file()
Esempio n. 4
0
    def __configure_pipeline_segment_with_existing_file(self):
        """

            Re-inits the 'function_pipe' and the 'json_file_name' from the
            json file that is named after the segment's id.

        Raises:
            PipelineSegmentError
                Raised when the segment's folder, or the json file expected
                inside of it, does not exist on disk.
        """

        FileOutput.__init__(
            self,
            f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
        )

        # Start from a clean pipe; the file name is the id plus '.json'
        self.__function_pipe = deque()
        self.__json_file_name = self.__segment_id + ".json"

        # File/Folder error checks
        segment_folder = self.folder_path
        if not os.path.exists(segment_folder):
            raise PipelineSegmentError(
                "Couldn't find the pipeline segment's folder when trying to configure this object with the provided json file."
            )

        json_file_path = segment_folder + self.__json_file_name
        if not os.path.exists(json_file_path):
            raise PipelineSegmentError(
                f"Couldn't find the pipeline segment's file named '{self.__json_file_name}' in the pipeline's directory when trying to configure this object with the provided json file."
            )

        segment_info = json_file_to_dict(json_file_path)["Pipeline Segment"]

        # Push functions into function pipe; orders are numbered from 1 up
        # to (and including) the stored function count.
        for order_number in range(1, segment_info["Function Count"] + 1):
            performed = segment_info["Functions Performed Order"][
                f"Function Order {order_number}"]

            # The first key of each entry is the function's name
            function_name = list(performed)[0]
            params_dict = performed[function_name]["Params Dict"]
            self.__function_pipe.append((function_name, params_dict))
Esempio n. 5
0
    def __replace_function_in_que(self, function_name, params_dict, param,
                                  param_val):
        """

            Placeholder for replacing a function that is already in the
            function que; the logic has not been written yet.

        Args:
            function_name: string
                Function's name (currently unused)

            params_dict: dict
                Parameter's name to their associated values (currently unused)

            param:
                Currently unused.

            param_val:
                Currently unused.

        Raises:
            ValueError
                Always raised; this function hasn't been completed yet.
        """
        # Kept as a ValueError (with the same message) so that anything
        # already catching it keeps working.
        raise ValueError("This function hasn't been completed yet!")
Esempio n. 6
0
    def __init__(self, object_type, segment_id=None, create_file=True):
        """
        Args:
            object_type: string
                The child type of all object's that inherited DataPipelineSegment

            segment_id: string
                 When given as a string instead of None; the object will
                 attempt to load its function pipe from the matching json
                 file. Anything after the first '.' is cut off.

            create_file: bool
                 Must not be False when a segment_id is given.

        Raises:
            UnsatisfiedRequirments
                Raised when segment_id is a truthy value that isn't a string.

            PipelineSegmentError
                Raised when segment_id is given while create_file is False.

        Note:
            Essentially we are serializing the object with json files.
        """

        self.__json_file_name = None
        self.__object_type = copy.deepcopy(object_type)

        # Argument validation
        if segment_id and not isinstance(segment_id, str):
            raise UnsatisfiedRequirments(
                "Segment id must be a string or set to 'None'!")

        if segment_id and not create_file:
            raise PipelineSegmentError(
                "Parameter conflict: segment_id is referring "
                "to a saved file but create_file is set to False.")

        # File extension removal; keep only what comes before the first '.'
        if isinstance(segment_id, str):
            segment_id, _, _ = segment_id.partition(".")
        self.__segment_id = copy.deepcopy(segment_id)

        self.__create_file = create_file
        self.__lock_interaction = False

        # Pushes the functions info based on order they are called
        self.__function_pipe = deque()

        # Attempt to get json file into object's attributes.
        if self.__segment_id:
            self.__configure_pipeline_segment_with_existing_file()