Example #1
    def __add_function_to_que(self, function_name, parameters, params_dict):
        """

            Adds the function info the function que. If the segment has no
            json file name then generate one for it the given directory.

        Args:
            function_name: string
                Functions name

            params_dict: dict
                Parameter's name to their associated values.

        Note:
            This function should only ever be called by children of
            this object.
        """
        if self.__lock_interaction:
            raise PipelineSegmentError(
                "This pipeline has been locked down and "
                "will prevent further changes to the generated flat file.")

        # Remove keys that should never be serialized with the function call.
        for delete_key in {
                "self", "df", "df_features", "_add_to_que", "params_dict"
        }:
            if delete_key in params_dict:
                del params_dict[delete_key]

        # Iterate over a shallow copy so the dict can be mutated safely.
        for k, v in dict(params_dict).items():
            if k not in parameters:
                del params_dict[k]
            elif isinstance(v, set):
                # Sets aren't json serializable; convert them to lists.
                params_dict[k] = list(v)

        self.__function_pipe.append((function_name, params_dict))

        # Generate new json file name with proper file/folder output attributes
        if len(self.__function_pipe) == 1 and not self.__json_file_name:
            FileOutput.__init__(
                self,
                f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
            )
            all_json_files = get_all_files_from_path(self.folder_path, ".json")
            while True:
                random_file_name = create_hex_decimal_string().upper()
                if random_file_name not in all_json_files:
                    break

            self.__segment_id = random_file_name
            self.__json_file_name = random_file_name + ".json"

        # Update json file
        if self.__create_file:
            self.__create_json_pipeline_segment_file()
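
The tail of this method picks a collision-free file name by retrying random hex strings against the files already on disk. A minimal standalone sketch of that pattern, where '_unique_json_name' is a hypothetical helper and 'secrets.token_hex' stands in for 'create_hex_decimal_string':

import os
import secrets

def _unique_json_name(folder_path):
    # Hypothetical helper mirroring the retry loop above;
    # secrets.token_hex approximates 'create_hex_decimal_string'.
    existing = {f for f in os.listdir(folder_path) if f.endswith(".json")}
    while True:
        candidate = secrets.token_hex(8).upper()
        if f"{candidate}.json" not in existing:
            return candidate
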
Example #2
    def __init__(self, dataset_name, overwrite_full_path=None):
        """
        Args:
            dataset_name: string
                Sub directory to create on top of the directory
                'PARENT_OUTPUT_FOLDER_NAME'.

            overwrite_full_path: string
                A directory path that must already exist. When set, the
                dataset name is ignored entirely and output is pointed at
                this pre-existing directory.
        """
        # Create/Setup project directory
        FileOutput.__init__(self, dataset_name, overwrite_full_path)
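
A sketch of what a FileOutput-style initializer plausibly does with these two arguments. The 'init_output_folder' helper and the value of 'PARENT_OUTPUT_FOLDER_NAME' are assumptions for illustration, not the library's actual implementation:

import os

PARENT_OUTPUT_FOLDER_NAME = "eflow Data"  # assumption: mirrors SYS_CONSTANTS

def init_output_folder(dataset_name, overwrite_full_path=None):
    # Either build '<cwd>/<parent>/<dataset_name>' on demand, or trust a
    # pre-existing path the caller supplies.
    if overwrite_full_path is None:
        folder_path = os.path.join(
            os.getcwd(), PARENT_OUTPUT_FOLDER_NAME, dataset_name)
        os.makedirs(folder_path, exist_ok=True)
    else:
        if not os.path.isdir(overwrite_full_path):
            raise FileNotFoundError(
                f"'{overwrite_full_path}' must already exist.")
        folder_path = overwrite_full_path
    return folder_path
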
Example #3
    def __replace_function_in_que(self, function_name, params_dict, param,
                                  param_val):
        """
            Replaces a function's entry in the function queue.

        Note:
            Not yet implemented; everything below the raise is unreachable
            placeholder code copied from '__add_function_to_que' (note that
            'parameters' is undefined in this scope).
        """
        raise ValueError("This function hasn't been completed yet!")

        if self.__lock_interaction:
            raise PipelineSegmentError(
                "This pipeline has been locked down and "
                "will prevent further changes to the generated flat file.")

        for delete_key in {
                "self", "df", "df_features", "_add_to_que", "params_dict"
        }:
            if delete_key in params_dict:
                del params_dict[delete_key]

        for k, v in dict(params_dict).items():
            if k not in parameters:  # 'parameters' is undefined here
                del params_dict[k]
            elif isinstance(v, set):
                params_dict[k] = list(v)

        self.__function_pipe.append((function_name, params_dict))

        # Generate new json file name with proper file/folder output attributes
        if len(self.__function_pipe) == 1 and not self.__json_file_name:
            FileOutput.__init__(
                self,
                f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
            )
            all_json_files = get_all_files_from_path(self.folder_path, ".json")
            while True:
                random_file_name = create_hex_decimal_string().upper()
                if random_file_name not in all_json_files:
                    break

            self.__segment_id = random_file_name
            self.__json_file_name = random_file_name + ".json"

        # Update json file
        if self.__create_file:
            self.__create_json_pipeline_segment_file()
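
Since the method above is unimplemented, here is a minimal sketch of what the finished replacement logic might look like, under the assumption (consistent with the other examples) that the queue holds '(function_name, params_dict)' tuples. This is an illustration, not the library's implementation:

from collections import deque

def replace_param_in_que(function_pipe, function_name, param, param_val):
    # Walk the queued (name, params) tuples and update the matching
    # function's parameter in place.
    for queued_name, queued_params in function_pipe:
        if queued_name == function_name and param in queued_params:
            queued_params[param] = param_val
            return True
    return False

# Usage sketch
pipe = deque([("remove_features", {"feature_names": ["Cabin"]})])
replace_param_in_que(pipe, "remove_features", "feature_names",
                     ["Cabin", "Ticket"])
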
Example #4
    def __configure_pipeline_segment_with_existing_file(self):
        """

            Attempts to get a json file and then re_init the 'function_pipe'
            and the 'json_file_name'.
        """

        FileOutput.__init__(
            self,
            f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
        )

        self.__function_pipe = deque()
        self.__json_file_name = self.__segment_id + ".json"

        # File/Folder error checks
        if not os.path.exists(self.folder_path):
            raise PipelineSegmentError(
                "Couldn't find the pipeline segment's folder when trying to "
                "configure this object with the provided json file.")
        if not os.path.exists(self.folder_path + self.__json_file_name):
            raise PipelineSegmentError(
                f"Couldn't find the pipeline segment's file named "
                f"'{self.__json_file_name}' in the pipeline's directory when "
                "trying to configure this object with the provided json file.")

        json_dict = json_file_to_dict(self.folder_path +
                                      self.__json_file_name)

        # Push functions into function pipe
        functions_performed = json_dict["Pipeline Segment"][
            "Functions Performed Order"]
        for function_order in range(
                1, json_dict["Pipeline Segment"]["Function Count"] + 1):
            function_entry = functions_performed[
                f"Function Order {function_order}"]
            function_name = list(function_entry.keys())[0]
            params_dict = function_entry[function_name]["Params Dict"]
            self.__function_pipe.append((function_name, params_dict))
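
The reader above implies a json layout roughly like the following. This is a reconstruction from the keys used in the code, with illustrative function names and values, not the library's documented schema:

# Reconstructed from the lookups above; field values are illustrative.
example_segment_json = {
    "Pipeline Segment": {
        "Function Count": 2,
        "Functions Performed Order": {
            "Function Order 1": {
                "remove_features": {
                    "Params Dict": {"feature_names": ["Cabin"]}
                }
            },
            "Function Order 2": {
                "drop_feature_rows_with_nulls": {
                    "Params Dict": {"feature_names": ["Age"]}
                }
            },
        },
    }
}
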
Example #5
    def __init__(self,
                 widget_child_name):
        FileOutput.__init__(self,
                            f'_Extras/Pipeline Structure/Widgets/{widget_child_name}')
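
As with Example #2, this just roots a widget's output under a fixed subtree. A hypothetical child class, for illustration only (assuming FileOutput is in scope from the project):

class FeatureSelectorWidget(FileOutput):
    # Hypothetical widget: its saved state would land under
    # '_Extras/Pipeline Structure/Widgets/FeatureSelectorWidget'.
    def __init__(self):
        FileOutput.__init__(self,
                            '_Extras/Pipeline Structure/Widgets/'
                            'FeatureSelectorWidget')
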
Example #6
    def __init__(self,
                 pipeline_name,
                 df,
                 df_features=None,
                 pipeline_modify_id=None,
                 remove_past_contents=False):
        """
        Args:
            pipeline_name (str):
                Points to/generates a folder based on the pipeline's name.

            pipeline_modify_id (str,NoneType):
                If set to 'None' then will point the 'root' or the main template
                of the pipeline.

            remove_past_contents:
                If an already existing folder exists for this then move to
                eflow's personal garbage.
        """
        # Set up directory structure
        dir_path_to_pipeline = correct_directory_path(
            f"{os.getcwd()}/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/_Extras/Pipeline Structure/Data Pipeline/{pipeline_name}"
        )
        configure_existing_file = False

        # Get json proper file name
        if pipeline_modify_id:
            json_file = f"{pipeline_modify_id.split('.')[0]}.json"
        else:
            json_file = "root_pipeline.json"

        self.__df_features = None

        # Check if folder/file exist for the pipeline
        if os.path.exists(dir_path_to_pipeline):
            if os.path.exists(dir_path_to_pipeline + json_file):
                print(f"The file '{json_file}' exist!")
                FileOutput.__init__(
                    self,
                    f'_Extras/Pipeline Structure/Data Pipeline/{pipeline_name}'
                )
                configure_existing_file = True
            else:
                raise PipelineError(f"The file '{json_file}' does not exist!")

            # Load df_features from a saved file if present; otherwise
            # require the caller to have passed one.
            if os.path.exists(dir_path_to_pipeline + "df_features.json"):
                df_features = DataFrameTypes(None)
                df_features.init_on_json_file(dir_path_to_pipeline +
                                              "df_features.json")
            else:
                if df_features is None:
                    raise PipelineError(
                        "When initializing a data pipeline structure "
                        "you must pass a DataFrameTypes object with "
                        "the correctly defined types!")

                # Create file representation
                else:
                    df_features.create_json_file_representation(
                        dir_path_to_pipeline, "df_features")

        # -----
        if df_features is None:
            raise PipelineError("When initializing a data pipeline structure "
                                "you must pass a DataFrameTypes object with "
                                "the correctly defined types!")
        self.__df_features = copy.deepcopy(df_features)

        # Check if the root file exists or a pipeline modify id was given
        self.__pipeline_name = copy.deepcopy(pipeline_name)
        self.__pipeline_segment_deque = deque()
        self.__pipeline_segment_names = set()
        self.__pipeline_segment_path_id = set()
        self.__pipeline_modify_id = copy.deepcopy(pipeline_modify_id)

        self.__json_file_name = json_file

        # Json file does exist; init DataPipeline object correctly
        if configure_existing_file:
            if remove_past_contents:
                print("Moving past contents to eFlow's garbage.")
                move_folder_to_eflow_garbage(dir_path_to_pipeline,
                                             "Data Pipelines")
            else:
                print(
                    "Now configuring object with proper pipeline segments...")
                self.__configure_pipeline_with_existing_file()

        # Drop any dataframe columns that df_features doesn't track. The
        # symmetric difference may also contain features missing from 'df';
        # 'errors="ignore"' keeps drop from raising on those.
        for removal_feature in set(df.columns) ^ set(
                df_features.all_features()):
            print(f"Removing the feature: \"{removal_feature}\"")
            df.drop(columns=removal_feature, inplace=True, errors="ignore")
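
A small standalone illustration of that pruning step, assuming pandas. It shows why the symmetric difference can include names that aren't columns of 'df' at all, which is what makes the ignore guard necessary:

import pandas as pd

df = pd.DataFrame({"Age": [22, 35], "Cabin": ["C85", None]})
tracked_features = {"Age", "Fare"}  # illustrative df_features contents

# Symmetric difference: names appearing in exactly one of the two sets.
# Here that's {"Cabin", "Fare"}; "Fare" isn't a column of df.
for removal_feature in set(df.columns) ^ tracked_features:
    df.drop(columns=removal_feature, inplace=True, errors="ignore")

print(list(df.columns))  # ['Age']
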
Example #7
    def add(self, segment_name, pipeline_segment_obj):
        """
        segment_name (str):
            A aliased name to refer to this segment.

        pipeline_segment_obj (child of DataPipelineSegment):
            A child object of type DataPipelineSegment.

        Returns:
            Attempts to add a pipeline segment object to the objects que and
            update it's related json object.
        """

        # pipeline_segment_obj = copy.deepcopy(pipeline_segment_obj)

        # Type check
        if not isinstance(pipeline_segment_obj, DataPipelineSegment):

            raise UnsatisfiedRequirments(
                f"Expected a 'DataPipelineSegment' object; received '{type(pipeline_segment_obj)}'"
            )

        # Check if alias has already been used
        if segment_name in self.__pipeline_segment_names:
            raise PipelineError(
                f"The '{segment_name}' pipeline segment is already in this pipeline. Please choose a different segment name."
            )

        try:
            # Check if the pipeline segment has already been used
            segment_path_id = pipeline_segment_obj.relative_folder_path + pipeline_segment_obj.file_name
        except AttributeError:
            raise UnsatisfiedRequirments(
                "The given pipeline segment didn't perform any functions.")

        if segment_path_id in self.__pipeline_segment_path_id:
            raise PipelineError(
                "This segment has already been added to this pipeline. "
                f"Segment path id: '{segment_path_id}'.\n"
                "This can be resolved by:"
                "\n\t* Creating a completely new segment object and adding "
                "it to the pipeline with the 'add' method."
                "\n\t* Referring to a different segment path id.")
        else:
            # Queue has no data yet; set up output directory for new data
            if len(self.__pipeline_segment_deque) == 0:
                FileOutput.__init__(
                    self,
                    f'_Extras/Pipeline Structure/Data Pipeline/{self.__pipeline_name}'
                )

                if os.path.exists(self.folder_path + "df_features.json"):
                    self.__df_features.init_on_json_file(self.folder_path +
                                                         "df_features.json")
                else:
                    self.__df_features.create_json_file_representation(
                        self.folder_path, "df_features.json")

        # Update data types for error checking
        self.__pipeline_segment_names.add(segment_name)
        self.__pipeline_segment_path_id.add(segment_path_id)

        # Main component of the project
        self.__pipeline_segment_deque.append(
            (segment_name, segment_path_id, pipeline_segment_obj))

        # Lock down the segment so users can't keep interacting with it
        # after it's been added to the pipeline.
        pipeline_segment_obj._DataPipelineSegment__lock_interaction = True

        # Update/Create the json file
        self.__create_json_pipeline_file()
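
The lock at the end relies on Python's name mangling: a double-underscore attribute defined inside DataPipelineSegment is stored as '_DataPipelineSegment__lock_interaction', which is why external code must use the mangled name. A minimal self-contained illustration (the 'Segment' class is hypothetical):

class Segment:
    def __init__(self):
        self.__locked = False  # stored as '_Segment__locked'

    def mutate(self):
        if self.__locked:
            raise RuntimeError("Segment is locked.")

seg = Segment()
seg.mutate()                  # fine
seg._Segment__locked = True   # external code reaching through the mangling
try:
    seg.mutate()
except RuntimeError as e:
    print(e)                  # Segment is locked.
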