예제 #1
0
 def test_columns_number(self):
     """A header with four #COLUMNINFO records must yield a column count of 4."""
     # The fragments are glued together without any separator, exactly as
     # adjacent string literals would be.
     header_fragments = (
         r"#COLUMNINFO = 1, m, Diepte bovenkant laag, 1 #COLUMNINFO = 2, m, Diepte onderkant laag, 2",
         r"#COLUMNINFO = 3, mm, Zandmediaan, 8",
         r"#COLUMNINFO = 4, mm, Grindmediaan, 9",
     )
     header = "".join(header_fragments)
     self.assertEqual(utils.parse_columns_number(header), 4)
예제 #2
0
    def __init__(self, header_s, data_s):
        """
        Parser of the borehole file.

        Splits the data section into records, extracts the soil information
        of each record and builds ``self.df``: the column data is piped
        through the soil-code, soil-type and additional-info parsers, joined
        with the quantified soil data, reduced to the depth / soil-code /
        soil-fraction columns and finally relabelled with short names.

        :param header_s: (str) Header of the file
        :param data_s: (str) Data of the file
        """
        self.type = 'bore'
        self.project_id = utils.parse_project_type(header_s, 'bore')

        # Layout information taken from the header.
        n_columns = utils.parse_columns_number(header_s)
        col_sep = utils.parse_column_separator(header_s)
        rec_sep = utils.parse_record_separator(header_s)

        records = data_s.split(rec_sep)
        soil_rows = self.extract_soil_info(records, n_columns, col_sep)

        # Same pipeline as a single fluent chain, one step per statement.
        layers = self.parse_data_column_info(header_s, data_s, col_sep,
                                             n_columns)
        layers = layers.pipe(self.parse_data_soil_code, soil_rows)
        layers = layers.pipe(self.parse_data_soil_type, soil_rows)
        layers = layers.pipe(self.parse_add_info_as_string, soil_rows)
        layers = layers.join(self.data_soil_quantified(soil_rows))

        # (column in the joined frame, label exposed on self.df)
        column_aliases = (
            ('depth_top', 'depth_top'),
            ('depth_bottom', 'depth_bottom'),
            ('Soil_code', 'soil_code'),
            ('Gravel', 'G'),
            ('Sand', 'S'),
            ('Clay', 'C'),
            ('Loam', 'L'),
            ('Peat', 'P'),
            ('Silt', 'SI'),
        )
        self.df = layers[[source for source, _ in column_aliases]]
        self.df.columns = [label for _, label in column_aliases]
예제 #3
0
    def __init__(self, path=None, string=None):
        """
        Parser of the borehole file.

        Delegates the initial parsing to the parent constructor, rejects
        anything whose ``self.type`` is not ``"bore"`` and then builds
        ``self.df`` from the header and data sections.

        Parameters
        ----------
        path: str
            Path to the *.gef file.
        string: str
            String version of the *.gef file.

        Raises
        ------
        ValueError
            If ``self.type`` is ``"borehole-report"`` or any other value
            different from ``"bore"``.
        """
        super().__init__(path=path, string=string)

        # Guard clauses: only plain "bore" files get past this point.
        if self.type == "borehole-report":
            raise ValueError(
                "The selected gef file is a GEF-BOREHOLE-Report. Can only parse "
                "GEF-CPT-Report and GEF-BORE-Report. Check the PROCEDURECODE.")
        if self.type != "bore":
            raise ValueError("The selected gef file is not a borehole. "
                             "Check the REPORTCODE or the PROCEDURECODE.")

        self.project_id = utils.parse_project_type(self._headers, "bore")
        self.nen_version = "NEN 5104"

        # This is usually not correct for borehole ("boringen") files.
        n_columns = utils.parse_columns_number(self._headers)
        col_sep = utils.parse_column_separator(self._headers)
        rec_sep = utils.parse_record_separator(self._headers)

        records = self._data.split(rec_sep)
        soil_rows = self.extract_soil_info(records, n_columns, col_sep)

        # Same pipeline as a single fluent chain, one step per statement.
        layers = self.parse_data_column_info(self._headers, self._data,
                                             col_sep, n_columns)
        layers = layers.pipe(self.parse_data_soil_code, soil_rows)
        layers = layers.pipe(self.parse_data_soil_type, soil_rows)
        layers = layers.pipe(self.parse_add_info_as_string, soil_rows)
        self.df = layers.pipe(self.parse_soil_quantification, soil_rows)

        # Drop these columns when present; absent ones are simply skipped.
        unwanted_columns = (
            "sand_median",
            "gravel_median",
            "lutum_percentage",
            "silt_percentage",
            "sand_percentage",
            "gravel_percentage",
            "organic_matter_percentage",
            "soil_type",
        )
        for name in unwanted_columns:
            if name in self.df.columns:
                self.df.drop_in_place(name)
예제 #4
0
def determine_column_names(headers, columns_number=None, columns_info=None):
    """
    Return the column names for the data section.

    When the caller supplies ``columns_number`` or ``columns_info``, the
    given ``columns_info`` is returned untouched (possibly ``None``).
    Otherwise the number of columns is parsed from ``headers`` and one name
    per column (1-based) is looked up with ``utils.parse_column_info``.

    :param headers: Header section passed on to the ``utils`` parsers.
    :param columns_number: Optional column count supplied by the caller.
    :param columns_info: Optional column names supplied by the caller.
    :return: List of column names, or ``None`` when nothing could be
        determined.
    """
    # Anything supplied by the caller short-circuits the header lookup.
    if columns_number is not None or columns_info is not None:
        return columns_info

    parsed_count = utils.parse_columns_number(headers)
    if parsed_count is None:
        return None

    return [
        utils.parse_column_info(headers, index,
                                MAP_QUANTITY_NUMBER_COLUMN_NAME_CPT)
        for index in range(1, parsed_count + 1)
    ]
예제 #5
0
    def __init__(self, header_s, data_s):
        """
        Parser of the borehole file.

        Splits the data section into records, extracts the soil information
        of each record and builds ``self.df``: the column data is piped
        through the soil-code, soil-type and additional-info parsers, joined
        with the quantified soil data, reduced to the depth / soil-code /
        soil-fraction / remarks columns and finally relabelled.

        :param header_s: (str) Header of the file
        :param data_s: (str) Data of the file
        """
        self.type = "bore"
        self.project_id = utils.parse_project_type(header_s, "bore")

        # This is usually not correct for borehole ("boringen") files.
        n_columns = utils.parse_columns_number(header_s)
        col_sep = utils.parse_column_separator(header_s)
        rec_sep = utils.parse_record_separator(header_s)

        records = data_s.split(rec_sep)
        soil_rows = self.extract_soil_info(records, n_columns, col_sep)

        # Same pipeline as a single fluent chain, one step per statement.
        layers = self.parse_data_column_info(
            header_s, data_s, col_sep, n_columns
        )
        layers = layers.pipe(self.parse_data_soil_code, soil_rows)
        layers = layers.pipe(self.parse_data_soil_type, soil_rows)
        layers = layers.pipe(self.parse_add_info_as_string, soil_rows)
        layers = layers.join(self.data_soil_quantified(soil_rows))

        # (column in the joined frame, label exposed on self.df)
        column_aliases = (
            ("depth_top", "depth_top"),
            ("depth_bottom", "depth_bottom"),
            ("Soil_code", "soil_code"),
            ("Gravel", "G"),
            ("Sand", "S"),
            ("Clay", "C"),
            ("Loam", "L"),
            ("Peat", "P"),
            ("Silt", "SI"),
            ("remarks", "Remarks"),
        )
        self.df = layers[[source for source, _ in column_aliases]]
        self.df.columns = [label for _, label in column_aliases]
예제 #6
0
파일: gef.py 프로젝트: ThomasSweijen/pygef
 def parse_data(header_s, data_s, columns_number=None, columns_info=None):
     """
     Read the data section into a table using column names from the header.

     The data is only parsed when neither ``columns_number`` nor
     ``columns_info`` is supplied and a column count can be parsed from
     ``header_s``; in every other case an empty dict is returned.

     :param header_s: Header section passed on to the ``utils`` parsers.
     :param data_s: Data section; every ``!`` is stripped before parsing.
     :param columns_number: Optional column count supplied by the caller.
     :param columns_info: Optional column names supplied by the caller.
     :return: Result of ``pd.read_csv`` or ``{}``.
     """
     # Caller-supplied layout information disables parsing altogether.
     if columns_number is not None or columns_info is not None:
         return {}

     column_count = utils.parse_columns_number(header_s)
     if column_count is None:
         return {}

     # One name per column, columns being numbered from 1.
     column_names = [
         utils.parse_column_info(header_s, index,
                                 MAP_QUANTITY_NUMBER_COLUMN_NAME_CPT)
         for index in range(1, column_count + 1)
     ]
     cleaned = data_s.replace('!', '')
     return pd.read_csv(io.StringIO(cleaned),
                        sep=r';|\s+|,|\|\s*',
                        names=column_names,
                        index_col=False,
                        engine='python')
예제 #7
0
 def parse_data(header_s, data_s, columns_number=None, columns_info=None):
     """
     Read the data section into a table via ``pd.read_csv``.

     When neither ``columns_number`` nor ``columns_info`` is supplied, the
     column count is parsed from ``header_s`` and, if one is found, a name
     is looked up for every column (numbered from 1). The separator is
     always obtained from ``utils.find_separator(header_s)``.

     :param header_s: Header section passed on to the ``utils`` parsers.
     :param data_s: Data section; every ``!`` is stripped before parsing.
     :param columns_number: Optional column count supplied by the caller.
     :param columns_info: Optional column names; used as ``names`` as-is
         when provided (may stay ``None``).
     :return: Result of ``pd.read_csv``.
     """
     nothing_supplied = columns_number is None and columns_info is None
     if nothing_supplied:
         column_count = utils.parse_columns_number(header_s)
         if column_count is not None:
             columns_info = [
                 utils.parse_column_info(
                     header_s, index, MAP_QUANTITY_NUMBER_COLUMN_NAME_CPT
                 )
                 for index in range(1, column_count + 1)
             ]

     cleaned = data_s.replace("!", "")
     field_separator = utils.find_separator(header_s)
     buffer = io.StringIO(cleaned)
     return pd.read_csv(
         buffer,
         sep=field_separator,
         names=columns_info,
         index_col=False,
         engine="python",
     )
예제 #8
0
파일: gef.py 프로젝트: ThomasSweijen/pygef
    def __init__(self, path=None, string=None):
        """
        Parser of the borehole file.

        The GEF content is taken from ``string`` or, when no string is
        given, read from the file at ``path``. The content is split into a
        header and a data section at the end-of-header marker, the metadata
        attributes are filled from the header and ``self.df`` is assembled
        from the column data, soil code, soil type, quantified soil data
        and additional info.

        :param path:(str) Path of the .gef file to parse.
        :param string:(str) String to parse.
        """
        self.path = path
        self.s = string
        self.zid = None  # ground level
        self.x = None
        self.y = None
        self.type = None
        self.end_depth_of_penetration_test = None
        self.project_id = None
        # NOTE(review): the two separator attributes are never populated
        # here; the parsed separators are only kept in local variables.
        self.column_separator = None
        self.record_separator = None
        self.file_date = None

        if self.s is None:
            with open(path, encoding='utf-8', errors='ignore') as f:
                self.s = f.read()

        end_of_header = utils.parse_end_of_header(self.s)
        header_s, data_s = self.s.split(end_of_header)

        columns_number = utils.parse_columns_number(header_s)
        self.file_date = utils.parse_file_date(header_s)
        # The file type must be known before the project id is parsed:
        # parse_project_type receives it as its second argument, and it
        # used to be handed the still-unset ``None``.
        self.type = utils.parse_gef_type(header_s)
        self.project_id = utils.parse_project_type(header_s, self.type)
        self.x = utils.parse_xid_as_float(header_s)
        self.y = utils.parse_yid_as_float(header_s)
        self.zid = utils.parse_zid_as_float(header_s)

        column_separator = utils.parse_column_separator(header_s)
        record_separator = utils.parse_record_separator(header_s)
        data_s_rows = data_s.split(record_separator)
        data_rows_soil = self.extract_soil_info(data_s_rows, columns_number,
                                                column_separator)

        df_column_info = self.parse_data_column_info(header_s, data_s,
                                                     column_separator,
                                                     columns_number)
        df_soil_type = self.parse_data_soil_type(data_rows_soil)
        df_soil_code = self.parse_data_soil_code(data_rows_soil)
        df_soil_quantified = self.data_soil_quantified(data_rows_soil)
        df_additional_info = self.parse_add_info_as_string(data_rows_soil)

        # Put all partial frames side by side (column-wise concatenation).
        df_bore_more_info = pd.concat(
            [
                df_column_info, df_soil_code, df_soil_type,
                df_soil_quantified, df_additional_info
            ],
            axis=1,
            sort=False)

        self.df = df_bore_more_info[[
            'depth_top', 'depth_bottom', 'Soil_code', 'Gravel', 'Sand', 'Clay',
            'Loam', 'Peat', 'Silt'
        ]]
        # Silt is labelled 'SI' so that it does not collide with 'S' (Sand);
        # a duplicated label would make ``df['S']`` ambiguous.
        self.df.columns = [
            'depth_top', 'depth_bottom', 'soil_code', 'G', 'S', 'C', 'L', 'P',
            'SI'
        ]