Example #1
0
    def from_csv(self, qc_tests=False):
        """
        It opens a csv file that contains data from an HOBO instrument.

        Parameters
        ----------
            model: str
                Model of the instrument.

            qc_tests: bool (optional)
                It indicates if QC test should be passed.

        Returns
        -------
            wf: WaterFrame """

        # open file
        self.f = open(self.path, mode='rt', encoding='utf-8-sig')

        # declare metadata variables
        self.title, self.sn = None, None

        # declare index variables
        self.timestamp = None

        # declare dict to save name, long_name and units
        self.name = {
            'temp': [None, None, None],
            'pres': [None, None, None],
            'rh': [None, None, None],
            'batt': [None, None, None]
        }

        def find_units(header):
            header_units = header.split(",", 2)[1].strip()
            units = header_units.split(" ")[0].strip()
            return units

        def find_name(header):
            header_name = header.rsplit(",", 1)[1].strip(" )")
            if header_name.split(":")[0].strip() == "LBL":
                name = header_name.split(":")[1].strip()
            else:
                name = header.split(",", 1)[0].strip()
            return name

        def find_long_name(header):
            long_name = header.split(",", 1)[0].strip()
            return long_name

        def find_col_timestamp(headers):
            for i, header in enumerate(headers):
                if 'Date Time' in header or 'Fecha Tiempo' in header:
                    return i

        def find_col_temperature(headers):
            for i, header in enumerate(headers):
                if 'High Res. Temp.' in header or 'High-Res Temp' in header:
                    self.name['temp'] = [
                        find_name(header),
                        find_long_name(header),
                        find_units(header)
                    ]
                    # self.temp_units = find_units(header)
                    # self.temp = find_name(header)
                    # return 'TEMP'
            for i, header in enumerate(headers):
                for s in ('Temp,', 'Temp.', 'Temperature'):
                    if s in header:
                        self.name['temp'] = [
                            find_name(header),
                            find_long_name(header),
                            find_units(header)
                        ]
                        # self.temp_units = find_units(header)
                        # self.temp = find_name(header)
                        # return 'TEMP'

        def find_col_preassure(headers):
            for i, header in enumerate(headers):
                if 'Pres abs,' in header:
                    self.name['pres'] = [
                        find_name(header),
                        find_long_name(header),
                        find_units(header)
                    ]
                    # self.pres = find_units(header)
                    # self.pres = find_name(header)
                    # return 'PRES'

        def find_col_rh(headers):
            for i, header in enumerate(headers):
                if 'RH,' in header:
                    self.name['rh'] = [
                        find_name(header),
                        find_long_name(header),
                        find_units(header)
                    ]
                    # return 'RH'

        def find_col_battery(headers):
            for i, header in enumerate(headers):
                if 'Batt, V' in header:
                    self.name['batt'] = [
                        find_name(header),
                        find_long_name(header),
                        find_units(header)
                    ]
                    # return 'BATT'

        def find_columns(header):
            """ Find and set column names for headers """
            headers = next(csv.reader(StringIO(header)))
            self.headers = headers
            self.timestamp = find_col_timestamp(headers)
            find_col_temperature(headers)
            find_col_preassure(headers)
            find_col_rh(headers)
            find_col_battery(headers)

        def find_headers():
            while self.timestamp is None:
                header = next(self.f)
                if self.title is None:
                    self.title = header.strip().split(":")
                if self.sn is None:
                    sn_match = SN_REGEX.search(header)
                    self.sn = sn_match.groups()[0] if sn_match else None
                find_columns(header)
            return header

        # find headers
        find_headers()

        # Creation of a WaterFrame
        wf = WaterFrame()
        metadata = {}
        metadata[self.title[0]] = self.title[1].strip()
        metadata["S/N"] = self.sn.strip()

        # Load metadata to Waterframe
        wf.metadata = metadata

        # Create dataframe from csv
        df = pd.read_csv(self.f, names=self.headers, index_col=self.timestamp)

        df = df.replace(np.nan, '', regex=True)

        # Set index to datetime
        df.index = pd.to_datetime(df.index)
        df.set_index(pd.DatetimeIndex(df.index, inplace=True))

        # Rename index
        df.index.name = 'Time'

        # Rename columns
        for col in df.columns:
            if 'Temp' in col:
                df.rename(columns={col: self.name['temp'][0]}, inplace=True)
            elif 'Pres' in col:
                df.rename(columns={col: self.name['pres'][0]}, inplace=True)
            elif 'Batt' in col:
                df.rename(columns={col: self.name['batt'][0]}, inplace=True)
            elif 'RH' in col:
                df.rename(columns={col: self.name['rh'][0]}, inplace=True)

        # Filter columns only if they are present
        df = df.filter(items=[
            self.name['temp'][0], self.name['pres'][0], self.name['batt'][0],
            self.name['rh'][0]
        ])

        # Add QC keys
        for key in df.keys():
            df["{}_QC".format(key)] = 0

        # Add DataFrame into the WaterFrame
        wf.data = df.copy()

        # Change parameter names and add QC columns
        for parameter in wf.parameters():
            for key, value in self.name.items():
                if value[0] == parameter:
                    wf.meaning[parameter] = {
                        "long_name": value[1],
                        "units": value[2]
                    }

        # Creation of QC Flags following OceanSites recomendation
        if qc_tests:
            for parameter in wf.parameters():
                # Reset QC Flags to 0
                wf.reset_flag(key=parameter, flag=0)
                # Flat test
                wf.flat_test(key=parameter, window=0, flag=4)
                # Spike test
                wf.spike_test(key=parameter, window=0, threshold=3, flag=4)
                # Range test
                wf.range_test(key=parameter, flag=4)
                # Change flags from 0 to 1
                wf.flag2flag(key=parameter, original_flag=0, translated_flag=1)
        print(wf.data)
        print(wf)
        return wf
Example #2
0
    def from_txt(model, path, qc_tests=False):
        """
        Parameters
        ----------
            model: str
                Model of the instrument.
            path: str
                Path of the txt file.
            qc_tests: bool (optional)
                It indicates if QC test should be passed.

        Returns
        -------
            wf: WaterFrame
        """
        # Creation of a WaterFrame
        wf = WaterFrame()
        metadata = {}

        # The arguments of pandas.read_csv could change depending on the
        # source.
        if model == "LI-192":
            # lines to skip (int) at the start of the file.
            skiprows = 6
            # format_time = '%d/%m/%Y %H:%M:%S'

            # Add metadata info
            with open(path, encoding='utf-8-sig') as txtfile:
                # Read 2 first lines from csv
                for row in itertools.islice(txtfile, 1, 6):
                    # Delete return carriage from row
                    row = ''.join(row.splitlines())
                    # Split row by "\t" and remove empty strings returned in
                    # split()
                    parts = row.split(":")
                    if "Timestamp" not in parts[0]:
                        metadata[parts[0]] = parts[1].strip()

            # Load metadata to waterframe
            wf.metadata = metadata

            # Load data from table into a DataFrame
            df = pd.read_table(path,
                               delim_whitespace=True,
                               skiprows=skiprows,
                               low_memory=False)
            df['Ns'] = pd.to_numeric(df['Nanoseconds'],
                                     errors='coerce') / 1000000000
            df['Input1'] = pd.to_numeric(df['Input1'], errors='coerce')

            # Create the time index. Convert "Nanoseconds" to seconds, add to
            # column "Seconds"
            # and convert to datetime
            df['Seconds_total'] = df['Ns'] + pd.to_numeric(df['Seconds'],
                                                           errors='coerce')
            df['TIME'] = pd.to_datetime(df['Seconds_total'],
                                        unit='s',
                                        errors='coerce')
            df.set_index(df['TIME'], inplace=True)
            df.drop([
                'DATAH', 'Record', 'Seconds', 'Nanoseconds', 'Ns',
                'Seconds_total', 'TIME', 'MULT_1', 'CHK'
            ],
                    inplace=True,
                    axis=1)

            # Add DataFrame into the WaterFrame
            wf.data = df.copy()

            # Change parameter names and add QC columns
            for key in wf.data.keys():
                if key == "Input1":
                    wf.data.rename(columns={"Input1": "PPFD"}, inplace=True)
                    wf.data["PPFD_QC"] = 0
                    wf.meaning['PPFD'] = {
                        "long_name": "Photosynthetic Photon Flux Density",
                        "units": "µMol/M^2S"
                    }

            # Creation of QC Flags following OceanSites recomendation
            if qc_tests:
                for parameter in wf.parameters():
                    # Reset QC Flags to 0
                    wf.reset_flag(key=parameter, flag=0)
                    # Flat test
                    wf.flat_test(key=parameter, window=0, flag=4)
                    # Spike test
                    wf.spike_test(key=parameter, window=0, threshold=3, flag=4)
                    # Range test
                    wf.range_test(key=parameter, flag=4)
                    # Change flags from 0 to 1
                    wf.flag2flag(key=parameter,
                                 original_flag=0,
                                 translated_flag=1)

        else:
            warnings.warn("Unknown model")
            return

        # resample to seconds
        wf.resample('S')

        return wf