Exemple #1
0
class CSVConverterADCP(CSVConverter):
    """Accumulates several ADCP input files into one combined data frame.

    Each input file is read with a throw-away ``CSVConverter`` and the
    resulting frame is merged into ``self.pandas_tools`` via ``concat_df``.
    """

    def __init__(self, logger):
        self.logger = logger
        self.parser = ParserADCP(self.logger)
        self.pandas_tools = PandasTools(self.parser, self.logger)

    def input_csv_to_df(self, input_file, date_range):
        """Parse one input file and merge its frame into the combined frame.

        :param input_file: reader object exposing ``filename``; the part of
            the filename between the first and second underscore is used as
            the column prefix.
        :param date_range: forwarded unchanged to
            ``CSVConverter.input_csv_to_df``.
        """
        csv_converter = CSVConverter(self.parser, self.logger)
        # "TAO_T0N110W_D_ADCP.ascii" => "T0N110W"
        column_str = input_file.filename.split("_")[1]
        # column_str is also passed as the 4th positional argument of
        # CSVConverter.input_csv_to_df.
        csv_converter.input_csv_to_df(input_file, date_range,
                                      self.columns(column_str), column_str)
        self.pandas_tools.concat_df(csv_converter.pandas_tools.df)

    @classmethod
    def columns(cls, column_str):
        """Return the column names ``<column_str>_<depth>_<component>``.

        Depths run over [10, 15, 20, ..., 310, 315, 320]; for every depth
        the components are emitted in the order UCUR, VCUR, WCUR.
        """
        return [
            column_str + "_" + str(depth) + "_" + component
            for depth in range(10, 321, 5)
            for component in ('UCUR', 'VCUR', 'WCUR')
        ]

    def df_to_output_csv(self, output_path, output_filename):
        """Write the combined frame to a CSV file.

        The file starts with a ``DATASET:`` row carrying ``parser.NAME``,
        followed by a header row (``Timestamp`` plus the frame's columns)
        and then the frame's rows.
        """
        output_file = CSVWriter(output_path, output_filename)
        try:
            output_file.write_row(['DATASET:', self.parser.NAME])
            header = ['Timestamp']
            header.extend(self.pandas_tools.df.columns)
            output_file.write_row(header)
            self.pandas_tools.df_to_csv(output_file)
        finally:
            # close the writer even when one of the writes above raises
            output_file.close()
class CSVConverterSolarAnywhere(CSVConverter):
    """Accumulates several SolarAnywhere input files into one data frame.

    Each input file is read with a throw-away ``CSVConverter``, plotted,
    and merged into ``self.pandas_tools`` via ``concat_df``.
    """

    def __init__(self, logger):
        self.logger = logger
        self.parser = ParserSolarAnywhere(self.logger)
        self.pandas_tools = PandasTools(self.parser, self.logger)

    def input_csv_to_df(self, input_file, date_range, plot_output_path):
        """Parse one input file, plot it and merge it into the combined frame.

        :param input_file: reader object exposing ``filename``; the part of
            the filename before the first dot is used as the column prefix.
        :param date_range: forwarded unchanged to
            ``CSVConverter.input_csv_to_df``.
        :param plot_output_path: forwarded to ``ParserSolarAnywhere.plot``.
        """
        csv_converter = CSVConverter(self.parser, self.logger)
        # "0_0.csv" => "0_0"
        column_str = input_file.filename.split(".")[0]
        columns = [
            column_str + '_' + suffix for suffix in ('GHI', 'DNI', 'DHI')
        ]
        csv_converter.input_csv_to_df(input_file, date_range, columns)
        # plot() receives a copy of the frame; the original is what gets
        # merged below (presumably so plotting cannot alter it -- confirm)
        df = csv_converter.pandas_tools.df.copy()
        ParserSolarAnywhere.plot(plot_output_path, input_file.filename, df)
        self.pandas_tools.concat_df(csv_converter.pandas_tools.df)

    def df_to_output_csv(self, output_path, output_filename):
        """Write the combined frame to a CSV file.

        The file starts with a ``DATASET:`` row carrying ``parser.NAME``,
        followed by a header row (``Timestamp`` plus the frame's columns)
        and then the frame's rows.
        """
        output_file = CSVWriter(output_path, output_filename)
        try:
            output_file.write_row(['DATASET:', self.parser.NAME])
            header = ['Timestamp']
            header.extend(self.pandas_tools.df.columns)
            output_file.write_row(header)
            self.pandas_tools.df_to_csv(output_file)
        finally:
            # close the writer even when one of the writes above raises
            output_file.close()
 def __init__(self, parser, logger):
     """Store the collaborators and create an empty PandasTools helper."""
     self.parser = parser
     self.logger = logger
     self.pandas_tools = PandasTools(parser, logger)
     # The remaining attributes are placeholders; input_csv_to_df fills
     # them in.
     self.input_file = self.date_range = None
     self.adcp = self.repeat_timestamp = None
Exemple #4
0
    def buoy_start(self, new_buoy_id):
        """Start collecting rows for a new buoy.

        Unless ``new_buoy_id`` is 1, the data of the buoy collected so far
        is first flushed with ``buoy_end()``. A fresh ``PandasTools`` is then
        created with one column per measurement, each prefixed with the
        buoy id (``<id>_lat``, ``<id>_long``, ...).

        :param new_buoy_id: identifier of the buoy whose rows follow.
        """
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("new buoy %s" % (new_buoy_id,))
        measurements = [
            "lat", "long", "ZonWinds", "MerWinds", "Humidity", "AirTemp", "SST"
        ]
        columns = ["%s_%s" % (new_buoy_id, name) for name in measurements]

        # Original note: this is only False the first time this method is
        # called (i.e. it assumes the first buoy id is 1 -- confirm).
        if new_buoy_id != 1:
            self.buoy_end()

        self.current_pandas_tools = PandasTools(self.parser, self.logger)
        self.current_pandas_tools.new_df(columns, False)
Exemple #5
0
class CSVConverterNOAA(CSVConverter):
    """Accumulates several NOAA input files into one combined data frame.

    Each input file is read with a throw-away ``CSVConverter`` and the
    resulting single-column frame is merged into ``self.pandas_tools`` via
    ``concat_df``.
    """

    def __init__(self, logger):
        self.logger = logger
        self.parser = ParserNOAA(self.logger)
        self.pandas_tools = PandasTools(self.parser, self.logger)

    def input_csv_to_df(self, input_file, date_range):
        """Parse one input file and merge its frame into the combined frame.

        :param input_file: reader object exposing ``filename``; the part of
            the filename between the first and second underscore becomes
            the name of the single data column.
        :param date_range: forwarded unchanged to
            ``CSVConverter.input_csv_to_df``.
        """
        csv_converter = CSVConverter(self.parser, self.logger)
        # "TAO_T5N140W_D_SST_10min.ascii" => "T5N140W"
        column = input_file.filename.split("_")[1]
        csv_converter.input_csv_to_df(input_file, date_range, [column])
        self.pandas_tools.concat_df(csv_converter.pandas_tools.df)

    def df_to_output_csv(self, output_path, output_filename):
        """Write the combined frame to a CSV file.

        The file starts with a ``DATASET:`` row carrying ``parser.NAME``,
        followed by a header row (``Timestamp`` plus the frame's columns)
        and then the frame's rows.
        """
        output_file = CSVWriter(output_path, output_filename)
        try:
            output_file.write_row(['DATASET:', self.parser.NAME])
            header = ['Timestamp']
            header.extend(self.pandas_tools.df.columns)
            output_file.write_row(header)
            self.pandas_tools.df_to_csv(output_file)
        finally:
            # close the writer even when one of the writes above raises
            output_file.close()
 def __init__(self, logger):
     """Build the SolarAnywhere parser and an empty PandasTools helper."""
     self.logger = logger
     solar_parser = ParserSolarAnywhere(logger)
     self.parser = solar_parser
     self.pandas_tools = PandasTools(solar_parser, logger)
Exemple #7
0
 def __init__(self, parser, logger):
     """Store the collaborators and create the accumulating PandasTools."""
     self.parser, self.logger = parser, logger
     self.pandas_tools = PandasTools(parser, logger)
     # no per-buoy helper exists yet at construction time
     self.current_pandas_tools = None
Exemple #8
0
class DataFrameUtils:
    """Collects rows buoy by buoy and merges them into one data frame.

    ``current_pandas_tools`` holds the frame of the buoy currently being
    read; ``pandas_tools`` holds the merged result of the buoys that have
    been ended with ``buoy_end()``.
    """

    def __init__(self, parser, logger):
        self.parser = parser
        self.logger = logger
        self.pandas_tools = PandasTools(self.parser, self.logger)
        # set by buoy_start(); None until the first buoy is started
        self.current_pandas_tools = None

    def buoy_start(self, new_buoy_id):
        """Start collecting rows for a new buoy.

        Unless ``new_buoy_id`` is 1, the data of the buoy collected so far
        is first flushed with ``buoy_end()``. A fresh ``PandasTools`` is then
        created with one column per measurement, each prefixed with the
        buoy id (``<id>_lat``, ``<id>_long``, ...).
        """
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("new buoy %s" % (new_buoy_id,))
        measurements = [
            "lat", "long", "ZonWinds", "MerWinds", "Humidity", "AirTemp", "SST"
        ]
        columns = ["%s_%s" % (new_buoy_id, name) for name in measurements]

        # Original note: this is only False the first time this method is
        # called (i.e. it assumes the first buoy id is 1 -- confirm).
        if new_buoy_id != 1:
            self.buoy_end()

        self.current_pandas_tools = PandasTools(self.parser, self.logger)
        self.current_pandas_tools.new_df(columns, False)

    def add_row(self, timestamp, row):
        """Append one row to the frame of the buoy currently being read."""
        self.current_pandas_tools.add_row(timestamp, row)

    def buoy_end(self):
        """Merge the current buoy's frame into the accumulated frame."""
        self.pandas_tools.concat_df(self.current_pandas_tools.df)

    def df_to_output_csv(self, output_path, output_filename):
        """Print statistics, then write the accumulated frame to a CSV file.

        The file starts with ``DATASET:``, ``TIME UNIT:`` (always ``hours``)
        and ``FIRST TIMESTAMP:`` rows, followed by a header row
        (``Time Delta`` plus the frame's columns) and the frame's rows.
        """
        # print stats
        self.pandas_tools.print_stats()
        # output to file
        output_file = CSVWriter(output_path, output_filename)
        try:
            output_file.write_row(['DATASET:', self.parser.NAME])
            output_file.write_row(['TIME UNIT:', 'hours'])
            output_file.write_row(
                ['FIRST TIMESTAMP:',
                 self.pandas_tools.first_timestamp()])
            header = ['Time Delta']
            header.extend(self.pandas_tools.df.columns)
            output_file.write_row(header)
            self.pandas_tools.df_to_csv(output_file, 'hours')
        finally:
            # close the writer even when one of the writes above raises
            output_file.close()
class CSVConverter:
    """Reads an input file through a parser into a data frame and writes
    that frame back out as CSV.

    The parser supplies header and row parsing; ``PandasTools`` holds the
    resulting data frame.
    """

    def __init__(self, parser, logger):
        self.parser = parser
        self.logger = logger
        self.pandas_tools = PandasTools(self.parser, self.logger)
        # initialized in the input_csv_to_df method
        self.input_file = None
        self.date_range = None
        self.adcp = None
        self.repeat_timestamp = None
        # per-row state, maintained by _write_data / _process_line
        self.timestamp = None
        self.values = None
        self.previous_timestamp = None
        self.previous_values = None

    def input_csv_to_df(self,
                        input_file,
                        date_range=None,
                        columns=None,
                        adcp=False,
                        repeat_timestamp=False):
        """Parse ``input_file`` into ``self.pandas_tools``.

        :param input_file: reader exposing ``read_line()``,
            ``continue_reading``, ``filename`` and ``close()``.
        :param date_range: optional object with ``inside_range(timestamp)``;
            rows outside the range are skipped.
        :param columns: column names for the new frame; defaults to
            ``parser.columns`` (read after the header has been parsed).
        :param adcp: ``False`` for regular input; any other value is
            forwarded as a third argument to ``pandas_tools.add_row``.
        :param repeat_timestamp: True if it is expected that the same
            timestamp can be repeated; disables the timestamp warnings.

        The input file is closed when parsing finishes, including when
        parsing raises.
        """
        self.input_file = input_file
        self.date_range = date_range
        self.adcp = adcp
        self.repeat_timestamp = repeat_timestamp
        try:
            while self.parser.parsing_header:
                self.parser.parse_header(self.input_file.read_line())
            cols = self.parser.columns if columns is None else columns
            self.pandas_tools.new_df(cols, repeat_timestamp)
            self._write_data()
        finally:
            self.input_file.close()

    def print_stats(self):
        """Print the statistics of the parsed frame."""
        self.pandas_tools.print_stats()

    def df_to_output_csv(self, output_path, output_filename):
        """Write the parsed frame to a CSV file; does nothing if it is empty.

        The file starts with ``DATASET:``, ``TIME UNIT:`` and
        ``FIRST TIMESTAMP:`` rows, followed by a header row (``Time Delta``
        plus ``parser.columns``) and the frame's rows.
        """
        if self.pandas_tools.is_empty_df():
            return
        output_file = CSVWriter(output_path, output_filename)
        try:
            output_file.write_row(['DATASET:', self.parser.name])
            output_file.write_row(['TIME UNIT:', self.parser.time_unit])
            output_file.write_row(
                ['FIRST TIMESTAMP:',
                 self.pandas_tools.first_timestamp()])
            output_file.write_row(['Time Delta'] + self.parser.columns)
            self.pandas_tools.df_to_csv(output_file, self.parser.time_unit)
        finally:
            # close the writer even when one of the writes above raises
            output_file.close()

    def plot(self, output_path):
        """Ask the parser to plot the parsed frame; no-op if it is empty."""
        if self.pandas_tools.is_empty_df():
            return
        self.parser.plot(output_path, self.input_file.filename,
                         self.pandas_tools.df)

    ###########################################################################
    # internal helpers
    ###########################################################################

    def _write_data(self):
        """Feed every remaining data line of the input file into the frame."""
        self.previous_timestamp, self.previous_values = None, None
        while self.input_file.continue_reading:
            self._process_line()

    def _process_line(self):
        """Read lines until one parses to a row, then add that row.

        ``parser.parse_data`` returns ``None`` for lines that carry no row,
        otherwise ``{ 'timestamp': datetime object, 'values': values array }``.
        """
        row = None
        while row is None:
            row = self.parser.parse_data(self.input_file.read_line())
            if row is None and not self.input_file.continue_reading:
                # the input ended on a line without data: stop instead of
                # reading past the end of the file
                return
        self.timestamp, self.values = row['timestamp'], row['values']
        if self.date_range and not self.date_range.inside_range(
                self.timestamp):
            return
        if self.adcp is False:
            if not self.repeat_timestamp:
                self.print_warning()
            self.pandas_tools.add_row(self.timestamp, self.values)
        else:
            # print the first timestamp seen on each new day
            if self.previous_timestamp and self.previous_timestamp.day != self.timestamp.day:
                print(self.timestamp)
            self.pandas_tools.add_row(self.timestamp, self.values, self.adcp)

        self.previous_timestamp, self.previous_values = self.timestamp, self.values

    def print_warning(self):
        """Log a warning when the current timestamp does not advance."""
        # first row OR positive timestamp delta
        if self.previous_timestamp is None or self.timestamp > self.previous_timestamp:
            return
        if self.timestamp < self.previous_timestamp:
            self._print_state("WARNING: timestamp < previous_timestamp")
        elif self.values == self.previous_values:
            self._print_state(
                "WARNING: timestamp == previous_timestamp => duplicate row")
        else:
            self._print_state(
                "WARNING: timestamp == previous_timestamp => different row")

    def _print_state(self, message=None):
        """Log ``message`` (if any) followed by the current and previous row."""
        if message is not None:
            self.logger.info(message)
        self.logger.info('previous_timestamp =%s', self.previous_timestamp)
        self.logger.info('timestamp =%s', self.timestamp)
        self.logger.info('previous_values =%s', self.previous_values)
        self.logger.info('values =%s', self.values)
        self.logger.info('')
Exemple #10
0
 def __init__(self, logger):
     """Build the NOAA parser and an empty PandasTools helper."""
     self.logger = logger
     noaa_parser = ParserNOAA(logger)
     self.parser = noaa_parser
     self.pandas_tools = PandasTools(noaa_parser, logger)