class DataFrameUtils:
    """Collect rows buoy by buoy and export the combined data frame as CSV.

    Each buoy gets its own ``PandasTools`` instance
    (``current_pandas_tools``); when the next buoy starts, the finished
    frame is concatenated into the shared ``pandas_tools`` accumulator.
    """

    def __init__(self, parser, logger):
        """Store the parser and logger and create the accumulator frame holder."""
        self.parser = parser
        self.logger = logger
        # accumulator that receives every finished buoy frame
        self.pandas_tools = PandasTools(self.parser, self.logger)
        # frame holder for the buoy currently being read; set in buoy_start
        self.current_pandas_tools = None

    def buoy_start(self, new_buoy_id):
        """Begin a new buoy: flush the previous one and open a fresh frame.

        Column names are the fixed measurement names prefixed with
        ``<new_buoy_id>_``.
        """
        # Single-argument call form prints the same text on Python 2 and 3
        # (the former `print "new buoy", id` statement is a SyntaxError on 3).
        print("new buoy %s" % new_buoy_id)

        base_names = [
            "lat", "long", "ZonWinds", "MerWinds", "Humidity", "AirTemp", "SST"
        ]
        columns = ["%s_%s" % (new_buoy_id, name) for name in base_names]

        if new_buoy_id != 1:
            # this is only False the first time this method is called,
            # i.e. buoy ids are expected to start at 1
            self.buoy_end()

        self.current_pandas_tools = PandasTools(self.parser, self.logger)
        self.current_pandas_tools.new_df(columns, False)

    def add_row(self, timestamp, row):
        """Append one row to the frame of the buoy currently being read."""
        self.current_pandas_tools.add_row(timestamp, row)

    def buoy_end(self):
        """Concatenate the current buoy's frame into the accumulator.

        NOTE(review): buoy_start only flushes the *previous* buoy, so the
        last buoy is merged only if the caller invokes this method itself
        before exporting -- confirm callers do so.
        """
        self.pandas_tools.concat_df(self.current_pandas_tools.df)

    def df_to_output_csv(self, output_path, output_filename):
        """Print frame statistics and write the accumulated frame to a CSV file.

        The file starts with three metadata rows (dataset name, time unit,
        first timestamp) and a header row, followed by the frame contents
        written by ``PandasTools.df_to_csv`` with the 'hours' time unit.
        """
        # print stats
        self.pandas_tools.print_stats()

        # output to file
        output_file = CSVWriter(output_path, output_filename)
        try:
            # NOTE(review): CSVConverter reads `parser.name`; this class
            # reads `parser.NAME` -- confirm both attributes exist.
            output_file.write_row(['DATASET:', self.parser.NAME])
            output_file.write_row(['TIME UNIT:', 'hours'])
            output_file.write_row(
                ['FIRST TIMESTAMP:', self.pandas_tools.first_timestamp()])
            header = ['Time Delta'] + list(self.pandas_tools.df.columns)
            output_file.write_row(header)
            self.pandas_tools.df_to_csv(output_file, 'hours')
        finally:
            # close the writer even if one of the writes above fails
            output_file.close()
class CSVConverter:
    """Read an input CSV through a parser into a data frame, then export it.

    The parser drives header and row parsing and supplies the column
    names, dataset name, time unit and plotting; ``PandasTools`` holds the
    resulting frame.
    """

    def __init__(self, parser, logger):
        """Store the parser and logger and create the frame holder."""
        self.parser = parser
        self.logger = logger
        self.pandas_tools = PandasTools(self.parser, self.logger)

        # initialized in the input_csv_to_df method
        self.input_file = None
        self.date_range = None
        self.adcp = None
        self.repeat_timestamp = None

        # per-row parsing state, updated by _process_line; defined here so
        # print_warning/_print_state never hit a missing attribute
        self.timestamp = None
        self.values = None
        self.previous_timestamp = None
        self.previous_values = None

    def input_csv_to_df(self, input_file, date_range=None, columns=None,
                        adcp=False, repeat_timestamp=False):
        """Parse ``input_file`` into the data frame and close the file.

        ``date_range``, when given, must offer ``inside_range(timestamp)``;
        rows outside it are skipped. ``columns`` overrides the parser's
        column names for the frame. ``adcp`` selects the three-argument
        ``add_row`` path. ``repeat_timestamp`` is True if it is expected
        that the same timestamp can be repeated (this disables the
        ordering/duplicate warnings).
        """
        self.input_file = input_file
        self.date_range = date_range
        self.adcp = adcp
        self.repeat_timestamp = repeat_timestamp

        try:
            while self.parser.parsing_header:
                header_line = self.input_file.read_line()
                self.parser.parse_header(header_line)

            cols = self.parser.columns if columns is None else columns
            self.pandas_tools.new_df(cols, repeat_timestamp)
            self._write_data()
        finally:
            # release the input even when parsing fails part-way
            self.input_file.close()

    def print_stats(self):
        """Print the statistics of the current data frame."""
        self.pandas_tools.print_stats()

    def df_to_output_csv(self, output_path, output_filename):
        """Write the frame to a CSV file; do nothing when the frame is empty.

        The file starts with three metadata rows (dataset name, time unit,
        first timestamp) and a header row, followed by the frame contents.
        """
        if self.pandas_tools.is_empty_df():
            return

        output_file = CSVWriter(output_path, output_filename)
        try:
            output_file.write_row(['DATASET:', self.parser.name])
            output_file.write_row(['TIME UNIT:', self.parser.time_unit])
            output_file.write_row(
                ['FIRST TIMESTAMP:', self.pandas_tools.first_timestamp()])
            # NOTE(review): the header always uses parser.columns, even if
            # input_csv_to_df was given a `columns` override -- confirm.
            output_file.write_row(['Time Delta'] + self.parser.columns)
            self.pandas_tools.df_to_csv(output_file, self.parser.time_unit)
        finally:
            # close the writer even if one of the writes above fails
            output_file.close()

    def plot(self, output_path):
        """Delegate plotting to the parser; do nothing when the frame is empty."""
        if self.pandas_tools.is_empty_df():
            return
        self.parser.plot(output_path, self.input_file.filename,
                         self.pandas_tools.df)

    ###########################################################################
    # internal helpers
    ###########################################################################

    def _write_data(self):
        """Consume data lines until the input reports nothing left to read."""
        self.previous_timestamp, self.previous_values = None, None
        while self.input_file.continue_reading:
            self._process_line()

    def _process_line(self):
        """Read lines until one parses into a row, then store that row.

        Lines for which ``parse_data`` returns None are skipped. Rows
        outside ``date_range`` are dropped without updating the
        previous-row state.
        """
        row = None
        while row is None:
            if not self.input_file.continue_reading:
                # Only unparseable lines were left: stop instead of reading
                # past the point where the outer loop would have stopped.
                return
            line = self.input_file.read_line()
            # { 'timestamp': datetime object, 'values': values array }
            row = self.parser.parse_data(line)

        self.timestamp, self.values = row['timestamp'], row['values']

        if self.date_range and not self.date_range.inside_range(
                self.timestamp):
            return

        if self.adcp is False:
            if not self.repeat_timestamp:
                self.print_warning()
            self.pandas_tools.add_row(self.timestamp, self.values)
        else:
            # print the timestamp whenever the day changes between rows
            if (self.previous_timestamp
                    and self.previous_timestamp.day != self.timestamp.day):
                print(self.timestamp)
            self.pandas_tools.add_row(self.timestamp, self.values, self.adcp)

        self.previous_timestamp, self.previous_values = (
            self.timestamp, self.values)

    def print_warning(self):
        """Log a warning when the current timestamp does not advance.

        Nothing is logged for the first row or for a strictly increasing
        timestamp. Otherwise the message distinguishes a timestamp that
        went backwards, an exact duplicate row, and a repeated timestamp
        with different values.
        """
        # first row OR positive timestamp delta
        if (self.previous_timestamp is None
                or self.timestamp > self.previous_timestamp):
            return

        if self.timestamp < self.previous_timestamp:
            self._print_state("WARNING: timestamp < previous_timestamp")
        elif self.values == self.previous_values:
            # self.timestamp == self.previous_timestamp, same values
            self._print_state(
                "WARNING: timestamp == previous_timestamp => duplicate row")
        else:
            # self.timestamp == self.previous_timestamp, different values
            self._print_state(
                "WARNING: timestamp == previous_timestamp => different row")

    def _print_state(self, message=None):
        """Log the optional message followed by the current/previous row state."""
        if message is not None:
            self.logger.info(message)
        self.logger.info('previous_timestamp =%s', self.previous_timestamp)
        self.logger.info('timestamp =%s', self.timestamp)
        self.logger.info('previous_values =%s', self.previous_values)
        self.logger.info('values =%s', self.values)
        self.logger.info('')