def __init__(self, report_model, files_name):
    """
    Set up the CSV database state without opening any file.

    :param report_model: object that inherits from ReportModel and
                         defines the type of Report
    :type report_model: ReportModel
    :param list files_name: list of .csv file names
    """
    # NOTE(review): the meaning of the third positional argument (False)
    # is defined by BaseDB, which is not visible here.
    BaseDB.__init__(self, report_model, False)

    #: (list): list of .csv file names
    self.files_name = files_name

    # Internal reading state, one entry per file: the pending row, the
    # csv reader and the underlying file object.
    reading_state = {}
    for csv_path in self.files_name:
        reading_state[csv_path] = {
            'next_line': [],
            'reader': None,
            'file': None,
        }
    self.tmp = reading_state

    #: timestamp of the report currently being read; used to tell a new
    #: report from more rows of the same one. Starts at timestamp 0.
    self.saved_timestamp = utils.timestamp_to_datetime(0)
def connect(self):
    """
    Override from BaseDB.

    Close the files that are already open, then reopen every .csv file,
    read its first row and check that the common columns are present.
    Finally remember the timestamp of the first row of the first file.

    :raises CsvBadFilePathError: if one of the files does not exist
    :raises CsvBadCommonKeysError: if a file lacks one of the common
                                   columns listed in KEYS_COMMON
    """
    # Close files left open by a previous call
    for path_file in self.files_name:
        opened = self.tmp[path_file]['file']
        if opened is not None:
            opened.close()

    # Start from the same value __init__ uses, so a file list with no
    # usable first row does not keep a timestamp from an earlier pass.
    self.saved_timestamp = utils.timestamp_to_datetime(0)

    # Open all files with csv and read the first row
    for path_file in self.files_name:
        state = self.tmp[path_file]
        try:
            state['file'] = open(path_file)
        except FileNotFoundError as error:
            # Keep the original error attached as the explicit cause
            raise CsvBadFilePathError(error) from error
        state['reader'] = csv.DictReader(state['file'])
        state['next_line'] = self._next(path_file)

        # Check common keys. When there is no data row (assumes _next
        # returns None at end of file -- TODO confirm), fall back to the
        # header parsed by the reader instead of failing on None.
        first_row = state['next_line']
        if first_row is not None:
            columns = first_row
        else:
            columns = state['reader'].fieldnames or ()
        for key in KEYS_COMMON:
            if key not in columns:
                raise CsvBadCommonKeysError("Wrong columns keys")

    # Save the first timestamp, if there is a first file with a data row
    if self.files_name:
        first_row = self.tmp[self.files_name[0]]['next_line']
        if first_row is not None:
            self.saved_timestamp = utils.timestamp_to_datetime(
                int(first_row['timestamp']))
def __next__(self) -> Report:
    """
    Build the next report.

    Rows whose timestamp equals the saved timestamp are merged, across
    all files, into one dict which is then deserialized through the
    report model.

    :return: result of ``report_model.get_type().deserialize`` applied to
             the merged rows
    :raises StopIteration: when no row matched the saved timestamp; every
                           open file is closed before raising
    """
    merged = {}
    target_timestamp = self.saved_timestamp

    for csv_path in self.filenames:
        pending = self.tmp_read[csv_path]['next_line']

        # A pending row of None means this file has nothing more to give
        while pending is not None:
            stamp = utils.timestamp_to_datetime(int(pending['timestamp']))

            if stamp > target_timestamp:
                # The row belongs to a later report: leave it pending.
                # Only the first file moves the saved timestamp forward.
                if csv_path == self.filenames[0]:
                    self.saved_timestamp = stamp
                break

            if stamp == target_timestamp:
                utils.dict_merge(
                    merged,
                    self.report_model.from_csvdb(
                        csv_path.split('/')[-1], pending))

            # Rows at or before the target timestamp are consumed
            pending = self._next(csv_path)
            self.tmp_read[csv_path]['next_line'] = pending

    if merged:
        return self.report_model.get_type().deserialize(merged)

    # Nothing was merged: close the files and end the iteration
    for csv_path in self.filenames:
        handle = self.tmp_read[csv_path]['file']
        if handle is not None:
            handle.close()
    raise StopIteration()
def __init__(self, current_path="/tmp/csvdbtest"):
    """
    :param str current_path: Current path where files are read/written.
                             A trailing '/' is appended when missing; an
                             empty string is kept as is.
    """
    super().__init__()

    #: (list): list of .csv file names
    self.filenames = []

    # Use endswith() rather than indexing [-1], which raises IndexError on
    # an empty string. An empty path stays empty so that it is not turned
    # into the filesystem root '/'.
    needs_separator = current_path != '' and not current_path.endswith('/')

    #: (str): current path, '/'-terminated unless empty
    self.current_path = current_path + '/' if needs_separator else current_path

    #: timestamp of the report currently being read; used to tell a new
    #: report from more rows of the same one. Starts at timestamp 0.
    self.saved_timestamp = utils.timestamp_to_datetime(0)
def __init__(self, db, filenames, report_model, stream_mode):
    """
    Open every CSV file, read its first row and validate its columns.

    :param db: forwarded unchanged to the parent constructor
    :param list filenames: paths of the .csv files to read
    :param report_model: forwarded to the parent constructor
    :param stream_mode: forwarded unchanged to the parent constructor
    :raises CsvBadFilePathError: if one of the files does not exist
    :raises CsvBadCommonKeysError: if a file lacks one of the common
                                   columns listed in CSV_HEADER_COMMON
    """
    super().__init__(db, report_model, stream_mode)

    self.filenames = filenames

    # Internal reading state, one entry per file: the pending row, the
    # csv reader and the underlying file object.
    self.tmp_read = {
        filename: {
            'next_line': [],
            'reader': None,
            'file': None,
        }
        for filename in filenames
    }

    # Always define the attribute, so it exists even when there is no
    # file or the first file has no data row.
    self.saved_timestamp = utils.timestamp_to_datetime(0)

    try:
        # Open all files with csv and read the first row
        for filename in self.filenames:
            state = self.tmp_read[filename]
            try:
                state['file'] = open(filename)
            except FileNotFoundError as error:
                # Keep the original error attached as the explicit cause
                raise CsvBadFilePathError(error) from error
            state['reader'] = csv.DictReader(state['file'])
            state['next_line'] = self._next(filename)

            # Check common keys. When there is no data row (assumes _next
            # returns None at end of file -- TODO confirm), fall back to
            # the header parsed by the reader instead of failing on None.
            first_row = state['next_line']
            if first_row is not None:
                columns = first_row
            else:
                columns = state['reader'].fieldnames or ()
            for key in CSV_HEADER_COMMON:
                if key not in columns:
                    raise CsvBadCommonKeysError("Wrong columns keys")

        # Save the first timestamp
        if self.filenames:
            first_row = self.tmp_read[self.filenames[0]]['next_line']
            if first_row is not None:
                self.saved_timestamp = utils.timestamp_to_datetime(
                    int(first_row['timestamp']))
    except Exception:
        # Construction failed: nobody will receive this object, so close
        # the files opened so far instead of leaking them.
        for state in self.tmp_read.values():
            if state['file'] is not None:
                state['file'].close()
        raise