def read_timeseries_tail_from_file(filename):
    """Return the date of the last record of the time series in *filename*.

    The file is scanned from the end (via ropen). Returns None if the file
    appears to be an empty time series (its last nonempty line looks like a
    "name=value" header line); raises ValueError if the file does not look
    like a time series at all.

    Fixes: the original used the Python-2-only ``fp.next()`` and
    ``exception.message``; these are replaced with the ``next()`` builtin and
    ``str(exception)``, which behave identically and also work on Python 3.
    """

    def get_next_line(fp):
        # Translate exhaustion of the reverse iterator into a clear error
        # that names the offending file.
        try:
            return next(fp)
        except StopIteration:
            raise ValueError('File {} does not contain a time series'.
                             format(filename))

    with ropen(filename) as fp:
        last_line = get_next_line(fp)
        datestring = last_line.split(',')[0]
        try:
            return iso8601.parse_date(datestring, default_timezone=None)
        except ValueError as e:
            exception = e

        # We were unable to read the last line. Perhaps the time series has no
        # data?
        while last_line.isspace():
            last_line = get_next_line(fp)  # Skip empty lines
        if '=' in last_line:
            return None  # Last line looks like "name=value" - empty series

        # No evidence that this is a time series with no data.
        raise ValueError(
            str(exception) + ' (file {}, last line)'.format(filename))
def update_ts_temp_file(cache_dir, connection, id):
    """Create or refresh the cached ``<id>.hts`` file for a time series.

    If a usable cache file exists, only the records newer than the file's
    last record are appended; otherwise (no file, or file smaller than 3
    bytes, or the database's series now starts after the file's last date)
    the whole series is re-read from the database and the file is rewritten
    atomically via a temp file + move.
    """
    full_rewrite = False
    afilename = os.path.join(cache_dir, '%d.hts' % (id, ))
    if os.path.exists(afilename):
        if os.path.getsize(afilename) < 3:
            # Too small to contain even one record; treat as corrupt/empty.
            full_rewrite = True
    # Update the file in the case of logged data, if this is possible
    if os.path.exists(afilename) and not full_rewrite:
        # ropen reads from the end, so readline() yields the LAST line,
        # whose first comma-separated field is the last cached date.
        with ropen(afilename) as fileobject:
            line = fileobject.readline()
        lastdate = datetime_from_iso(line.split(',')[0])
        ts = Timeseries(id)
        # bottom_only: presumably fetches only the tail of the series from
        # the db — TODO confirm against Timeseries.read_from_db.
        ts.read_from_db(connection, bottom_only=True)
        if len(ts) > 0:
            db_start, db_end = ts.bounding_dates()
            if db_start > lastdate:
                # Database series now starts after the cached data; the
                # cache cannot be extended, rewrite it from scratch.
                full_rewrite = True
            elif db_end > lastdate:
                # Append only the records strictly after lastdate.
                lastindex = ts.index(lastdate)
                with open(afilename, 'a') as fileobject:
                    ts.write(fileobject, start=ts.keys()[lastindex + 1])
    # Create the file from scratch (via a temp file, so readers never see
    # a partially-written cache) when missing or marked for full rewrite.
    if not os.path.exists(afilename) or full_rewrite:
        ts = Timeseries(id)
        ts.read_from_db(connection)
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        tempfile_handle, tempfile_name = tempfile.mkstemp(dir=cache_dir)
        with os.fdopen(tempfile_handle, 'w') as afile:
            ts.write(afile)
        shutil.move(tempfile_name, afilename)
def update_ts_temp_file(cache_dir, connection, id):
    """Create or refresh the cached ``<id>.hts`` file for a time series.

    If a usable cache file exists, only records newer than the file's last
    record are appended; otherwise (no file, file smaller than 3 bytes, or
    the database series now starts after the file's last date) the whole
    series is re-read from the database and the file is rewritten
    atomically via a temp file + move.
    """
    full_rewrite = False
    afilename = os.path.join(cache_dir, '%d.hts'%(id,))
    if os.path.exists(afilename):
        if os.path.getsize(afilename)<3:
            # Too small to contain even one record; treat as corrupt/empty.
            full_rewrite = True
    #Update the file in the case of logged data, if this is possible
    if os.path.exists(afilename) and not full_rewrite:
        # ropen reads from the end: readline() returns the file's LAST
        # line; its first comma-separated field is the last cached date.
        with ropen(afilename) as fileobject:
            line = fileobject.readline()
        lastdate = datetime_from_iso(line.split(',')[0])
        ts = Timeseries(id)
        # bottom_only: presumably reads only the tail of the series from
        # the db — TODO confirm against Timeseries.read_from_db.
        ts.read_from_db(connection, bottom_only=True)
        if len(ts)>0:
            db_start, db_end = ts.bounding_dates()
            if db_start>lastdate:
                # Cache cannot be extended; rebuild it from scratch.
                full_rewrite = True
            elif db_end>lastdate:
                # Append only the records strictly after lastdate.
                lastindex = ts.index(lastdate)
                with open(afilename, 'a') as fileobject:
                    ts.write(fileobject, start=ts.keys()[lastindex+1])
    # Create the file from scratch (through a temp file, so readers never
    # observe a partially-written cache) when missing or flagged above.
    if not os.path.exists(afilename) or full_rewrite:
        ts = Timeseries(id)
        ts.read_from_db(connection)
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        tempfile_handle, tempfile_name = tempfile.mkstemp(dir=cache_dir)
        with os.fdopen(tempfile_handle, 'w') as afile:
            ts.write(afile)
        shutil.move(tempfile_name, afilename)
def _get_storage_tail_from_file(self, filename, after_timestamp):
    """Collect records newer than *after_timestamp*, scanning backwards.

    Returns a tuple ``(records, reached_after_timestamp)`` where records
    is a chronologically-ordered list of {"timestamp": ..., "line": ...}
    dicts and the flag tells whether a record at or before
    after_timestamp was actually encountered.
    """
    collected = []
    found_boundary = False
    with ropen(filename, encoding=self.encoding, errors="replace") as backwards:
        last_seen = ""
        for line in backwards:
            if self._must_ignore_line(line):
                self.logger.debug("Ignoring line '{}'".format(line))
                continue
            self.logger.debug("Parsing line '{}'".format(line))
            stamp = self._fix_dst(
                self._extract_timestamp(line).replace(second=0))
            if stamp == last_seen:
                # Duplicate timestamps are skipped, keeping the one that
                # appears later in the file (seen first when reading back).
                self.logger.warning(
                    "Omitting line with repeated timestamp "
                    + stamp.isoformat())
                continue
            last_seen = stamp
            self.logger.debug("Timestamp: " + stamp.isoformat())
            if stamp <= after_timestamp:
                found_boundary = True
                break
            collected.append({"timestamp": stamp, "line": line})
    # We read newest-first; callers expect chronological order.
    return (list(reversed(collected)), found_boundary)
def _extract_last_date_from_file(self, filename):
    """Return the timestamp of the last usable line of *filename*.

    Lines rejected by _must_ignore_line are skipped; returns None when
    no usable line exists.
    """
    with ropen(filename, encoding=self.encoding, errors="replace") as reverse_lines:
        for record in reverse_lines:
            if not self._must_ignore_line(record):
                raw = self._extract_timestamp(record)
                return self._fix_dst(raw.replace(second=0))
    return None
def test_simple(self):
    """ropen yields the file's lines last-to-first, then StopIteration."""
    filename = os.path.join("tests", "data", "simple.txt")
    expected = ["Line 7\n", "Line 6\n", "Line 5\n", "Line 4\n",
                "Line 3\n", "Line 2\n", "Line 1\n"]
    with ropen(filename) as f:
        for want in expected:
            self.assertEqual(next(f), want)
        with self.assertRaises(StopIteration):
            next(f)
def test_utf_noeol_small_buffer(self):
    """UTF-8 decoding works with a tiny buffer; a missing final EOL is kept."""
    filename = os.path.join("tests", "data", "utf_noeol.txt")
    # The file's last line has no trailing newline, so the first yielded
    # line lacks "\n"; all earlier lines keep theirs.
    expected = ["Γραμμή 7", "Γραμμή 6\n", "Γραμμή 5\n", "Γραμμή 4\n",
                "Γραμμή 3\n", "Γραμμή 2\n", "Γραμμή 1\n"]
    with ropen(filename, encoding="utf8", bufsize=3) as f:
        for want in expected:
            self.assertEqual(next(f), want)
        with self.assertRaises(StopIteration):
            next(f)
def _set_start_and_end_date(self):
    """Cache the dates of the datafile's first and last records on self.

    Both attributes become None when there is no datafile or it is too
    small (< 10 bytes) to hold a record.
    """
    if not self.datafile or self.datafile.size < 10:
        self.start_date_utc = self.end_date_utc = None
        return
    tzinfo = self.time_zone.as_tzinfo

    def read_record_date(fileobj):
        # A record's date is everything before the first comma.
        return iso8601.parse_date(
            fileobj.readline().split(",")[0], default_timezone=tzinfo)

    # Forward read gives the first record; reverse read gives the last.
    with open(self.datafile.path, "r") as f:
        self.start_date_utc = read_record_date(f)
    with ropen(self.datafile.path, bufsize=80) as f:
        self.end_date_utc = read_record_date(f)
def test_simple_readline(self):
    """readline() returns lines last-to-first, then "" forever at EOF."""
    filename = os.path.join("tests", "data", "simple.txt")
    expected = ["Line 7\n", "Line 6\n", "Line 5\n", "Line 4\n",
                "Line 3\n", "Line 2\n", "Line 1\n", "", "", ""]
    with ropen(filename) as f:
        for want in expected:
            self.assertEqual(f.readline(), want)
def _get_last_dates(self, filename, n):
    """
    Assuming specified file contains a time series, scan it from the
    bottom and return the list of the n last dates (may be less than n
    if the time series is too small). 'filename' is used in error
    messages.

    Raises click.ClickException if the file has no Timezone header, and
    iso8601.ParseError (annotated with file and position) if a data line
    has an unparseable date.
    """
    # Get the time zone
    with open(filename) as fp:
        # Guard: for a completely empty file the loop body never runs, so
        # without this the subsequent check would raise NameError instead
        # of the intended ClickException.
        line = ""
        for line in fp:
            if line.startswith("Timezone") or (line and line[0] in "0123456789"):
                break
        if not line.startswith("Timezone"):
            raise click.ClickException(
                "{} does not contain Timezone".format(filename))
        zonestr = line.partition("=")[2].strip()
        timezone = TzinfoFromString(zonestr)

    result = []
    previous_line_was_empty = False
    with ropen(filename) as fp:
        for i, line in enumerate(fp):
            if i >= n:
                break
            line = line.strip()
            # Ignore empty lines
            if not line:
                previous_line_was_empty = True
                continue
            # Is the line in the form of an ini file configuration line?
            items = line.split("=")
            if len(items) and (
                    "," not in items[0]) and previous_line_was_empty:
                break  # Yes; we reached the start of the file
            previous_line_was_empty = False
            datestring = line.split(",")[0]
            try:
                # Reading bottom-up, so prepend to keep chronological order.
                result.insert(
                    0, iso8601.parse_date(datestring, default_timezone=timezone))
            except iso8601.ParseError as e:
                raise iso8601.ParseError(
                    str(e) + " (file {}, {} lines from the end)".format(
                        filename, i + 1))
    return result
def get_last_dates(self, filename, n):
    """
    Assuming specified file contains a time series, scan it from the
    bottom and return the list of the n last dates (may be less than n
    if the time series is too small). 'filename' is used in error
    messages.

    Raises iso8601.ParseError (annotated with file and position) if a
    data line has an unparseable date.
    """
    # Get the time zone
    with open(filename) as fp:
        # Guard: for a completely empty file the loop body never runs;
        # without this the startswith() check below would raise NameError.
        line = ''
        for line in fp:
            if line.startswith('Timezone') or (
                    line and line[0] in '0123456789'):
                break
        zonestr = line.partition('=')[2].strip() \
            if line.startswith('Timezone') else ''
        timezone = TzinfoFromString(zonestr)

    result = []
    previous_line_was_empty = False
    with ropen(filename) as fp:
        for i, line in enumerate(fp):
            if i >= n:
                break
            line = line.strip()
            # Ignore empty lines
            if not line:
                previous_line_was_empty = True
                continue
            # Is the line in the form of an ini file configuration line?
            items = line.split('=')
            if len(items) and (',' not in items[0]) \
                    and previous_line_was_empty:
                break  # Yes; we reached the start of the file
            previous_line_was_empty = False
            datestring = line.split(',')[0]
            try:
                # Reading bottom-up, so prepend to keep chronological order.
                result.insert(0, iso8601.parse_date(
                    datestring, default_timezone=timezone))
            except iso8601.ParseError as e:
                raise iso8601.ParseError(
                    str(e) + ' (file {}, {} lines from the end)'
                    .format(filename, i + 1))
    return result
def _get_tail(self):
    """Read the part of the datafile after last_timeseries_end_date."""
    records = []
    previous_date = ''
    with ropen(self.filename) as backwards:
        for line in backwards:
            self.logger.debug(line)
            # Skip blank lines and lines belonging to other subsets.
            if not line.strip() or not self.subset_identifiers_match(line):
                continue
            date = self._fix_dst(self.extract_date(line).replace(second=0))
            if date == previous_date:
                self.logger.warning(
                    'WARNING: Omitting line with repeated date ' + str(date))
                continue
            previous_date = date
            self.logger.debug('Date: %s' % (date.isoformat()))
            if date <= self.last_timeseries_end_date:
                break
            records.append({'date': date, 'line': line})
    # Lines were gathered newest-first; store them chronologically.
    records.reverse()
    self.tail = records
def append_data(self, data):
    """Append the records in *data* to the datafile; return the count added.

    Falls back to set_data() when there is no datafile yet. Raises
    ValueError if the new records do not start strictly after the
    existing ones.
    """
    if not self.datafile:
        return self.set_data(data)
    series = self.get_empty_timeseries_object()
    series.read(data)
    if not len(series):
        return 0
    # The datafile's last line begins with the date of its last record.
    with ropen(self.datafile.path, bufsize=80) as f:
        last_existing_date = iso8601.parse_date(
            f.readline().split(',')[0]).replace(tzinfo=None)
    first_new_date = series.bounding_dates()[0]
    if last_existing_date >= first_new_date:
        message = (
            "Cannot append time series: "
            "its first record ({}) has a date earlier than the last "
            "record ({}) of the timeseries to append to."
        ).format(first_new_date, last_existing_date)
        raise ValueError(message)
    with open(self.datafile.path, 'a') as f:
        series.write(f)
    self.save()
    return len(series)
def append_data(self, data):
    """Append the records in *data* to the datafile; return the count added.

    Delegates to set_data() when there is no datafile or it is empty.
    Raises IntegrityError when the new data does not start strictly
    after the end of the stored data.
    """
    if (not self.datafile) or (os.path.getsize(self.datafile.path) == 0):
        return self.set_data(data)
    ahtimeseries = self._get_htimeseries_from_data(data)
    if not len(ahtimeseries.data):
        return 0
    # The datafile's last line begins with the end date of the stored data.
    with ropen(self.datafile.path, bufsize=80) as f:
        stored_end = iso8601.parse_date(
            f.readline().split(",")[0]).replace(tzinfo=None)
    incoming_start = ahtimeseries.data.index[0]
    if stored_end >= incoming_start:
        raise IntegrityError(
            (
                "Cannot append time series: "
                "its first record ({}) has a date earlier than the last "
                "record ({}) of the timeseries to append to."
            ).format(incoming_start, stored_end)
        )
    with open(self.datafile.path, "a") as f:
        ahtimeseries.write(f)
    self.save()
    return len(ahtimeseries.data)
def get_last_line(self):
    """Return the last substantial line of the datafile, or "".

    Returns "" when there is no datafile or it is smaller than 10 bytes.
    If the final line is very short (5 characters or fewer — e.g. a
    trailing fragment), the line before it is returned instead.
    """
    if not self.datafile or self.datafile.size < 10:
        return ""
    with ropen(self.datafile.path, bufsize=80) as reverse_file:
        candidate = reverse_file.readline()
        if len(candidate) > 5:
            return candidate
        return reverse_file.readline()