def walk_files(self, args):
    nr, files = args
    old_logger = self.logger
    self.logger = self.logger.getChild(str(nr))
    self.logger.info(__("{} starting", nr))
    with self.new_client() as client:
        row_count = 0
        last_save = -1
        has_tmp_file = False
        for file in progress(files, logger=self.logger):
            try:
                # checkpoint the file list to a temporary copy before parsing the next file
                if last_save != row_count:
                    with open(self.checkpoint_copy_file.format(nr), "wb") as f:
                        pickle.dump(files, f)
                    last_save = row_count
                    has_tmp_file = True
                row_count += self.parse_file(client, file)
                # atomically promote the temporary copy once the file was parsed successfully
                if has_tmp_file:
                    os.replace(self.checkpoint_copy_file.format(nr), self.checkpoint_file.format(nr))
                    has_tmp_file = False
            except:
                # log the offending file, then re-raise
                self.logger.error(__("In file {}", file))
                raise
        self.logger.info(__("finished reading {} rows", row_count))
    self.logger = old_logger
    return row_count
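# A minimal, standalone sketch of the checkpointing pattern used in walk_files(): the
# state is pickled to a temporary copy first and only promoted with os.replace() once
# the work step succeeded, so the live checkpoint file is never left half-written.
# The file names and the save_checkpoint helper below are illustrative assumptions,
# not part of the code above.
import os
import pickle

def save_checkpoint(state, path):
    tmp = path + ".tmp"
    with open(tmp, "wb") as f:
        pickle.dump(state, f)
    os.replace(tmp, path)  # atomic rename on POSIX and Windows

if __name__ == "__main__":
    save_checkpoint({"remaining": ["a.csv", "b.csv"]}, "checkpoint.pickle")
    with open("checkpoint.pickle", "rb") as f:
        print(pickle.load(f))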
def preprocess_cycle(nr, client, queue, sname, selector, fields, detector, dry_run=False):
    logger.info(__("Processing #{}: {} {}", nr, detector.attr, sname))
    stream = client.stream_params("samples", fields=fields, where=selector,
                                  group_order_by="ORDER BY time ASC")
    stream = progress(stream, delay=4, remote=queue.put)
    cycles, cycles_disc = detector(stream)

    if not dry_run:
        logger.info(__("Writing {} + {} = {} cycles",
                       len(cycles), len(cycles_disc), len(cycles) + len(cycles_disc)))
        client.write_points(
            detector.cycles_to_timeseries(cycles + cycles_disc, "charge_cycles"),
            tags={'detector': detector.attr},
            time_precision=client.time_epoch)
    logger.info(__("Task #{}: {} {} completed", nr, detector.attr, sname))
    return detector.attr, nr, len(cycles), len(cycles_disc)
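# preprocess_cycle() only assumes that `detector` is a callable that consumes the
# sample stream and returns two lists (kept cycles, discarded cycles), exposes an
# `attr` name, and (for the non-dry-run path) a cycles_to_timeseries() method. A toy
# detector honouring the callable part of that contract might look like this; the
# threshold logic is purely illustrative and not taken from the real detectors.
class ThresholdDetector:
    attr = "toy_threshold"

    def __init__(self, threshold=5.0):
        self.threshold = threshold

    def __call__(self, stream):
        cycles, discarded = [], []
        for sample in stream:
            (cycles if sample >= self.threshold else discarded).append(sample)
        return cycles, discarded

# e.g. ThresholdDetector()(iter([1.0, 7.5, 9.2])) -> ([7.5, 9.2], [1.0])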
def _insert_into_db_and_archive_logs(path_and_data: Iterator[Tuple[Directory, File, Data]],
                                     queue: Queue = None):
    """
    :param path_and_data: an iterator over directories, log file names, and their data
    """
    if arguments["--archive"]:
        logger.info("Start archiving all files")
    else:
        logger.info("Start uploading log files")

    with influxdb.connect(**config["webike.influx"]) as client:
        for directory, filename, data in progress(path_and_data, delay=10, remote=queue.put):
            # noinspection PyBroadException
            try:
                if arguments["--archive"]:
                    _archive_log(directory, filename)
                elif data is not None:
                    logger.debug(__("Upload file {file}", file=filename))
                    logger.debug(data)
                    client.write(data, {"db": config["webike.influx.database"]})
                    _archive_log(directory, filename)
                elif arguments["--strict"]:
                    _move_to_problem_folder(directory, filename)
            except KeyboardInterrupt:
                logger.error(__("Interrupted by user at file {filename} in {directory}",
                                filename=filename, directory=directory.name))
                raise
            # try to import as many logs as possible, so just log any unexpected exceptions and keep going
            except Exception:
                logger.exception(__("Error with file {filename} in {directory}:",
                                    filename=filename, directory=directory.name))
                _move_to_problem_folder(directory, filename)
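# The progress(..., remote=queue.put) pattern used throughout pushes periodic progress
# counts into a queue so a parent process can render a combined status display. A
# simplified stand-in illustrating the idea (an assumption, not the project's actual
# progress() implementation):
import time
from queue import Queue

def report_progress(iterable, delay=10, remote=None):
    count = 0
    last = time.perf_counter()
    for item in iterable:
        count += 1
        now = time.perf_counter()
        if remote is not None and now - last >= delay:
            remote(count)  # e.g. queue.put, sending the running item count
            last = now
        yield item

q = Queue()
total = sum(1 for _ in report_progress(range(100), delay=0, remote=q.put))
print(total, q.qsize())  # 100 items consumed, 100 progress reports with delay=0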
def _read_rowdata_packet(self):
    """Read a rowdata packet for each data row in the result set."""
    rows = []
    for _ in progress(itertools.count(), logger=logger, level=logging.DEBUG,
                      verb="Got", objects="rows"):  # == while True
        packet = self.connection._read_packet()
        if self._check_packet_is_eof(packet):
            break
        rows.append(self._read_row_from_packet(packet))

    self.affected_rows = len(rows)
    self.rows = tuple(rows)
    dur = time.perf_counter() - self.connection._query_start
    if dur > 4:
        logger.debug(__("Took {:.2f}s for executing query affecting {:,} rows", dur, len(rows)))
    self.connection = None  # release reference to kill cyclic reference.
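# Iterating progress(itertools.count()) above is just an instrumented `while True`:
# the counter never ends, so the loop runs until `break`, while the progress wrapper
# gets a chance to log how many rows were consumed so far. A minimal analogue of the
# read-until-EOF loop, with hypothetical helper names:
import itertools

def read_until_eof(read_packet, is_eof):
    rows = []
    for _ in itertools.count():  # == while True, but the iteration count is observable
        packet = read_packet()
        if is_eof(packet):
            break
        rows.append(packet)
    return rows

packets = iter([b"row1", b"row2", b"EOF"])
print(read_until_eof(lambda: next(packets), lambda p: p == b"EOF"))  # [b'row1', b'row2']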
def preprocess_trip(nr, client, queue, sname, sselector, dry_run=False):
    logger.info(__("Processing #{}: {}", nr, sname))
    detector = TripDetection(time_epoch=client.time_epoch)
    stream = client.stream_params(
        "samples",
        fields="time, veh_speed, participant, veh_odometer, hvbatt_soc, outside_air_temp, "
               "fuel_rate, hvbatt_current, hvbatt_voltage, hvbs_cors_crnt, hvbs_fn_crnt",
        where=join_selectors([sselector, "veh_speed > 0"]),
        group_order_by="ORDER BY time ASC"
    )
    stream = progress(stream, delay=4, remote=queue.put)
    cycles, cycles_disc = detector(stream)

    if not dry_run:
        logger.info(__("Writing {} + {} = {} trips",
                       len(cycles), len(cycles_disc), len(cycles) + len(cycles_disc)))
        client.write_points(
            detector.cycles_to_timeseries(cycles + cycles_disc, "trips"),
            tags={'detector': detector.attr},
            time_precision=client.time_epoch)
    logger.info(__("Task #{}: {} completed", nr, sname))
    return nr, len(cycles), len(cycles_disc)
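# join_selectors() is defined elsewhere in the project; based on its use above it
# combines individual WHERE conditions into one clause. A plausible minimal version
# (an assumption, not the project's actual implementation):
def join_selectors_sketch(selectors):
    parts = [s for s in selectors if s]
    return " AND ".join("({})".format(s) for s in parts)

print(join_selectors_sketch(["participant = '3'", "veh_speed > 0"]))
# -> (participant = '3') AND (veh_speed > 0)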
def analyze(root):
    headers = Counter()
    ids = {}
    files_with_3_infos = []
    for path in progress(SafeFileWalker(root)):
        try:
            m = re.search('Participant ([0-9]{2}b?)', path)
            participant = m.group(1)
            if participant == "10b":
                participant = 11
            else:
                participant = int(participant)
            if participant not in range(1, 12):
                logger.warning(__("Illegal participant {} from file {}", participant, path))

            size = os.stat(path).st_size
            with open(path, 'rb') as f:
                first = f.readline().decode()
                second = f.readline().decode()
                last = None
                if f.tell() < size:
                    # seek backwards from the end in doubling steps until a complete last line is found
                    offs = -100
                    while True:
                        f.seek(max(offs, -size), 2)
                        lines = f.readlines()
                        if len(lines) > 1:
                            last = lines[-1].decode()
                            break
                        if -offs > size:
                            break
                        offs *= 2

            if "Trip" in first:
                logger.warning(__("Skipping trip file {}", path))
                continue

            header = first.strip().split(",")
            if header[0] != "Timestamp":
                logger.warning(__("Illegal header row in {}:1 '{}'", path, first.strip()))
            headers.update(header)

            infos = second.strip().split(",")
            if len(infos) != 3 or len(infos[2]) != 0:
                # guard against short info rows before inspecting infos[2]
                if len(infos) > 2 and infos[2] in FW3I_VALUES and FW3I_FOLDER in path:
                    files_with_3_infos.append(path)
                else:
                    logger.warning(__("Invalid info in {}:2 '{}'", path, second.strip()))

            if infos[1] not in ids:
                ids[infos[1]] = {"min": datetime(year=2100, month=1, day=1),
                                 "max": datetime(year=1900, month=1, day=1),
                                 "participants": Counter(), "count": 0}
            ids[infos[1]]["count"] += 1
            ids[infos[1]]["participants"].update([participant])

            if last:
                values = last.split(",")
                min_time = datetime.strptime(infos[0], "%m/%d/%Y %I:%M:%S %p")
                max_time = min_time + timedelta(milliseconds=int(values[0]))
                if ids[infos[1]]["min"] > min_time:
                    ids[infos[1]]["min"] = min_time
                    ids[infos[1]]["min_file"] = path
                if ids[infos[1]]["max"] < max_time:
                    ids[infos[1]]["max"] = max_time
                    ids[infos[1]]["max_file"] = path
        except:
            logger.error(__("In file {}", path))
            raise

    for k, v in ids.items():
        v["min"] = str(v["min"])
        v["max"] = str(v["max"])
    return headers, ids, files_with_3_infos
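# The doubling backwards seek in analyze() is a common way to grab the last line of a
# large file without reading it whole. Extracted into a standalone helper for clarity
# (the function name is illustrative, not from the code above):
import os

def read_last_line(path):
    size = os.stat(path).st_size
    with open(path, "rb") as f:
        offs = -100
        while True:
            f.seek(max(offs, -size), 2)  # seek backwards relative to the end (whence=2)
            lines = f.readlines()
            if len(lines) > 1:
                # the last element starts after a newline, so it is a complete line
                return lines[-1].decode()
            if -offs > size:  # already covered the whole file: at most one line exists
                return lines[-1].decode() if lines else None
            offs *= 2

# e.g.: print(read_last_line("samples.csv"))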