Example 1
    def walk_files(self, args):
        nr, files = args
        old_logger = self.logger
        self.logger = self.logger.getChild(str(nr))
        self.logger.info(__("{} starting", nr))

        with self.new_client() as client:
            row_count = 0
            last_save = -1
            has_tmp_file = False
            for file in progress(files, logger=self.logger):
                try:
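                    # Checkpoint the file list before parsing so an interrupted run can resume.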
                    if last_save != row_count:
                        with open(self.checkpoint_copy_file.format(nr), "wb") as f:
                            pickle.dump(files, f)
                        last_save = row_count
                        has_tmp_file = True

                    row_count += self.parse_file(client, file)

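                    # After a successful parse, atomically promote the temporary checkpoint.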
                    if has_tmp_file:
                        os.replace(self.checkpoint_copy_file.format(nr), self.checkpoint_file.format(nr))
                        has_tmp_file = False
                except BaseException:
                    self.logger.error(__("In file {}", file))
                    raise

        self.logger.info(__("finished reading {} rows", row_count))
        self.logger = old_logger
        return row_count
Example 2
def preprocess_cycle(nr, client, queue, sname, selector, fields, detector, dry_run=False):
    logger.info(__("Processing #{}: {} {}", nr, detector.attr, sname))
    stream = client.stream_params("samples", fields=fields, where=selector, group_order_by="ORDER BY time ASC")
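    # Wrap the stream so progress updates are periodically pushed to the shared queue.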
    stream = progress(stream, delay=4, remote=queue.put)
    cycles, cycles_disc = detector(stream)

    if not dry_run:
        logger.info(__("Writing {} + {} = {} cycles", len(cycles), len(cycles_disc),
                       len(cycles) + len(cycles_disc)))
        client.write_points(
            detector.cycles_to_timeseries(cycles + cycles_disc, "charge_cycles"),
            tags={'detector': detector.attr},
            time_precision=client.time_epoch)

    logger.info(__("Task #{}: {} {} completed", nr, detector.attr, sname))
    return detector.attr, nr, len(cycles), len(cycles_disc)
Example 3
def _insert_into_db_and_archive_logs(path_and_data: Iterator[Tuple[Directory,
                                                                   File,
                                                                   Data]],
                                     queue: Queue = None):
    """
    :param path_and_data: an iterator over directories, log file names and their data
    """

    if arguments["--archive"]:
        logger.info("Start archiving all files")
    else:
        logger.info("Start uploading log files")

    with influxdb.connect(**config["webike.influx"]) as client:
        for directory, filename, data in progress(path_and_data,
                                                  delay=10,
                                                  remote=queue.put):
            # noinspection PyBroadException
            try:
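                # In archive-only mode just move the file; otherwise upload its
                # data to InfluxDB first and archive it on success.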
                if arguments["--archive"]:
                    _archive_log(directory, filename)
                elif data is not None:
                    logger.debug(__("Upload file {file}", file=filename))
                    logger.debug(data)
                    client.write(data,
                                 {"db": config["webike.influx.database"]})
                    _archive_log(directory, filename)
                elif arguments["--strict"]:
                    _move_to_problem_folder(directory, filename)
            # try to import as many logs as possible, so just log any unexpected exceptions and keep going
            except KeyboardInterrupt:
                logger.error(
                    __("Interrupted by user at file {filename} in {directory}",
                       filename=filename,
                       directory=directory.name))
                raise
            except Exception:
                logger.exception(
                    __("Error with file {filename} in {directory}:",
                       filename=filename,
                       directory=directory.name))
                _move_to_problem_folder(directory, filename)
Example 4
    def _read_rowdata_packet(self):
        """Read a rowdata packet for each data row in the result set."""
        rows = []
        for _ in progress(itertools.count(),
                          logger=logger,
                          level=logging.DEBUG,
                          verb="Got",
                          objects="rows"):  # == while True
            packet = self.connection._read_packet()
            if self._check_packet_is_eof(packet):
                break
            rows.append(self._read_row_from_packet(packet))

        self.affected_rows = len(rows)
        self.rows = tuple(rows)
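        # Log queries that took noticeably long, together with the affected row count.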
        dur = time.perf_counter() - self.connection._query_start
        if dur > 4:
            logger.debug(
                __("Took {:.2f}s for executing query affecting {:,} rows", dur,
                   len(rows)))
        self.connection = None  # release reference to kill cyclic reference.
Example 5
def preprocess_trip(nr, client, queue, sname, sselector, dry_run=False):
    logger.info(__("Processing #{}: {}", nr, sname))
    detector = TripDetection(time_epoch=client.time_epoch)
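    # Stream only the columns the trip detector needs, restricted to samples
    # where the vehicle was actually moving.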
    stream = client.stream_params(
        "samples",
        fields="time, veh_speed, participant, veh_odometer, hvbatt_soc, outside_air_temp, "
               "fuel_rate, hvbatt_current, hvbatt_voltage, hvbs_cors_crnt, hvbs_fn_crnt",
        where=join_selectors([sselector, "veh_speed > 0"]), group_order_by="ORDER BY time ASC"
    )
    stream = progress(stream, delay=4, remote=queue.put)
    cycles, cycles_disc = detector(stream)

    if not dry_run:
        logger.info(__("Writing {} + {} = {} trips", len(cycles), len(cycles_disc),
                       len(cycles) + len(cycles_disc)))
        client.write_points(
            detector.cycles_to_timeseries(cycles + cycles_disc, "trips"),
            tags={'detector': detector.attr},
            time_precision=client.time_epoch)

    logger.info(__("Task #{}: {} completed", nr, sname))
    return nr, len(cycles), len(cycles_disc)
Example 6
def analyze(root):
    headers = Counter()
    ids = {}
    files_with_3_infos = []

    for path in progress(SafeFileWalker(root)):
        try:
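            # Derive the participant number from the file path; "10b" maps to 11.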
            m = re.search('Participant ([0-9]{2}b?)', path)
            participant = m.group(1)
            if participant == "10b":
                participant = 11
            else:
                participant = int(participant)
            if participant not in range(1, 12):
                logger.warning(__("Illegal participant {} from file {}", participant, path))

            size = os.stat(path).st_size
            with open(path, 'rb') as f:
                first = f.readline().decode()
                second = f.readline().decode()
                last = None
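                # Seek backwards from the end in growing steps to read the last complete line.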
                if f.tell() < size:
                    offs = -100
                    while True:
                        f.seek(max(offs, -size), 2)
                        lines = f.readlines()
                        if len(lines) > 1:
                            last = lines[-1].decode()
                            break
                        if -offs > size:
                            break
                        offs *= 2

                if "Trip" in first:
                    logger.warning(__("Skipping trip file {}", path))
                    continue
                header = first.strip().split(",")
                if header[0] != "Timestamp":
                    logger.warning(__("Illegal header row in {}:1 '{}'", path, first.strip()))
                headers.update(header)

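                # The second line should hold exactly three fields, the third of them empty.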
                infos = second.strip().split(",")
                if len(infos) != 3 or len(infos[2]) != 0:
                    if len(infos) > 2 and infos[2] in FW3I_VALUES and FW3I_FOLDER in path:
                        files_with_3_infos.append(path)
                    else:
                        logger.warning(__("Invalid info in {}:2 '{}'", path, second.strip()))
                if infos[1] not in ids:
                    ids[infos[1]] = {"min": datetime(year=2100, month=1, day=1),
                                     "max": datetime(year=1900, month=1, day=1),
                                     "participants": Counter(), "count": 0}
                ids[infos[1]]["count"] += 1
                ids[infos[1]]["participants"].update([participant])

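                # The first value of the last line is an offset in milliseconds from the
                # start time, giving the time range covered by this log.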
                if last:
                    values = last.split(",")
                    min_time = datetime.strptime(infos[0], "%m/%d/%Y %I:%M:%S %p")
                    max_time = min_time + timedelta(milliseconds=int(values[0]))
                    if ids[infos[1]]["min"] > min_time:
                        ids[infos[1]]["min"] = min_time
                        ids[infos[1]]["min_file"] = path
                    if ids[infos[1]]["max"] < max_time:
                        ids[infos[1]]["max"] = max_time
                        ids[infos[1]]["max_file"] = path
        except BaseException:
            logger.error(__("In file {}", path))
            raise

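    # Stringify the datetime bounds before returning the collected statistics.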
    for v in ids.values():
        v["min"] = str(v["min"])
        v["max"] = str(v["max"])
    return headers, ids, files_with_3_infos