def station_count(self):
    """Return the number of stations reported in each hourly bucket.

    Scans every data point in ``self.data_file.buckets``; when a point's
    JSON payload contains a 'stations' list, that hour's count is set to
    its length (later points in the same hour overwrite earlier ones).

    :return: list of 24 station counts indexed by hour (0 where no point
        in that hour carried a 'stations' field)
    :raises ValueError: if a data point's 'data' field is not valid JSON
    """
    stations = [0] * 24
    for hour in self.data_file.buckets:
        for pt in self.data_file.buckets[hour]:
            try:
                pt_data = json.loads(pt['data'])
            except ValueError:
                logger.error(f"Invalid json string: {pt['data']}")
                # Re-raise the original JSONDecodeError so its message and
                # position info are preserved (was `raise ValueError`, which
                # replaced it with a bare, message-less instance).
                raise
            if 'stations' in pt_data:
                stations[hour] = len(pt_data['stations'])
    logger.info(f"stations: {stations}")
    return stations
def read_data(filename):
    """Read newline-delimited JSON records from *filename*.

    :param filename: path to a file containing one JSON object per line
    :return: list of parsed objects sorted by their 'timestamp' key
    :raises ValueError: if any line is not valid JSON
    """
    result = []
    with open(filename, 'r') as json_file:
        # Iterate the file lazily instead of materializing every line
        # into a throwaway list first.
        for json_str in json_file:
            try:
                result.append(json.loads(json_str))
            except ValueError:
                # Report the actual file (the message previously had a
                # hard-coded "(unknown)" placeholder) and re-raise the
                # original decode error so its details are not lost.
                logger.error(f"Invalid json string in {filename}: {json_str}")
                raise
    return sorted(result, key=lambda x: x['timestamp'])
def avg_upd_not_dropped(self):
    """Average gap, in seconds, between consecutive updates that were
    not dropped.

    Walks each hourly bucket pairwise and accumulates the timestamp
    difference of every consecutive pair that ``is_dropped`` rejects.

    :return: mean gap rounded to 2 decimals, or 1.0 when every
        consecutive pair was dropped (logged as an error)
    """
    total_gap = 0
    kept_pairs = 0
    for bucket in self.data_file.buckets.values():
        # Pair each data point with its successor in the bucket.
        for earlier, later in zip(bucket, bucket[1:]):
            if is_dropped(later['timestamp'], earlier['timestamp']):
                continue
            total_gap += later['timestamp'] - earlier['timestamp']
            kept_pairs += 1
    if not kept_pairs:
        logger.error("every update is dropped")
        return 1.0
    average = round(total_gap / kept_pairs, 2)
    logger.info(f"average upd duration if not dropped:{average}s")
    return average
def latest_dr_ver(self):
    """Latest driver version seen in each hourly bucket.

    The last data point in an hour whose JSON payload carries a
    'dr_ver' field wins; hours with no such point remain ``None`` and
    are reported via the log.

    :return: list of 24 driver versions (or None) indexed by hour
    :raises ValueError: if a data point's 'data' field is not valid JSON
    """
    dr_ver = [None] * 24
    no_dr_ver = []
    for hour in self.data_file.buckets:
        for pt in self.data_file.buckets[hour]:
            try:
                pt_data = json.loads(pt['data'])
            except ValueError:
                logger.error(f"Invalid json string: {pt['data']}")
                # Re-raise the original JSONDecodeError instead of a
                # bare, message-less `raise ValueError`.
                raise
            if 'dr_ver' in pt_data:
                dr_ver[hour] = pt_data['dr_ver']
        if dr_ver[hour] is None:
            no_dr_ver.append(hour)
    if no_dr_ver:
        logger.info(f"no driver version in hours: {no_dr_ver}")
    logger.info(f"driver version: {dr_ver}")
    return dr_ver
def neighbor_count(self):
    """Number of neighbors reported per hour.

    Only "slow"-type data points carry a 'neighbors' list; the last such
    point in an hour wins. An hour whose count is still 0 afterwards is
    filled with the most recent earlier hour's count.
    NOTE(review): that carry-forward assumes buckets iterates hours in
    chronological order — confirm against how buckets is built.

    :return: list of 24 neighbor counts indexed by hour
    :raises ValueError: if a slow data point's 'data' is not valid JSON
    """
    neighbors = [0] * 24
    prev = 0
    for hour in self.data_file.buckets:
        for pt in self.data_file.buckets[hour]:
            if pt['type'] != "slow":
                continue
            try:
                pt_data = json.loads(pt['data'])
            except ValueError:
                logger.error(f"Invalid json string: {pt['data']}")
                # Re-raise the original JSONDecodeError instead of a
                # bare, message-less `raise ValueError`.
                raise
            try:
                neighbors_count = len(pt_data['neighbors'])
            except KeyError:
                # Best-effort: a slow update without 'neighbors' is
                # logged and skipped, not fatal.
                logger.error(f"slow update at {pt['timestamp']} does not have neighbors")
            else:
                neighbors[hour] = neighbors_count
                prev = neighbors_count
        if neighbors[hour] == 0:
            # Carry the previous count forward when this hour produced
            # no (non-zero) neighbor count of its own.
            neighbors[hour] = prev
    logger.info(f"neighbors: {neighbors}")
    return neighbors