def get_all_points(self, range=None, use_file=False, point_threshold=None):
    """
    Get all points of every time series.

    A point is a date-time that the time series has data for.

    The file "all-date-time-points" (relative to the current working
    directory) is used as a cache when use_file is true:
      * if the file exists, the points are read from it and the database
        is not touched (range and point_threshold are ignored in that case)
      * otherwise the points are queried from the database and then written
        to the file
    When use_file is false the database is always queried and nothing is
    written.

    Cache file format: one line per time series,
    "name,d1,d2,d3,...". Names or date-times containing a comma cannot be
    round-tripped through this format.

    :param range: forwarded to get_distinct_names to filter the time series
        whose points will be returned; range = [start_date, end_date]
    :param use_file: read from / write to the cache file as described above
    :param point_threshold: forwarded to get_distinct_names
    :return: a dictionary of the form
        {"time-series name": np.array([d1, d2, d3, ...])}
        where every dx is "<date>-<time>" (documented by the original
        author as "month/day/year-hours:minutes:seconds") and the dx of a
        series are in the order produced by "order by date, time"
    """
    cache_path = "all-date-time-points"

    if use_file and os.path.exists(cache_path):
        dt = {}
        with open(cache_path, 'r') as f:
            for line in f:
                # Strip only the line terminator: slicing off the last
                # character would corrupt a final line that has no newline.
                line = line.rstrip("\n")
                if not line:
                    # Blank lines carry no series; do not create a "" entry.
                    continue
                fields = line.split(",")
                dt[fields[0]] = fields[1:]
    else:
        self.assert_connected()
        tsnames_list = self.get_distinct_names(range=range,
                                               point_threshold=point_threshold)
        dt = {}
        for name in tsnames_list:
            # NOTE(review): name is interpolated into the SQL text, so a name
            # containing a single quote breaks the query. Switch to bound
            # parameters if execute_query supports them.
            rows = self.execute_query("select name, date, time from dataset "
                                      "where name='%s' order by date, time" % name)
            for row in rows:
                # Key on the name returned by the database, as before.
                dt.setdefault(row[0], []).append(row[1] + "-" + row[2])
        if use_file:
            with open(cache_path, 'w') as f:
                for key, value in dt.items():
                    print(key + "," + ",".join(value), file=f)

    return {key: np.array(value) for key, value in dt.items()}
def sortedDict(d):
    """
    Return a new dictionary holding the items of d ordered by value,
    largest value first.

    Items with equal values keep the relative order they had in d
    (the sort is stable). d itself is not modified.
    """
    ranked = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
    return {key: value for key, value in ranked}
def maxDictVal(dt):
    '''
    Find the key with the max value inside a dictionary and return it.

    When several keys share the max value, the first one in iteration
    order is returned.

    Returns -1 when no max can be determined: the dictionary is empty
    (ValueError), its values cannot be compared with each other
    (TypeError), or dt has no items() method, e.g. None (AttributeError).
    Any other exception, including KeyboardInterrupt and SystemExit,
    propagates to the caller instead of being silently turned into -1.

    Note: found on Stackoverflow -
    https://stackoverflow.com/questions/268272/getting-key-with-maximum-value-in-dictionary
    '''
    try:
        return max(dt.items(), key=operator.itemgetter(1))[0]
    except (ValueError, TypeError, AttributeError):
        return -1
def print_max_date_times(self):
    """
    Print how many time-series end at each distinct max (end) date-time.

    The last date-time of every time-series is looked up with
    get_last_datetime; one line "<date-time> <count>" is printed per
    distinct date-time, in sorted order of the date-times.
    """
    self.assert_connected()
    counts = {}
    for series_name in self.get_distinct_names():
        last_seen = self.get_last_datetime(series_name)
        counts[last_seen] = counts.get(last_seen, 0) + 1
    for last_seen, total in sorted(counts.items()):
        print(last_seen + " " + str(total))
def get_start_end_points(self, range=None, use_file=False, point_threshold=None):
    """
    Get the start date-time and end date-time of every time-series.

    The start (end) is the first (last) row of the series under the
    ordering "order by date, time", i.e. the same ordering get_all_points
    uses, so both values are date-times that actually occur in the data.
    (Aggregating min/max of the date and time columns independently would
    pair the earliest date with the earliest time of *any* day.)

    The file "date-time-pairs" (relative to the current working directory)
    is used as a cache when use_file is true:
      * if the file exists, the pairs are read from it and the database is
        not touched (range and point_threshold are ignored in that case)
      * otherwise the pairs are queried from the database and then written
        to the file
    When use_file is false the database is always queried and nothing is
    written.

    Cache file format: one line per time series, "name,start,end". Names
    containing a comma cannot be round-tripped through this format.

    :param range: forwarded to get_distinct_names to filter the time series
        whose points will be returned; range = [start_date, end_date]
    :param use_file: read from / write to the cache file as described above
    :param point_threshold: forwarded to get_distinct_names
    :return: a dictionary of the form
        {"time-series name": [start_datetime, end_datetime]}
        where both values are "<date>-<time>" (documented by the original
        author as "month/day/year-hours:minutes:seconds"). A series for
        which the query returns no row is left out.
    """
    cache_path = "date-time-pairs"

    if use_file and os.path.exists(cache_path):
        dt = {}
        with open(cache_path, 'r') as f:
            for line in f:
                # Strip only the line terminator: slicing off the last
                # character would corrupt a final line that has no newline.
                line = line.rstrip("\n")
                if not line:
                    continue
                fields = line.split(",")
                dt[fields[0]] = [fields[1], fields[2]]
        return dt

    self.assert_connected()
    tsnames_list = self.get_distinct_names(range=range,
                                           point_threshold=point_threshold)
    dt = {}
    for name in tsnames_list:
        # NOTE(review): name is interpolated into the SQL text, so a name
        # containing a single quote breaks the query. Switch to bound
        # parameters if execute_query supports them.
        # The first row is fetched before the second query is issued, in case
        # execute_query reuses one underlying cursor.
        first = self.execute_query("select name, date, time from dataset "
                                   "where name='%s' "
                                   "order by date, time" % name).fetchone()
        if first is None:
            continue
        last = self.execute_query("select name, date, time from dataset "
                                  "where name='%s' "
                                  "order by date desc, time desc" % name).fetchone()
        if last is None:
            continue
        dt[first[0]] = [first[1] + "-" + first[2], last[1] + "-" + last[2]]

    if use_file:
        with open(cache_path, 'w') as f:
            for key, value in dt.items():
                print(key + "," + value[0] + "," + value[1], file=f)
    return dt
def invertDictionary(dt):
    """
    Return a new dictionary that maps every value of dt back to its key.

    When several keys share the same value, the key that comes last in
    dt's iteration order wins. The values of dt must be hashable.
    """
    return {value: key for key, value in dt.items()}