def process(self, table, group):
    """Turn one key/value record into a row for ``table``, filling defaults.

    The record's game id is added to ``self.gameid``. One value is then
    produced for every name in the table model's ``FIELDS``, in order:

    * a non-empty value found in ``group`` is used as is;
    * otherwise the entry from the model's ``DEFAULTS`` is used, if present;
    * otherwise, for a field ending in ``_time`` that qualifies for
      auto-filling, ``group["timestamp"]`` is used unless it is missing or
      equal to ``"0"``, in which case ``util.datetime_timestamp`` is called
      with ``group["ds"]`` and ``group["time"]``;
    * otherwise the value stays the empty string.

    The finished row is appended to ``self.data[table]``; nothing is
    returned.

    Args:
        table: Key into ``self.tables`` and ``self.data``.
        group: Mapping of field name to value; must contain ``"gameid"``.
    """
    self.gameid.add(group["gameid"])

    model = self.tables[table]
    field_names = model.FIELDS
    fallbacks = model.DEFAULTS
    # When a model has several *_time fields it names the one to auto-fill
    # through TIMESTAMP; an empty TIMESTAMP lets every *_time field qualify.
    time_field = model.TIMESTAMP

    record = []
    for name in field_names:
        value = group.get(name, "")
        if value == "":
            if name in fallbacks:
                value = fallbacks[name]
            elif name.endswith("_time") and time_field in ("", name):
                # Newer records carry a ready-made "timestamp"; a missing
                # key or the placeholder "0" falls back to ds + time.
                stamp = group.get("timestamp", "0")
                value = (
                    stamp
                    if stamp != "0"
                    else util.datetime_timestamp(group["ds"], group["time"])
                )
        record.append(value)

    self.data[table].append(record)
# --- Relation (.rel) file -------------------------------------------------
# `dataset`, `reldict` and `rel` are expected to exist already; the loader
# for the matrix is left disabled, as in the original script:
# dataset = pd.read_csv(dataurl+'PeMSD7_W_228.csv', header=None)
for i in range(dataset.shape[0]):
    for j in range(dataset.shape[1]):
        sid, eid = i, j
        # NOTE(review): if `dataset` is a pandas DataFrame, dataset[i][j]
        # reads column i, row j -- confirm this orientation is intended.
        cost = dataset[i][j]
        if (sid, eid) in reldict:
            continue
        # Relation ids are handed out in order of first appearance.
        reldict[(sid, eid)] = len(reldict)
        rel.append([len(reldict) - 1, 'geo', sid, eid, cost])

rel_columns = ['rel_id', 'type', 'origin_id', 'destination_id', 'weight']
rel = pd.DataFrame(rel, columns=rel_columns)
rel.to_csv(dataname + '.rel', index=False)

# --- Time slots -------------------------------------------------------------
dataset = pd.read_csv(dataurl + 'PeMSD7_V_228.csv', header=None)
start_time = util.datetime_timestamp('2012-05-01T00:00:00Z')
end_time = util.datetime_timestamp('2012-07-01T00:00:00Z')
timeslot = []
while start_time < end_time:
    # Only Monday-Friday is kept: on reaching a Saturday (tm_wday == 5),
    # jump ahead two days to the following Monday.
    # NOTE(review): time.localtime uses the machine's local time zone --
    # confirm that matches what util.datetime_timestamp produces.
    if time.localtime(start_time).tm_wday == 5:
        start_time += 2 * 24 * 60 * 60
        # The jump is the only step that can overshoot the end of the range.
        if start_time >= end_time:
            break
    timeslot.append(util.timestamp_datetime(start_time))
    start_time += 5 * 60  # advance by one 5-minute slot

# --- Dynamic (.dyna) file header ----------------------------------------------
# dyna = []
dyna_id = 0
# The handle is left open here; presumably later code (not visible in this
# fragment) writes the data rows and closes it.
dyna_file = open(dataname + '.dyna', 'w')
dyna_header = ','.join(
    ['dyna_id', 'type', 'time', 'entity_id', 'traffic_speed'])
dyna_file.write(dyna_header + '\n')
reldict = dict() # dataset = pd.read_csv(dataurl+'PEMS04.csv') for i in range(dataset.shape[0]): sid = dataset['from'][i] eid = dataset['to'][i] cost = dataset['cost'][i] if (sid, eid) not in reldict: reldict[(sid, eid)] = len(reldict) rel.append([len(reldict) - 1, 'geo', sid, eid, cost]) rel = pd.DataFrame( rel, columns=['rel_id', 'type', 'origin_id', 'destination_id', 'cost']) rel.to_csv(dataname + '.rel', index=False) dataset = np.load(dataurl + 'PEMS04.npz')['data'] # (16992, 307, 3) start_time = util.datetime_timestamp('2018-01-01T00:00:00Z') end_time = util.datetime_timestamp('2018-3-01T00:00:00Z') timeslot = [] while start_time < end_time: timeslot.append(util.timestamp_datetime(start_time)) start_time = start_time + 5 * 60 # dyna = [] dyna_id = 0 dyna_file = open(dataname + '.dyna', 'w') dyna_file.write('dyna_id' + ',' + 'type' + ',' + 'time' + ',' + 'entity_id' + ',' + 'traffic_flow' + ',' + 'traffic_occupancy' + ',' + 'traffic_speed' + '\n') for j in range(dataset.shape[1]): entity_id = j # 这个数据集的id是0-306 for i in range(len(timeslot)):