Esempio n. 1
0
    def process(self, table, group):
        """Convert one parsed key/value record into a row for ``table``.

        The record's ``gameid`` is added to ``self.gameid``.  A row holding
        one value per entry of the table model's ``FIELDS`` (in that order)
        is then appended to ``self.data[table]``.

        A field that is absent from ``group`` or equal to ``""`` is filled in
        as follows:

        * if the field is listed in the model's ``DEFAULTS``, that default
          is used;
        * otherwise, if the name ends with ``_time`` and the model's
          ``TIMESTAMP`` is ``""`` or names this very field, the value is
          ``group["timestamp"]`` when that is present and not ``"0"``, else
          ``util.datetime_timestamp(group["ds"], group["time"])``;
        * otherwise the empty value is kept.
        """
        self.gameid.add(group["gameid"])

        model = self.tables[table]
        field_names = model.FIELDS
        default_values = model.DEFAULTS
        # When a table has several *_time fields, its model defines TIMESTAMP.
        time_field = model.TIMESTAMP

        def fill_blank(name, blank):
            """Return the substitute for a field that arrived empty."""
            if name in default_values:
                return default_values.get(name, "")
            if not name.endswith("_time"):
                return blank
            if time_field != "" and name != time_field:
                return blank
            # Newer records carry the timestamp directly.
            stamp = group.get("timestamp", "0")
            if stamp != "0":
                return stamp
            return util.datetime_timestamp(group["ds"], group["time"])

        record = []
        for name in field_names:
            value = group.get(name, "")
            record.append(value if value != "" else fill_blank(name, value))

        self.data[table].append(record)
# Relation table: one 'geo' entry per (i, j) cell of the matrix held in
# `dataset`.  The load is expected to have happened already, e.g.
#   dataset = pd.read_csv(dataurl+'PeMSD7_W_228.csv', header=None)
for i in range(dataset.shape[0]):
    for j in range(dataset.shape[1]):
        sid, eid = i, j
        # NOTE(review): if `dataset` is a DataFrame this reads column label i,
        # then row label j -- confirm that is the intended orientation.
        cost = dataset[i][j]
        edge = (sid, eid)
        if edge in reldict:
            continue
        rel_id = len(reldict)  # ids are handed out in insertion order
        reldict[edge] = rel_id
        rel.append([rel_id, 'geo', sid, eid, cost])
rel_columns = ['rel_id', 'type', 'origin_id', 'destination_id', 'weight']
rel = pd.DataFrame(rel, columns=rel_columns)
rel.to_csv(dataname + '.rel', index=False)

# From here on `dataset` refers to a different CSV than the matrix above.
dataset = pd.read_csv(dataurl + 'PeMSD7_V_228.csv', header=None)

# Five-minute slots over [start_time, end_time), Monday to Friday only.
start_time = util.datetime_timestamp('2012-05-01T00:00:00Z')
end_time = util.datetime_timestamp('2012-07-01T00:00:00Z')
timeslot = []
while start_time < end_time:
    if time.localtime(start_time).tm_wday == 5:
        # Saturday: jump two days ahead, straight to the following Monday.
        start_time += 2 * 24 * 60 * 60
    if start_time >= end_time:
        # Can only trigger after a weekend jump; the loop condition already
        # guarantees start_time < end_time otherwise.
        break
    timeslot.append(util.timestamp_datetime(start_time))
    start_time += 5 * 60

# Open the .dyna output and write its CSV header.  The handle is left open
# here; nothing in this section closes it.
dyna_id = 0
dyna_file = open(dataname + '.dyna', 'w')
dyna_header = ['dyna_id', 'type', 'time', 'entity_id', 'traffic_speed']
dyna_file.write(','.join(dyna_header) + '\n')
Esempio n. 3
0
# Maps each (origin, destination) pair to the relation id assigned to it.
reldict = {}
# Relation table: one 'geo' entry per distinct (from, to) pair in `dataset`.
# The load is expected to have happened already, e.g.
#   dataset = pd.read_csv(dataurl+'PEMS04.csv')
origins = dataset['from']
destinations = dataset['to']
costs = dataset['cost']
for i in range(dataset.shape[0]):
    sid = origins[i]
    eid = destinations[i]
    cost = costs[i]
    edge = (sid, eid)
    if edge in reldict:
        # Repeated pair: only its first occurrence is recorded.
        continue
    rel_id = len(reldict)  # ids are handed out in insertion order
    reldict[edge] = rel_id
    rel.append([rel_id, 'geo', sid, eid, cost])
rel_columns = ['rel_id', 'type', 'origin_id', 'destination_id', 'cost']
rel = pd.DataFrame(rel, columns=rel_columns)
rel.to_csv(dataname + '.rel', index=False)

# From here on `dataset` is the 'data' array of the .npz archive
# (shape (16992, 307, 3) according to the original author's note).
dataset = np.load(dataurl + 'PEMS04.npz')['data']

# Five-minute slots covering [start_time, end_time).
start_time = util.datetime_timestamp('2018-01-01T00:00:00Z')
end_time = util.datetime_timestamp('2018-3-01T00:00:00Z')
timeslot = []
while start_time < end_time:
    timeslot.append(util.timestamp_datetime(start_time))
    start_time += 5 * 60

# Open the .dyna output and write its CSV header.  The handle is left open
# here; nothing in this section closes it.
dyna_id = 0
dyna_file = open(dataname + '.dyna', 'w')
dyna_header = ['dyna_id', 'type', 'time', 'entity_id', 'traffic_flow',
               'traffic_occupancy', 'traffic_speed']
dyna_file.write(','.join(dyna_header) + '\n')
for j in range(dataset.shape[1]):
    entity_id = j  # 这个数据集的id是0-306
    for i in range(len(timeslot)):