Example #1
def main():
    dataset_path = './dataset/h_train'

    val = [24, 25]  # validation pair: day 24 -> day 25

    datas = []

    noises = [1, 5, 6, 12, 13, 19, 20]  # noisy days (1-indexed) to exclude

    for i in range(25):  # one .npy file per day, 1..25
        fpath = os.path.join(dataset_path, '{}.npy'.format(i + 1))
        data = np.load(fpath)
        # raw shape: (144, 81, 2) = (time slot, station, flow)
        data = torch.from_numpy(data)
        data = torch.einsum("ijk->jik", data)  # -> (81, 144, 2), station-major
        # append each station's index as a third channel
        # (wasteful on memory, but memory is cheap)
        ids = torch.from_numpy(np.array([np.ones(144) * j
                                         for j in range(81)])).double()
        ids = ids.view(81, 144, 1)
        data = torch.cat([data, ids], dim=2)
        assert data.shape == (81, 144, 3)
        datas.append(data)

    X = []
    # pair each day with the next day, skipping pairs that touch a noisy day
    for i in range(24 - 1):
        if i + 1 in noises: continue
        if i + 2 in noises: continue
        a = datas[i]
        b = datas[i + 1]
        c = torch.cat((a, b), dim=2)  # (81, 144, 6): day-i channels + day-(i+1) channels
        X.append(c)

    graph = read_graph_csv()
    rows, cols = graph.nonzero()
    rows = rows.reshape(-1)
    cols = cols.reshape(-1)
    edge_index = torch.tensor([rows, cols], dtype=torch.long).cuda()  # indices must be int64
    edge_weight = torch.from_numpy(graph[rows, cols]).float().cuda()

    x = torch.arange(81, dtype=torch.float).cuda()  # placeholder node features: station ids

    graph_data = geoData(x, edge_index=edge_index, edge_attr=edge_weight)

    all_data = torch.cat(X, dim=0).float().cuda()

    a = datas[val[0] - 1]
    b = datas[val[1] - 1]
    val_data = torch.cat((a, b), dim=2).float().cuda()

    # inputs: day-1 channels (flow + station id); targets: day-2 flow channels
    torch_dataset = Data.TensorDataset(all_data[:, :, :3], all_data[:, :, 3:5])
    loader = Data.DataLoader(dataset=torch_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=0)
    model = Model(graph_data)
    model.cuda()

    criterion = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY)

    for epoch in range(epochs):
        total_loss = []
        time1 = time.time()

        model.train()

        for step, (batch_x, batch_y) in enumerate(loader):
            optimizer.zero_grad()

            X = batch_x
            y = batch_y
            pred_y = model(X)
            loss = criterion(pred_y, y)
            total_loss.append(loss.data.cpu().numpy())
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_X = val_data[:, :, :3]
            val_y = val_data[:, :, 3:5]
            pred_y = model(val_X)
            val_loss = criterion(pred_y, val_y)
        a = pred_y.data.cpu().numpy().reshape(1, -1)
        b = val_y.data.cpu().numpy().reshape(1, -1)
        val_loss_sklearn = mean_absolute_error(a, b)  # cross-check against sklearn
        print("Epoch", epoch, 'train loss:', np.mean(total_loss))
        print("Epoch", epoch, 'validation loss:',
              val_loss.data.cpu().numpy().mean())
        print(time.time() - time1)
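A note on the preprocessing above: torch.einsum("ijk->jik", data) just swaps the first two axes (equivalent to data.transpose(0, 1)), and the extra channel stores each station's index. A minimal, self-contained sketch of the same step on random data (shapes match the example; only numpy and torch are assumed):

import numpy as np
import torch

# toy stand-in for one day's file: (time slot, station, flow) = (144, 81, 2)
day = torch.from_numpy(np.random.rand(144, 81, 2))
day = torch.einsum("ijk->jik", day)  # -> (81, 144, 2); same as day.transpose(0, 1)
# broadcast each station's index across all 144 time slots as a third channel
ids = torch.arange(81, dtype=torch.float64).view(81, 1, 1).expand(81, 144, 1)
day = torch.cat([day, ids], dim=2)   # -> (81, 144, 3): [flow_0, flow_1, station id]
assert day.shape == (81, 144, 3)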
Example #2
def main():
    dataset_path = './dataset/h_train'

    val = [24, 25]  # validation pair: day 24 -> day 25

    datas = []

    noises = [1, 5, 6, 12, 13, 19, 20]  # noisy days (1-indexed) to exclude
    noises2 = [5, 6, 12, 13, 19, 20]

    for i in range(25):  # one .npy file per day, 1..25
        fpath = os.path.join(dataset_path, '{}.npy'.format(i + 1))
        data = np.load(fpath)
        # raw shape: (144, 81, 2) = (time slot, station, flow)
        data = torch.from_numpy(data)
        data = torch.einsum("ijk->jik", data)  # -> (81, 144, 2), station-major
        # append each station's index as a third channel
        # (wasteful on memory, but memory is cheap)
        ids = torch.from_numpy(np.array([np.ones(144) * j
                                         for j in range(81)])).double()
        ids = ids.view(81, 144, 1)

        data = torch.cat([data, ids], dim=2)
        assert data.shape == (81, 144, 3)

        datas.append(data)

    avg = torch.zeros(81, 144, 2, dtype=torch.float)
    counts = 0
    # use the per-day mean flow as node attributes
    for i in range(ALL_DATA - 1):
        if i + 1 in noises:
            continue
        item = datas[i]
        avg += item[:, :, 0:2].float()
        counts += 1
    avg /= counts
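    # (this mean doubles as a naive baseline: `avg_loss` below is the MAE of
    #  always predicting the historical mean for the validation day)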

    print(counts)
    X = []
    # pair each day with the next day, skipping pairs that touch a noisy day
    for i in range(ALL_DATA - 1):
        if i + 1 in noises: continue
        if i + 2 in noises: continue
        a = datas[i]
        b = datas[i + 1]
        c = torch.cat((a, b), dim=2)
        X.append(c)
        # for j in range(2, 8):
        j = 8  # additionally pair each day with the day 8 ahead
        if i + j < 23 and (i + j + 1) not in noises:
            a = datas[i]
            b = datas[i + j]
            c = torch.cat((a, b), dim=2)
            X.append(c)

    graph = read_graph_csv()
    # graph_floyd = read_graph_csv('./maps/graph_floyd.csv')

    graph_floyd = np.load('./hjj_maps/avg_25.npy')

    # (commented-out experiment: row-normalize graph_floyd and threshold the
    #  result at 0.01 to derive a binary adjacency to use as `graph` instead)

    rows, cols = graph.nonzero()
    rows = rows.reshape(-1)
    cols = cols.reshape(-1)

    edge_index = torch.tensor([rows, cols], dtype=torch.long).cuda()  # indices must be int64
    edge_weight = torch.from_numpy(graph_floyd[rows, cols]).float().cuda()

    # node features are now the historical mean flows, not station ids
    x = avg.cuda()

    graph_data = geoData(x, edge_index=edge_index, edge_attr=edge_weight)

    all_data = torch.cat(X, dim=0).float().cuda()

    a = datas[val[0] - 1]
    b = datas[val[1] - 1]
    val_data = torch.cat((a, b), dim=2).float().cuda()

    # inputs: day-1 channels (flow + station id); targets: day-2 flow channels
    torch_dataset = Data.TensorDataset(all_data[:, :, :3], all_data[:, :, 3:5])
    loader = Data.DataLoader(dataset=torch_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=0)
    model = Model(graph_data)
    model.cuda()

    criterion = nn.L1Loss()
    # criterion = F.mse_loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY)

    for epoch in range(epochs):
        total_loss = []
        time1 = time.time()

        model.train()

        for step, (batch_x, batch_y) in enumerate(loader):
            optimizer.zero_grad()

            X = batch_x
            y = batch_y
            pred_y = model(X)
            loss = criterion(pred_y, y)
            # loss1 = F.mse_loss(pred_y, y)
            total_loss.append(loss.data.cpu().numpy())
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_X = val_data[:, :, :3]
            val_y = val_data[:, :, 3:5]
            pred_y = model(val_X)
            val_loss = criterion(pred_y, val_y)
        a = pred_y.data.cpu().numpy().reshape(1, -1)
        b = val_y.data.cpu().numpy().reshape(1, -1)
        avg_c = avg.reshape(1, -1).numpy()
        val_loss_sklearn = mean_absolute_error(a, b)
        avg_loss = mean_absolute_error(avg_c, b)  # baseline: always predict the mean
        train_loss = np.mean(total_loss)
        val_loss = val_loss.data.cpu().numpy().mean()
        print("Epoch", epoch, 'train loss:', train_loss)
        print("Epoch", epoch, 'validation loss:', val_loss_sklearn)
        print("Epoch", epoch, 'avg loss:', np.mean(avg_loss))
        print(time.time() - time1)
        writer.add_scalars("scalar/loss", {'train_loss': train_loss}, epoch)
        writer.add_scalars("scalar/loss", {'val_loss': val_loss}, epoch)
        writer.add_scalars("scalar/loss", {'avg_loss': avg_loss}, epoch)
        writer.add_scalars("scalar/loss", {'13.5': 13.5}, epoch)
        writer.add_scalars("scalar/loss", {'13.3': 13.3}, epoch)
        writer.add_scalars("scalar/loss", {'13.1': 13.1}, epoch)
        writer.add_scalars("scalar/loss", {'12': 12}, epoch)
        writer.add_scalars("scalar/loss", {'11': 11}, epoch)
        writer.add_scalars("scalar/loss", {'10': 10}, epoch)

        if epoch % 20 == 0 and not DEBUG:
            fpath = os.path.join(dataset_path, '28.npy')
            test_data = np.load(fpath)

            test_data = torch.from_numpy(test_data)
            test_data = torch.einsum("ijk->jik", test_data)
            ids = torch.from_numpy(
                np.array([np.ones(144) * j for j in range(81)])).double()
            ids = ids.view(81, 144, 1)

            test_data = torch.cat([test_data, ids], dim=2).float().cuda()

            res = model(test_data)
            res = torch.einsum('ijk->jik', res)
            res = res.data.cpu().numpy()

            def time2str(id, date):
                # map a 10-minute slot index on `date` to its [start, end) window
                dt = datetime.datetime.strptime(date, "%Y-%m-%d")
                t1 = time.mktime(dt.timetuple()) + int(id) * 10 * 60
                t2 = t1 + 10 * 60
                t1_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t1))
                t2_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t2))

                return t1_str, t2_str

            date = '2019-01-29'
            with open(
                    './results/hjj_gcn_lstm_crazy/{}-{}.csv'.format(
                        date, epoch), 'w') as f:
                title = 'stationID,startTime,endTime,inNums,outNums'
                print(title, file=f)
                n_slots, n_stations, _ = res.shape
                print(res[0][0])  # sanity check: first slot, first station
                for j in range(n_stations):
                    for i in range(n_slots):
                        t1, t2 = time2str(i, date)
                        out_num, in_num = res[i][j]  # channel 0 = out, channel 1 = in
                        print(j, t1, t2, in_num, out_num, sep=',', file=f)
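Each example feeds the model a torch_geometric Data object built from a dense adjacency matrix. Below is a minimal sketch of that construction on a random 81-station adjacency (read_graph_csv is replaced by toy data; the class is aliased geoData here because Data is presumably bound to torch.utils.data in these snippets). Note that edge_index must hold int64 indices, hence dtype=torch.long rather than the originals' torch.float:

import numpy as np
import torch
from torch_geometric.data import Data as geoData

adj = np.random.rand(81, 81)
adj[adj < 0.9] = 0                      # sparsify a toy adjacency
rows, cols = adj.nonzero()              # endpoints of the remaining edges
edge_index = torch.tensor(np.stack([rows, cols]), dtype=torch.long)  # (2, E), int64
edge_weight = torch.from_numpy(adj[rows, cols]).float()              # (E,)
x = torch.zeros(81, 16)                 # one feature row per station
graph_data = geoData(x=x, edge_index=edge_index, edge_attr=edge_weight)
print(graph_data)  # e.g. Data(x=[81, 16], edge_index=[2, E], edge_attr=[E])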
Example #3
def main():
    dataset_path = './dataset/h_train'

    val = [19, 20]  # validation pair: day 19 -> day 20

    datas = []

    noises = [1, 5, 6, 12, 13, 19, 20, 26]  # noisy days (1-indexed) to exclude

    for i in range(26):  # one .npy file per day, 1..26
        fpath = os.path.join(dataset_path, '{}.npy'.format(i + 1))
        data = np.load(fpath)
        # raw shape: (144, 81, 2) = (time slot, station, flow)
        data = torch.from_numpy(data)
        data = torch.einsum("ijk->jik", data)  # -> (81, 144, 2), station-major
        # append each station's index as a third channel
        # (wasteful on memory, but memory is cheap)
        ids = torch.from_numpy(np.array([np.ones(144) * j
                                         for j in range(81)])).double()
        ids = ids.view(81, 144, 1)

        data = torch.cat([data, ids], dim=2)
        assert data.shape == (81, 144, 3)

        datas.append(data)

    avg = torch.zeros(81, 144, 2, dtype=torch.float)
    counts = 0
    # use the per-day mean flow as node attributes (noisy days included here)
    for i in range(26):
        item = datas[i]
        avg += item[:, :, 0:2].float()
        counts += 1
    day_28 = np.load(os.path.join(dataset_path, '28.npy'))
    day_28 = torch.from_numpy(day_28).float()
    day_28 = torch.einsum("ijk->jik", day_28)
    avg += day_28
    counts += 1
    avg /= counts
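    # (the mean over all loaded days, including day 28, serves both as the node
    #  features for the graph and as the naive `avg_loss` baseline below)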

    print(counts)

    X = []

    ## noises holds day numbers (1-indexed), so compare against i + 1
    for i in range(25):
        if i + 1 in noises:
            if i + 2 in noises:
                # both days are noisy: keep the noisy-to-noisy pair
                print(i, i + 1)
                a = datas[i]
                b = datas[i + 1]
                c = torch.cat((a, b), dim=2)
                X.append(c)
            continue
        if i + 2 in noises:
            continue
        more = 1  # oversampling hook; currently one copy per pair
        for _ in range(more):
            a = datas[i]
            b = datas[i + 1]
            c = torch.cat((a, b), dim=2)
            X.append(c)

    graph = read_graph_csv()
    graph_floyd = np.load('./hjj_maps/avg_25.npy')  # loaded but unused below

    rows, cols = graph.nonzero()
    rows = rows.reshape(-1)
    cols = cols.reshape(-1)

    edge_index = torch.tensor([rows, cols], dtype=torch.long).cuda()  # indices must be int64

    # node features are the historical mean flows
    x = avg.cuda()

    graph_data = geoData(x, edge_index=edge_index)

    all_data = torch.cat(X, dim=0).float().cuda()

    a = datas[val[0] - 1]
    b = datas[val[1] - 1]
    val_data = torch.cat((a, b), dim=2).float().cuda()

    model = Model(graph_data, graph)

    print(model)  # show the architecture
    model.cuda()

    criterion = nn.L1Loss()
    criterion2 = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY)

    assert all_data.shape[0] % 81 == 0, '81 stations per day'

    data_number = all_data.shape[0] // 81

    print(data_number)
    for epoch in range(epochs):
        total_loss = []
        time1 = time.time()

        # shuffle whole days; each batch below is one day's 81 stations
        ids = np.arange(data_number)
        np.random.shuffle(ids)
        model.train()

        for i in ids:
            optimizer.zero_grad()

            batch_x = all_data[i * 81:(i + 1) * 81, :, :3]
            batch_y = all_data[i * 81:(i + 1) * 81, :, 3:]

            X = batch_x
            y = batch_y[:, :, :2]
            # sanity check: the station-id channel must end at station 80
            assert batch_x[-1, 1, 2] == 80
            assert batch_y[-1, 1, 2] == 80

            pred_y = model(X)

            # optimize the MSE but log the MAE
            loss = criterion(pred_y, y)
            loss1 = criterion2(pred_y, y)
            total_loss.append(loss.data.cpu().numpy())
            loss1.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_X = val_data[:, :, :3]
            val_y = val_data[:, :, 3:5]
            pred_y = model(val_X)
            val_loss = criterion(pred_y, val_y)
        a = pred_y.data.cpu().numpy().reshape(1, -1)
        b = val_y.data.cpu().numpy().reshape(1, -1)
        avg_c = avg.reshape(1, -1).numpy()
        val_loss_sklearn = mean_absolute_error(a, b)
        avg_loss = mean_absolute_error(avg_c, b)  # baseline: always predict the mean
        train_loss = np.mean(total_loss)
        val_loss = val_loss.data.cpu().numpy().mean()
        print("Epoch", epoch, 'train loss:', train_loss)
        print("Epoch", epoch, 'validation loss:', val_loss)
        print("Epoch", epoch, 'avg loss:', np.mean(avg_loss))
        print(time.time() - time1)
        writer.add_scalars("scalar/loss", {'train_loss': train_loss}, epoch)
        writer.add_scalars("scalar/loss", {'val_loss': val_loss}, epoch)
        writer.add_scalars("scalar/loss", {'avg_loss': avg_loss}, epoch)
        writer.add_scalars("scalar/loss", {'13.5': 13.5}, epoch)
        writer.add_scalars("scalar/loss", {'13.3': 13.3}, epoch)
        writer.add_scalars("scalar/loss", {'13.1': 13.1}, epoch)

        if epoch % 100 == 0 and not DEBUG:
            fpath = os.path.join(dataset_path, '26.npy')
            test_data = np.load(fpath)

            test_data = torch.from_numpy(test_data)
            test_data = torch.einsum("ijk->jik", test_data)
            ids = torch.from_numpy(
                np.array([np.ones(144) * j for j in range(81)])).double()
            ids = ids.view(81, 144, 1)

            test_data = torch.cat([test_data, ids], dim=2).float().cuda()

            res = model(test_data)
            res = torch.einsum('ijk->jik', res)
            res = res.data.cpu().numpy()

            def time2str(id, date):
                # map a 10-minute slot index on `date` to its [start, end) window
                dt = datetime.datetime.strptime(date, "%Y-%m-%d")
                t1 = time.mktime(dt.timetuple()) + int(id) * 10 * 60
                t2 = t1 + 10 * 60
                t1_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t1))
                t2_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t2))

                return t1_str, t2_str

            date = '2019-01-27'
            filename = os.path.splitext(os.path.basename(__file__))[0]
            os.makedirs('./results/hjj_map_all', exist_ok=True)
            with open(
                    './results/hjj_map_all/{}-{}.csv'.format(filename, epoch),
                    'w') as f:
                title = 'stationID,startTime,endTime,inNums,outNums'
                print(title, file=f)
                n_slots, n_stations, _ = res.shape
                print(res[0][0])  # sanity check: first slot, first station
                for j in range(n_stations):
                    for i in range(n_slots):
                        t1, t2 = time2str(i, date)
                        out_num, in_num = res[i][j]  # channel 0 = out, channel 1 = in
                        print(j, t1, t2, in_num, out_num, sep=',', file=f)
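One caveat on the time2str helper used in examples #2 and #3: it round-trips through time.mktime and time.localtime, so the emitted timestamps depend on the machine's timezone. A timezone-free sketch of the same slot-to-window mapping with plain datetime arithmetic (slot_to_window is a hypothetical name, not from the originals):

import datetime

def slot_to_window(slot, date_str):
    # map a 10-minute slot index (0..143) on date_str to its [start, end) window
    start = (datetime.datetime.strptime(date_str, "%Y-%m-%d")
             + datetime.timedelta(minutes=10 * slot))
    end = start + datetime.timedelta(minutes=10)
    fmt = "%Y-%m-%d %H:%M:%S"
    return start.strftime(fmt), end.strftime(fmt)

print(slot_to_window(0, "2019-01-27"))    # ('2019-01-27 00:00:00', '2019-01-27 00:10:00')
print(slot_to_window(143, "2019-01-27"))  # last slot ends at '2019-01-28 00:00:00'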