Ejemplo n.º 1
0
def get_weather_data_array_db(name):
    """Build the weather feature matrix from a MongoDB collection.

    Every document of collection ``name`` is mapped to a row through
    ``get_idx(date, time_slot)``.  Documents that land on the same row are
    averaged with a true arithmetic mean (the previous pairwise
    ``(old + new) / 2`` update over-weighted later documents as soon as a
    row received three or more entries).

    Args:
        name: Name of the MongoDB collection to read.  Each document must
            provide ``date``, ``time_slot``, ``Weather``, ``temprature``
            (spelled as stored) and ``PM25``.

    Returns:
        ``np.float32`` array of shape ``(27 * 5, 4)`` with columns
        ``[time_slot, Weather, temprature, PM25]``.  Rows whose time-slot
        column is still zero after loading get their slot from
        ``predict_time_slot_window[row % 27]``; their features stay zero.
    """
    db = connect_mongodb()
    weather_array = np.zeros(shape=(27 * 5, 4), dtype=np.float32)
    n_rows = weather_array.shape[0]

    # Accumulate sums and hit counts separately so that "row already seen"
    # does not depend on the stored values happening to sum to zero.
    feature_sums = np.zeros(shape=(n_rows, 3), dtype=np.float64)
    hits = np.zeros(n_rows, dtype=np.int64)

    weather_cursor = db.get_collection(name).find({})
    try:
        for weather_entry in weather_cursor:
            time_slot_idx = weather_entry['time_slot']
            total_idx = get_idx(weather_entry['date'], time_slot_idx)
            if hits[total_idx] == 0:
                weather_array[total_idx, 0] = time_slot_idx
            feature_sums[total_idx] += (
                float(weather_entry['Weather']),
                float(weather_entry['temprature']),
                float(weather_entry['PM25']),
            )
            hits[total_idx] += 1
    finally:
        weather_cursor.close()

    seen = hits > 0
    weather_array[seen, 1:4] = feature_sums[seen] / hits[seen, np.newaxis]

    # Rows that never received a document: fill in the time slot only.
    for idx in np.where(weather_array[:, 0] == 0)[0]:
        weather_array[idx, 0] = predict_time_slot_window[idx % 27]
    return weather_array
def get_order_data_array_db_lei(district_idx, name):
    """Aggregate per-time-slot order counts for one start district.

    Documents are read from collection ``name`` filtered on
    ``st_district_id == district_idx`` and written to row
    ``(date - 1) * 144 + time_slot - 1``.

    Args:
        district_idx: Value matched against ``st_district_id``.
        name: Name of the MongoDB collection holding the order documents.

    Returns:
        ``np.float32`` array of shape ``(144 * 21, 6)``:
        column 0 is the time slot, column 1 the number of orders, column 2
        the number of orders whose ``str(driver_id)`` is shorter than five
        characters (presumably orders without a driver, e.g. ``'NULL'`` --
        confirm against the data), column 3 the number of orders with
        ``ed_district_id == 0``.  Columns 4 and 5 are not written here and
        stay zero.
    """
    db = connect_mongodb()
    start = time.time()
    order_array = np.zeros(shape=(144 * 21, 6), dtype=np.float32)
    order_cursor = db.get_collection(name).find(
        {'st_district_id': district_idx}, no_cursor_timeout=True)

    try:
        for order_entry in order_cursor:
            time_slot_idx = order_entry['time_slot']
            total_idx = (order_entry['date'] - 1) * 144 + time_slot_idx - 1

            order_array[total_idx, 0] = time_slot_idx
            order_array[total_idx, 1] += 1
            if len(str(order_entry['driver_id'])) < 5:
                order_array[total_idx, 2] += 1
            if order_entry['ed_district_id'] == 0:
                order_array[total_idx, 3] += 1
    finally:
        # A no_cursor_timeout cursor is never reaped by the server, so it
        # must be closed explicitly even when the loop above raises.
        order_cursor.close()

    # Rows without any order still need their 1-based slot number
    # (1..144), which follows directly from the row index.
    empty_rows = np.where(order_array[:, 0] == 0)[0]
    order_array[empty_rows, 0] = empty_rows % 144 + 1

    end = time.time()
    print('Processed Order in District: %d in %.2f seconds' % (district_idx, (end - start)))
    return order_array
Ejemplo n.º 3
0
def get_traffic_data_array_db(district_idx):
    """Load the traffic feature matrix of one district from ``test_traffic_data``.

    Each document matching ``district_ID == district_idx`` is written to the
    row returned by ``get_idx(date, time_slot)``.

    Args:
        district_idx: Value matched against ``district_ID``.

    Returns:
        ``np.float32`` array of shape ``(27 * 5, 5)``: column 0 is the time
        slot, columns 1-4 are four values taken from the document.  Rows
        whose time-slot column is still zero after loading get their slot
        from ``predict_time_slot_window[row % 27]``.
    """
    db = connect_mongodb()
    start = time.time()
    traffic_array = np.zeros(shape=(27 * 5, 5), dtype=np.float32)
    traffic_cursor = db.get_collection('test_traffic_data').find(
        {'district_ID': district_idx}, no_cursor_timeout=True)

    try:
        for traffic_entry in traffic_cursor:
            time_slot_idx = traffic_entry['time_slot']
            total_idx = get_idx(traffic_entry['date'], time_slot_idx)

            traffic_array[total_idx, 0] = time_slot_idx
            # dict.values() is a view on Python 3; np.asarray() would wrap
            # it in a 0-d object array that cannot be sliced, so
            # materialise it as a list first.
            # NOTE(review): this takes the first four values in the
            # document's own field order ('_id', 'district_ID',
            # 'time_slot' and 'date' are not removed beforehand) --
            # confirm these are the intended traffic fields.
            traffic_array[total_idx, 1:5] = np.asarray(
                list(traffic_entry.values())[0:4])
    finally:
        # A no_cursor_timeout cursor must be closed explicitly, also when
        # the loop above raises.
        traffic_cursor.close()

    # Rows that never received a document: fill in the time slot only.
    zero_idx = np.where(traffic_array[:, 0] == 0)[0]
    if zero_idx.size > 0:
        print('How many zeros: %d' % (zero_idx.size))
        for idx in zero_idx:
            traffic_array[idx, 0] = predict_time_slot_window[idx % 27]

    end = time.time()
    print('Processed Traffic in District: %d in %.2f seconds' % (district_idx, (end - start)))
    return traffic_array
Ejemplo n.º 4
0
def get_order_data_array_db(district_idx):
    """Aggregate order counts of one start district from ``test_order_data``.

    Each document matching ``st_district_id == district_idx`` increments the
    counters of the row returned by ``get_idx(date, time_slot)``.

    Args:
        district_idx: Value matched against ``st_district_id``.

    Returns:
        ``np.float32`` array of shape ``(27 * 5, 4)``: column 0 is the time
        slot, column 1 the number of orders, column 2 the number of orders
        whose ``str(driver_id)`` is shorter than five characters
        (presumably orders without a driver -- confirm against the data),
        column 3 the number of orders with ``ed_district_id == 0``.  Rows
        whose time-slot column is still zero after loading get their slot
        from ``predict_time_slot_window[row % 27]``.
    """
    db = connect_mongodb()
    start = time.time()
    order_array = np.zeros(shape=(27 * 5, 4), dtype=np.float32)
    order_cursor = db.get_collection('test_order_data').find(
        {'st_district_id': district_idx}, no_cursor_timeout=True)

    try:
        for order_entry in order_cursor:
            time_slot_idx = order_entry['time_slot']
            total_idx = get_idx(order_entry['date'], time_slot_idx)

            order_array[total_idx, 0] = time_slot_idx
            order_array[total_idx, 1] += 1
            if len(str(order_entry['driver_id'])) < 5:
                order_array[total_idx, 2] += 1
            if order_entry['ed_district_id'] == 0:
                order_array[total_idx, 3] += 1
    finally:
        # A no_cursor_timeout cursor is never reaped by the server, so it
        # must be closed explicitly even when the loop above raises.
        order_cursor.close()

    # Rows that never received an order: fill in the time slot only.
    zero_idx = np.where(order_array[:, 0] == 0)[0]
    if zero_idx.size > 0:
        print('How many zeros: %d' % (zero_idx.size))
        for idx in zero_idx:
            order_array[idx, 0] = predict_time_slot_window[idx % 27]

    end = time.time()
    print('Processed Order in District: %d in %.2f seconds' % (district_idx, (end - start)))
    return order_array
def get_traffic_data_array_db(district_idx, name):
    """Load the traffic feature matrix of one district and fill the gaps.

    Documents of collection ``name`` matching ``district_ID == district_idx``
    are written to row ``(date - 1) * 144 + time_slot - 1``.  Rows that
    received no document are then filled from their neighbours.

    Args:
        district_idx: Value matched against ``district_ID``.
        name: Name of the MongoDB collection holding the traffic documents.

    Returns:
        ``np.float32`` array of shape ``(144 * 21, 5)``: column 0 is the
        1-based time slot (1..144), columns 1-4 are four values taken from
        the document.  A missing row gets the mean of the non-zero rows in
        the window ``[row - 3, row + 2]``; when that window holds no data
        the features stay zero.
    """
    db = connect_mongodb()
    start = time.time()
    traffic_array = np.zeros(shape=(144 * 21, 5), dtype=np.float32)
    n_rows = traffic_array.shape[0]
    traffic_cursor = db.get_collection(name).find(
        {'district_ID': district_idx}, no_cursor_timeout=True)

    try:
        for traffic_entry in traffic_cursor:
            time_slot_idx = traffic_entry['time_slot']
            total_idx = (traffic_entry['date'] - 1) * 144 + time_slot_idx - 1

            traffic_array[total_idx, 0] = time_slot_idx
            # dict.values() is a view on Python 3; np.asarray() would wrap
            # it in a 0-d object array that cannot be sliced, so
            # materialise it as a list first.
            # NOTE(review): this takes the first four values in the
            # document's own field order ('_id', 'district_ID',
            # 'time_slot' and 'date' are not removed beforehand) --
            # confirm these are the intended traffic fields.
            traffic_array[total_idx, 1:5] = np.asarray(
                list(traffic_entry.values())[0:4])
    finally:
        # A no_cursor_timeout cursor must be closed explicitly, also when
        # the loop above raises.
        traffic_cursor.close()

    missing_rows = np.where(traffic_array[:, 0] == 0)[0]
    for idx in missing_rows.tolist():
        # Average the neighbours that carry data.  Rows filled earlier in
        # this loop count as data for the rows that follow.
        window_sum = np.zeros(4)
        count = 0
        for temp_idx in range(idx - 3, idx + 3):
            # Row 0 is a valid neighbour; only out-of-range indices are
            # skipped (negative ones would silently wrap around).
            if 0 <= temp_idx < n_rows:
                neighbour = traffic_array[temp_idx, 1:5]
                if np.sum(neighbour) != 0:
                    count += 1
                    window_sum += neighbour
        # Without this guard an empty window divides by zero and writes
        # NaN, which then spreads into every later fill.
        if count:
            traffic_array[idx, 1:5] = window_sum / count

    # The 1-based slot number (1..144) follows from the row index.
    traffic_array[missing_rows, 0] = missing_rows % 144 + 1

    end = time.time()
    print('Processed Traffic in District: %d in %.2f seconds' %
          (district_idx, (end - start)))
    return traffic_array
def get_traffic_data_array_db(district_idx, name):
    """Read one district's traffic documents into a dense per-slot matrix.

    Documents of collection ``name`` matching ``district_ID == district_idx``
    are stored at row ``(date - 1) * 144 + time_slot - 1``.  Rows left
    without a document are afterwards filled from nearby rows.

    Args:
        district_idx: Value matched against ``district_ID``.
        name: Name of the MongoDB collection holding the traffic documents.

    Returns:
        ``np.float32`` array of shape ``(144 * 21, 5)``: column 0 holds the
        1-based time slot (1..144) and columns 1-4 hold four values taken
        from the document.  A missing row receives the mean of the non-zero
        rows in the window ``[row - 3, row + 2]``; if that window has no
        data its features remain zero.
    """
    db = connect_mongodb()
    start = time.time()
    traffic_array = np.zeros(shape=(144 * 21, 5), dtype=np.float32)
    row_count = traffic_array.shape[0]
    traffic_cursor = db.get_collection(name).find({'district_ID': district_idx}, no_cursor_timeout=True)

    try:
        for traffic_entry in traffic_cursor:
            date_idx = traffic_entry['date']
            time_slot_idx = traffic_entry['time_slot']
            total_idx = (date_idx - 1) * 144 + time_slot_idx - 1

            traffic_array[total_idx, 0] = time_slot_idx
            # On Python 3 dict.values() is a view and np.asarray() turns it
            # into an unsliceable 0-d object array; build a list first.
            # NOTE(review): the first four values in the document's field
            # order are used as-is ('_id', 'district_ID', 'time_slot' and
            # 'date' are not stripped) -- confirm the field order.
            values = list(traffic_entry.values())[0:4]
            traffic_array[total_idx, 1:5] = np.asarray(values)
    finally:
        # Cursors opened with no_cursor_timeout are not reaped by the
        # server; close explicitly, including on error.
        traffic_cursor.close()

    zero_idx = np.where(traffic_array[:, 0] == 0)[0]
    for idx in zero_idx.tolist():
        # Collect the in-range neighbours that carry data; rows filled
        # earlier in this loop are visible to later rows.
        neighbours = [
            traffic_array[temp_idx, 1:5]
            for temp_idx in range(idx - 3, idx + 3)
            # Row 0 is valid; negative indices would wrap around.
            if 0 <= temp_idx < row_count
            and np.sum(traffic_array[temp_idx, 1:5]) != 0
        ]
        # An empty window used to divide by zero and store NaN, which then
        # contaminated the rows filled afterwards.
        if neighbours:
            traffic_array[idx, 1:5] = np.sum(neighbours, axis=0, dtype=np.float64) / len(neighbours)

    # The 1-based slot number (1..144) follows from the row index.
    traffic_array[zero_idx, 0] = zero_idx % 144 + 1

    end = time.time()
    print('Processed Traffic in District: %d in %.2f seconds' % (district_idx, (end - start)))
    return traffic_array
def get_weather_data_array_db(name):
    """Build the weather feature matrix from a MongoDB collection.

    Every document of collection ``name`` is written to row
    ``(date - 1) * 144 + time_slot - 1``.  Documents that share a row are
    averaged with a true arithmetic mean (the previous pairwise
    ``(old + new) / 2`` update over-weighted later documents as soon as a
    row received three or more entries).  Rows without any document are
    then filled from their neighbours.

    Args:
        name: Name of the MongoDB collection to read.  Each document must
            provide ``date``, ``time_slot``, ``Weather``, ``temprature``
            (spelled as stored) and ``PM25``.

    Returns:
        ``np.float32`` array of shape ``(144 * 21, 4)`` with columns
        ``[time_slot, Weather, temprature, PM25]``.  A missing row gets the
        mean of the non-zero rows in the window ``[row - 3, row + 2]``;
        when that window holds no data its features stay zero.
    """
    db = connect_mongodb()
    weather_array = np.zeros(shape=(144 * 21, 4), dtype=np.float32)
    n_rows = weather_array.shape[0]

    # Accumulate sums and hit counts separately so that "row already seen"
    # does not depend on the stored values happening to sum to zero.
    feature_sums = np.zeros(shape=(n_rows, 3), dtype=np.float64)
    hits = np.zeros(n_rows, dtype=np.int64)

    weather_cursor = db.get_collection(name).find({})
    try:
        for weather_entry in weather_cursor:
            time_slot_idx = weather_entry['time_slot']
            total_idx = (weather_entry['date'] - 1) * 144 + time_slot_idx - 1
            if hits[total_idx] == 0:
                weather_array[total_idx, 0] = time_slot_idx
            feature_sums[total_idx] += (
                float(weather_entry['Weather']),
                float(weather_entry['temprature']),
                float(weather_entry['PM25']),
            )
            hits[total_idx] += 1
    finally:
        weather_cursor.close()

    seen = hits > 0
    weather_array[seen, 1:4] = feature_sums[seen] / hits[seen, np.newaxis]

    missing_rows = np.where(weather_array[:, 0] == 0)[0]
    for idx in missing_rows.tolist():
        # Average the neighbours that carry data.  Rows filled earlier in
        # this loop count as data for the rows that follow.
        window_sum = np.zeros(3)
        count = 0
        for temp_idx in range(idx - 3, idx + 3):
            # Row 0 is a valid neighbour; only out-of-range indices are
            # skipped (negative ones would silently wrap around).
            if 0 <= temp_idx < n_rows:
                neighbour = weather_array[temp_idx, 1:4]
                if np.sum(neighbour) != 0:
                    count += 1
                    window_sum += neighbour
        # Without this guard an empty window divides by zero and writes
        # NaN, which then spreads into every later fill.
        if count:
            weather_array[idx, 1:4] = window_sum / count

    # The 1-based slot number (1..144) follows from the row index.
    weather_array[missing_rows, 0] = missing_rows % 144 + 1
    return weather_array
def get_weather_data_array_db(name):
    """Read weather documents into a dense per-slot matrix and fill the gaps.

    Each document of collection ``name`` is stored at row
    ``(date - 1) * 144 + time_slot - 1``.  When several documents fall on
    the same row their values are combined with an arithmetic mean; the
    earlier pairwise ``(old + new) / 2`` update gave later documents more
    weight once a row had three or more entries.  Rows left without a
    document are afterwards filled from nearby rows.

    Args:
        name: Name of the MongoDB collection to read.  Each document must
            provide ``date``, ``time_slot``, ``Weather``, ``temprature``
            (spelled as stored) and ``PM25``.

    Returns:
        ``np.float32`` array of shape ``(144 * 21, 4)`` with columns
        ``[time_slot, Weather, temprature, PM25]``.  A missing row receives
        the mean of the non-zero rows in the window ``[row - 3, row + 2]``;
        if that window has no data its features remain zero.
    """
    db = connect_mongodb()
    weather_array = np.zeros(shape=(144 * 21, 4), dtype=np.float32)
    row_count = weather_array.shape[0]

    # Separate sum/count buffers: a row counts as "seen" because a document
    # hit it, not because its stored values happen to be non-zero.
    totals = np.zeros(shape=(row_count, 3), dtype=np.float64)
    entries_per_row = np.zeros(row_count, dtype=np.int64)

    weather_cursor = db.get_collection(name).find({})
    try:
        for weather_entry in weather_cursor:
            date_idx = weather_entry['date']
            time_slot_idx = weather_entry['time_slot']
            total_idx = (date_idx - 1) * 144 + time_slot_idx - 1
            if entries_per_row[total_idx] == 0:
                weather_array[total_idx, 0] = time_slot_idx
            totals[total_idx, 0] += float(weather_entry['Weather'])
            totals[total_idx, 1] += float(weather_entry['temprature'])
            totals[total_idx, 2] += float(weather_entry['PM25'])
            entries_per_row[total_idx] += 1
    finally:
        weather_cursor.close()

    populated = entries_per_row > 0
    weather_array[populated, 1:4] = (
        totals[populated] / entries_per_row[populated, np.newaxis])

    zero_idx = np.where(weather_array[:, 0] == 0)[0]
    for idx in zero_idx.tolist():
        # Collect the in-range neighbours that carry data; rows filled
        # earlier in this loop are visible to later rows.
        neighbours = [
            weather_array[temp_idx, 1:4]
            for temp_idx in range(idx - 3, idx + 3)
            # Row 0 is valid; negative indices would wrap around.
            if 0 <= temp_idx < row_count
            and np.sum(weather_array[temp_idx, 1:4]) != 0
        ]
        # An empty window used to divide by zero and store NaN, which then
        # contaminated the rows filled afterwards.
        if neighbours:
            weather_array[idx, 1:4] = (
                np.sum(neighbours, axis=0, dtype=np.float64) / len(neighbours))

    # The 1-based slot number (1..144) follows from the row index.
    weather_array[zero_idx, 0] = zero_idx % 144 + 1
    return weather_array
def get_order_data_array_db(district_idx, name):
    """Count orders per time slot for one start district.

    Documents of collection ``name`` matching
    ``st_district_id == district_idx`` increment the counters of row
    ``(date - 1) * 144 + time_slot - 1``.

    Args:
        district_idx: Value matched against ``st_district_id``.
        name: Name of the MongoDB collection holding the order documents.

    Returns:
        ``np.float32`` array of shape ``(144 * 21, 4)``: column 0 is the
        1-based time slot (1..144), column 1 the number of orders, column 2
        the number of orders whose ``str(driver_id)`` is shorter than five
        characters (presumably orders without a driver -- confirm against
        the data), column 3 the number of orders with
        ``ed_district_id == 0``.
    """
    db = connect_mongodb()
    start = time.time()
    order_array = np.zeros(shape=(144 * 21, 4), dtype=np.float32)
    order_cursor = db.get_collection(name).find(
        {'st_district_id': district_idx}, no_cursor_timeout=True)

    try:
        for order_entry in order_cursor:
            time_slot_idx = order_entry['time_slot']
            total_idx = (order_entry['date'] - 1) * 144 + time_slot_idx - 1

            order_array[total_idx, 0] = time_slot_idx
            order_array[total_idx, 1] += 1
            if len(str(order_entry['driver_id'])) < 5:
                order_array[total_idx, 2] += 1
            if order_entry['ed_district_id'] == 0:
                order_array[total_idx, 3] += 1
    finally:
        # A no_cursor_timeout cursor is never reaped by the server, so it
        # must be closed explicitly even when the loop above raises.
        order_cursor.close()

    # Rows without any order still need their 1-based slot number
    # (1..144), which follows directly from the row index.
    empty_rows = np.where(order_array[:, 0] == 0)[0]
    order_array[empty_rows, 0] = empty_rows % 144 + 1

    end = time.time()
    print('Processed Order in District: %d in %.2f seconds' % (district_idx,
                                                               (end - start)))
    return order_array