Example 1
0
def build_matrix_in_hour(hour):
    """Build a (num_positions x 7) value matrix for a single hour.

    Reads rows from the 'poi_model_result' collection filtered by *hour*,
    groups values by "lng,lat" position key and day slot (`to_loc(day)`),
    and densifies into a matrix with missing slots as 0.

    Args:
        hour: hour value used to filter the database query.

    Returns:
        (matrix, location): matrix is an (N, 7) numpy array; location maps
        each "lng,lat" key to its row index in the matrix.
    """
    dao = Dao()
    # "lng,lat" -> {day-slot -> value}
    location_array = {}

    for row in dao.read_data_from_target_database('poi_model_result', {'hour': hour}):
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        loc = to_loc(row['day'])
        # setdefault replaces the explicit membership check
        location_array.setdefault(key, {})[loc] = row['value']

    print("load data finished!")
    location = {}
    matrix = np.zeros((len(location_array), 7))
    # enumerate instead of a manually incremented counter, which also
    # shadowed the builtin `id` in the original code
    for row_idx, (key, cur_map) in enumerate(location_array.items()):
        location[key] = row_idx
        # slots with no data default to 0, matching the original fill loop
        matrix[row_idx] = [cur_map.get(i, 0) for i in range(7)]

    dao.close()
    return matrix, location
def build_position_to_name(file_name=None):
    """Return a {"lng,lat": name} map, using a pickle file as a cache.

    If *file_name* exists it is unpickled and returned directly.
    Otherwise the map is rebuilt from the database, written to
    *file_name*, and returned.

    Args:
        file_name: path of the pickle cache; defaults to DUMP_FILE.

    Returns:
        dict mapping "lng,lat" keys to place names.
    """
    if file_name is None:
        file_name = DUMP_FILE

    print(os.path.abspath(file_name))
    if os.path.exists(file_name):
        # NOTE: pickle.load is only safe on this trusted local cache file.
        with open(file_name, 'rb') as file:
            return pickle.load(file)

    print("we don't have dump data. build from script, please wait...")
    dao = Dao()

    position_name_map = {}
    count = 0
    for row in dao.read_data():
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        position_name_map[key] = row['name']
        count += 1
        if count % 1000 == 0:
            print(count)

    # BUG FIX: write the cache to the requested file_name; the original
    # always dumped to DUMP_FILE even when a custom path was supplied,
    # so a caller-specified cache file was never created.
    with open(file_name, 'wb') as file:
        pickle.dump(position_name_map, file)

    dao.close()
    return position_name_map
def build_matrix_all_hours():
    """Build a (num_positions x 175) matrix covering 35 days x 5 hours.

    Groups database rows by "lng,lat" position key and time slot
    (`to_loc(month, day, hour)`), densifying missing slots as 0.

    Returns:
        (matrix, location): matrix is an (N, 35*5) numpy array; location
        is the list of "lng,lat" keys, one per matrix row, in row order.
    """
    dao = Dao()
    # "lng,lat" -> {time-slot -> value}
    location_array = {}

    for row in dao.read_data():
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        loc = to_loc(row['month'], row['day'], row['hour'])
        location_array.setdefault(key, {})[loc] = row['value']

    print("load data finished!")
    location = []
    matrix = np.zeros((len(location_array), 35 * 5))
    for idx, (key, cur_map) in enumerate(location_array.items()):
        location.append(key)
        # slots with no data default to 0, matching the original fill loop
        matrix[idx] = [cur_map.get(i, 0) for i in range(35 * 5)]

    # BUG FIX: the DAO connection was never closed (resource leak);
    # every sibling builder in this file calls dao.close().
    dao.close()
    return matrix, location
def find_top_position(top_count):
    """Copy the rows of the busiest positions into the view database.

    First pass: sum 'value' per "lng,lat" position over the whole dataset.
    Second pass: stream every row of the top *top_count* positions from
    INTEGRATION_DATABASE into VIEW_DATABASE in batches of 100.

    Args:
        top_count: number of highest-total positions to keep.
    """
    dao = Dao()

    # read all data to build the per-position people-count map
    res_map = {}
    count = 0
    for row in dao.read_data():
        count += 1
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        res_map[key] = res_map.get(key, 0) + row['value']
        if count % 10000 == 0:
            print(count)

    # sort (key, total) pairs by total, descending, and keep the top slice;
    # sorted(res_map.items()) replaces the original manual list rebuild
    res_list = sorted(res_map.items(), key=lambda x: x[1], reverse=True)
    final_res = res_list[0:top_count]
    print(final_res)
    final_set = {row[0] for row in final_res}

    print('write to view database')
    cache = []
    count = 0
    for row in dao.read_data_from_target_database(INTEGRATION_DATABASE):
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        if key in final_set:
            cache.append({
                'month': row['month'],
                'day': row['day'],
                'hour': row['hour'],
                'lng_gcj02': round(float(row['lng_gcj02']), 3),
                'lat_gcj02': round(float(row['lat_gcj02']), 3),
                'name': row['name'],
                'value': int(row['value'])
            })
            if len(cache) == 100:
                count += 100
                dao.insert_many(VIEW_DATABASE, cache)
                cache.clear()
                if count % 10000 == 0:
                    print(count)
    # flush the final partial batch
    if len(cache) != 0:
        dao.insert_many(VIEW_DATABASE, cache)

    dao.close()
Example 5
0
def upload_data(matrix, map):
    """Upload correlation-model predictions to 'correlation_model_result'.

    Predictions cover 2019-09-24 .. 2019-09-30 for hours 7/12/15/20/21
    and are inserted in batches of 100 after clearing the collection.

    Args:
        matrix: list of per-hour matrices; matrix[i][r][k] is the value
            for position row r on day offset k.
        map: list of per-hour dicts mapping "lng,lat" keys to row indices
            of the corresponding matrix. (Shadows the builtin `map`; the
            name is kept for keyword-call compatibility.)
    """
    DATABASE = 'correlation_model_result'
    dao = Dao()
    cache = []
    count = 0
    dao.clear_database(DATABASE)
    hour_list = [7, 12, 15, 20, 21]

    for i in range(0, len(matrix)):
        time_matrix = matrix[i]
        time_map = map[i]
        base_month = 9
        base_day = 24
        hour = hour_list[i]
        for j in time_map:
            lng, lat = str(j).split(',')
            for k in range(0, 7):
                cache.append({
                    'year': 2019,
                    'month': int(base_month),
                    'day': int(base_day + k),
                    'hour': hour,
                    'lng_gcj02': round(float(lng), 3),
                    'lat_gcj02': round(float(lat), 3),
                    'value': int(time_matrix[time_map[j]][k])
                })

                if len(cache) == 100:
                    count += 100
                    # BUG FIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit; catch Exception instead.
                    try:
                        dao.insert_many(DATABASE, cache)
                    except Exception:
                        # dump the offending batch, then abort — a partial
                        # upload would leave the collection inconsistent.
                        # (raise SystemExit works even without the `site`
                        # module, unlike the original exit(-1).)
                        print(cache)
                        raise SystemExit(-1)
                    cache.clear()
                    if count % 1000 == 0:
                        print(count)
    # flush the final partial batch
    if len(cache) != 0:
        dao.insert_many(DATABASE, cache)
    dao.close()
def model_integration(ratio=(0.333, 0.333, 0.333)):
    """Blend the three model outputs into the integration database.

    Clears the predicted slice (September, day > 23), accumulates the
    weighted results of the POI, ARIMA and correlation models into one
    map, attaches position names, and bulk-inserts the merged rows in
    batches of 100.

    Args:
        ratio: (poi, arima, correlation) blending weights.
    """
    print('model integration')

    dao = Dao()
    # drop previously integrated predictions before re-inserting
    count = dao.clear_database(INTEGRATION_DATABASE, {
        'month': 9,
        'day': {
            '$gt': 23
        }
    })
    print('clear count: ', count)

    position_name_map = build_position_to_name()

    # accumulate each model's weighted contribution into res_map
    res_map = {}
    sources = (POI_DATABASE, ARIMA_DATABASE, CORRELATION_DATABSE)
    for weight, source in zip(ratio, sources):
        read_data_from_database(dao, weight, res_map, source)

    print('inserting into final database')
    cache = []
    written = 0
    for key, total in res_map.items():
        # key layout: "lng,lat,day,hour"
        parts = key.split(',')
        cache.append({
            'month': 9,
            'day': int(float(parts[2])),
            'hour': int(float(parts[3])),
            'lng_gcj02': round(float(parts[0]), 3),
            'lat_gcj02': round(float(parts[1]), 3),
            'name': position_name_map[parts[0] + ',' + parts[1]],
            'value': int(total)
        })
        if len(cache) == 100:
            written += 100
            dao.insert_many(INTEGRATION_DATABASE, cache)
            cache.clear()
            if written % 10000 == 0:
                print(written)
    # flush the final partial batch
    if cache:
        dao.insert_many(INTEGRATION_DATABASE, cache)

    dao.close()
def current_data_for_district_hour():
    """Aggregate current data per district and hour, then store it.

    Sums 'value' across rows sharing the same
    (month, day, typecode, adname, hour) key, then bulk-inserts the
    aggregated rows into DISTRICT_DATABASE_HOUR in batches of 100.
    """
    dao = Dao()
    print('transfer current data into district database')

    district_map = {}
    read_count = 0
    for row in dao.read_data():
        read_count += 1
        key = (row['month'], row['day'], row['typecode'], row['adname'],
               row['hour'])
        entry = district_map.get(key)
        if entry is not None:
            entry['value'] += row['value']
        else:
            district_map[key] = {
                'cityname': row['cityname'],
                'type': row['type'],
                'value': row['value']
            }
        if read_count % 10000 == 0:
            print('read: ', read_count)

    batch = []
    written = 0
    for key, info in district_map.items():
        month, day, typecode, adname, hour = key
        batch.append({
            'month': month,
            'day': day,
            'hour': hour,
            'cityname': info['cityname'],
            'adname': adname,
            'type': info['type'],
            'typecode': typecode,
            'value': info['value']
        })
        if len(batch) == 100:
            written += 100
            dao.insert_many(DISTRICT_DATABASE_HOUR, batch)
            batch.clear()
            if written % 10000 == 0:
                print('write: ', written)
    # flush the final partial batch
    if batch:
        dao.insert_many(DISTRICT_DATABASE_HOUR, batch)

    dao.close()
def build_data_poi_feature():
    """Extract (feature, label) training arrays from the database.

    Each feature vector is [weekday, hour, lng, lat, typecode-prefix];
    the label is the row's 'value'. The arrays are also cached to
    '../data/poi_model/feature.npz'.

    Returns:
        (x, y): numpy arrays of features and labels.
    """
    dao = Dao()
    features = []
    labels = []

    processed = 0
    for row in dao.read_data():
        weekday = day_to_weekday(row['year'], row['month'], row['day'])
        features.append([weekday, row['hour'], row['lng_gcj02'],
                         row['lat_gcj02'], int(row['typecode'][0:6])])
        labels.append(row['value'])

        processed += 1
        if processed % 10000 == 0:
            print(processed)

    dao.close()

    np.savez('../data/poi_model/feature', x=features, y=labels)
    return np.array(features), np.array(labels)
Example 9
0
def build_matrix_in_hour(hour):
    """Build a (num_positions x 35) value matrix for one hour.

    Groups rows filtered by *hour* by "lng,lat" position key and day slot
    (`to_loc(month, day)`), densifies missing slots as 0, and sorts rows
    by position key.

    Returns:
        (matrix, location): matrix is an (N, 35) numpy array; location is
        the sorted list of "lng,lat" keys, one per matrix row.
    """
    dao = Dao()
    # "lng,lat" -> {day-slot -> value}
    per_position = {}

    for row in dao.read_data({'hour': hour}):
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        slot = to_loc(row['month'], row['day'])
        per_position.setdefault(key, {})[slot] = row['value']

    print("load data finished!")
    rows = []
    location = []
    for key, slot_map in per_position.items():
        rows.append((key, [slot_map.get(i, 0) for i in range(35)]))
        location.append(key)

    # keep matrix rows and the key list in the same (sorted) order
    rows.sort(key=lambda item: item[0])
    location.sort()

    matrix = np.zeros((len(rows), 35))
    for idx, (_, values) in enumerate(rows):
        matrix[idx] = np.array(values)

    dao.close()
    return matrix, location
def build_predict_data():
    """Build and cache the prediction feature grid.

    Collects every distinct (lng, lat, typecode-prefix) point from the
    database, then expands each point over all weekdays (1..6, 0) and
    the five tracked hours (7, 12, 15, 20, 21). The result is saved to
    '../data/poi_model/predict.npz' and returned.

    Returns:
        numpy array of [day, hour, lng_gcj02, lat_gcj02, typecode] rows.
    """
    dao = Dao()
    predict_row = set()

    count = 0
    for row in dao.read_data():
        predict_row.add((row['lng_gcj02'], row['lat_gcj02'], int(row['typecode'][0:6])))

        count += 1
        if count % 10000 == 0:
            print(count)

    # BUG FIX: the DAO connection was never closed (resource leak);
    # sibling readers in this file call dao.close() when done.
    dao.close()

    # expand each distinct point over every weekday/hour combination
    x = [[day, hour, lng, lat, code]
         for (lng, lat, code) in predict_row
         for day in (1, 2, 3, 4, 5, 6, 0)
         for hour in [7, 12, 15, 20, 21]]

    x = np.array(x)
    # x -> [[day, hour, lng_gcj02, lat_gcj02, typecode],...]
    np.savez('../data/poi_model/predict', x=x)

    return x
Example 11
0
if __name__ == '__main__':
    print("reading data from training set...")

    if os.path.exists("../data/arima_model/matrix" + ".npz"):
        zip_file = np.load("../data/arima_model/matrix" + ".npz")
        matrix = zip_file['matrix']
        location = zip_file['location']
    else:
        print("No dump file! Reading from original file! Please wait... ")
        matrix, location = build_matrix()

    print("reading complete!")

    res = []

    dao = Dao()
    dao.clear_database(DATABASE)
    count = 0
    start_time = time.time()
    for i in range(matrix.shape[0]):
        lng_gcj02 = round(float(location[i].split(",")[0]), 3)
        lat_gcj02 = round(float(location[i].split(",")[1]), 3)
        try:
            predict_data = arima_predict(matrix[i], 2, 1, 5)[0]
            for j in range(forecast_step):
                res.append({
                    'year': 2019,
                    'month': 9,
                    'day': 24 + j // 5,
                    'hour': hour_map[j % 5],
                    'lng_gcj02': lng_gcj02,
Example 12
0
# Status codes returned by check_is_valid for a requested timestamp.
INVALID = 0
CURRENT = 1
PREDICT = 2

# Aggregation granularities for query results.
HOUR = 0
DAY = 1
WEEK = 2

# Target collection names — presumably MongoDB collections; verify against Dao.
INTEGRATION_DATABASE = 'integrated_result'
DISTRICT_DATABASE = 'district_result'
DISTRICT_DATABASE_HOUR = 'district_result_hour'
# Pickle cache of the "lng,lat" -> name map built by build_position_to_name.
DUMP_DATA_FILE = './peoplePredict/model/data/dao_service/position_name_map.pkl'

# env
# NOTE(review): these run at import time — a DB connection is opened and the
# position-name cache is loaded (or rebuilt) as module-level side effects.
dao = Dao()
position_name = build_position_to_name(DUMP_DATA_FILE)
# Maps each tracked hour to its slot index — presumably a column/slot offset;
# confirm against the prediction layout used elsewhere.
pos_map = {7: 1,
           12: 2,
           15: 3,
           20: 4,
           21: 5}


# interface
def get_map_data(month, day, hour, aggregate):
    """Entry point for map-data queries.

    Validates the requested date/hour and returns an error response for
    invalid dates.

    NOTE(review): only the INVALID branch is visible here — the
    CURRENT/PREDICT handling (and any use of `aggregate`) appears to be
    truncated in this excerpt; as shown, a valid date returns None.
    """
    status = check_is_valid(month, day, hour)

    if status == INVALID:
        return build_error_resp(
            'Invalid date param. Month: ' + str(month) + ' day: ' + str(day) + ' hour: ' + str(hour))
def current_data_integration():
    """Copy all current observation rows into the integration database.

    Wipes INTEGRATION_DATABASE, then streams every source row into it in
    batches of 100, rounding coordinates to 3 decimal places.
    """
    dao = Dao()
    # start from an empty target collection
    dao.clear_database(INTEGRATION_DATABASE)
    print('transfer current data into final database')

    batch = []
    inserted = 0
    for row in dao.read_data():
        batch.append({
            'month': row['month'],
            'day': row['day'],
            'hour': row['hour'],
            'lng_gcj02': round(float(row['lng_gcj02']), 3),
            'lat_gcj02': round(float(row['lat_gcj02']), 3),
            'name': row['name'],
            'value': int(row['value'])
        })
        if len(batch) == 100:
            inserted += 100
            dao.insert_many(INTEGRATION_DATABASE, batch)
            batch.clear()
            if inserted % 10000 == 0:
                print(inserted)
    # flush the final partial batch
    if batch:
        dao.insert_many(INTEGRATION_DATABASE, batch)

    dao.close()