def run():
    """Export one tag matrix per user to the file named by ``sys.argv[1]``.

    A ``uid,data`` header is written first, then one row for every uid
    yielded by ``fetch_users``.  ``data`` is the JSON encoding of a dict
    that maps each tag to a 48-element list, accumulated from the rows of
    the user's ``generate_matrix`` result and the tag counts of each
    location.

    Raises:
        IndexError: if no output path was passed on the command line.
        KeyError: if ``fetch_semantic_data`` returned no row for a location
            that is present in the user's matrix.
    """
    # The with-block guarantees the file is flushed and closed even when a
    # fetch or computation for some user raises half-way through the export.
    # newline='' is what the csv module asks for on files handed to its
    # writers (assuming DictWriter is csv.DictWriter); without it rows end
    # in "\r\r\n" on platforms that translate newlines.
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            data = fetch_user_location_logs(uid, db)
            locations = merge_locations(data)
            matrix = generate_matrix(locations)

            # location -> cleaned tag counts for every location in the matrix.
            semantic_dict = {}
            for row in fetch_semantic_data(list(matrix)):
                semantic_dict[row['location']] = clean_tags(row['tags'], 5)

            tag_matrix = {}
            for location, proba in matrix.items():
                tag_dict = semantic_dict[location]
                tag_weight = sum(tag_dict.values())
                if tag_weight == 0:
                    # Nothing to distribute this location's row over.
                    continue
                for tag, cnt in tag_dict.items():
                    tag_row = tag_matrix.setdefault(tag, [0] * 48)
                    for i in range(48):
                        # The 0.001 terms are presumably smoothing so a zero
                        # count still contributes a little -- TODO confirm.
                        tag_row[i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)

            writer.writerow({'uid': uid, 'data': json.dumps(tag_matrix)})
def _accumulate_tag_matrix(matrix, semantic_dict, slots=48, smoothing=0.001):
    """Fold the per-location rows of *matrix* into per-tag rows.

    For every location, each of its tags receives
    ``(row[i] * count + smoothing) / (total_count + smoothing)`` added to
    slot ``i`` of that tag's row.  Locations whose tag counts sum to zero
    are skipped.  Raises ``KeyError`` if a location in *matrix* is missing
    from *semantic_dict*.
    """
    tag_matrix = {}
    for location, proba in matrix.items():
        tag_dict = semantic_dict[location]
        tag_weight = sum(tag_dict.values())
        if tag_weight == 0:
            continue
        for tag, cnt in tag_dict.items():
            tag_row = tag_matrix.setdefault(tag, [0] * slots)
            for i in range(slots):
                tag_row[i] += (proba[i] * cnt + smoothing) / (tag_weight + smoothing)
    return tag_matrix


def run():
    """Write a ``uid,data`` file of per-user tag matrices to ``sys.argv[1]``.

    For every uid from ``fetch_users`` the user's location logs are merged,
    turned into a matrix by ``generate_matrix``, combined with the cleaned
    tags of each location and written out as one row whose ``data`` column
    is the JSON-encoded tag matrix.

    Raises:
        IndexError: if no output path was passed on the command line.
        KeyError: if a location in the matrix has no semantic data row.
    """
    # Context manager: the handle used to leak (and buffered rows could be
    # lost) whenever processing a user raised before output.close() ran.
    # newline='' follows the csv module's requirement for writer targets
    # (assuming DictWriter is csv.DictWriter).
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            data = fetch_user_location_logs(uid, db)
            locations = merge_locations(data)
            matrix = generate_matrix(locations)
            semantic_data = fetch_semantic_data(list(matrix))
            semantic_dict = {
                row['location']: clean_tags(row['tags'], 5)
                for row in semantic_data
            }
            tag_matrix = _accumulate_tag_matrix(matrix, semantic_dict)
            writer.writerow({
                'uid': uid,
                'data': json.dumps(tag_matrix),
            })
def run():
    """Export one matrix per user to the file named by ``sys.argv[1]``.

    Writes a ``uid,data`` header, then one row for every uid yielded by
    ``fetch_users``.  ``data`` is the JSON encoding of
    ``generate_matrix`` applied to the result of
    ``area_by_uid_stop(uid, db, get_business_logs)``.

    Raises:
        IndexError: if no output path was passed on the command line.
    """
    # Using a with-block closes the file even if a user fails mid-export;
    # the original only closed it on the success path.  newline='' is the
    # mode the csv module requires for files given to its writers
    # (assuming DictWriter is csv.DictWriter).
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            locations = area_by_uid_stop(uid, db, get_business_logs)
            matrix = generate_matrix(locations)
            writer.writerow({'uid': uid, 'data': json.dumps(matrix)})
def run():
    """Export one location matrix per user to the file named by ``sys.argv[1]``.

    Writes a ``uid,data`` header, then for every uid yielded by
    ``fetch_users`` fetches the user's location logs, merges them with
    ``merge_locations``, builds a matrix with ``generate_matrix`` and
    writes it JSON-encoded in the ``data`` column.

    Raises:
        IndexError: if no output path was passed on the command line.
    """
    # The with-block replaces the trailing output.close(), which was never
    # reached when any per-user step raised.  newline="" is what the csv
    # module requires for writer targets (assuming DictWriter is
    # csv.DictWriter).
    with open(sys.argv[1], "w", newline="") as output:
        writer = DictWriter(output, fieldnames=["uid", "data"])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            logs = fetch_user_location_logs(uid, db)
            locations = merge_locations(logs)
            matrix = generate_matrix(locations)
            writer.writerow({"uid": uid, "data": json.dumps(matrix)})
def run():
    """Dump every user's merged-location matrix to ``sys.argv[1]``.

    The output starts with a ``uid,data`` header.  Each following row holds
    a uid from ``fetch_users`` and the JSON encoding of the matrix that
    ``generate_matrix`` builds from that user's merged location logs.

    Raises:
        IndexError: if no output path was passed on the command line.
    """
    # Closing via the context manager means an exception for one user no
    # longer leaves the handle open with unflushed rows.  newline='' is
    # required by the csv module for files passed to its writers (assuming
    # DictWriter is csv.DictWriter).
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            logs = fetch_user_location_logs(uid, db)
            matrix = generate_matrix(merge_locations(logs))
            writer.writerow({'uid': uid, 'data': json.dumps(matrix)})
def run():
    """Dump every user's business-log matrix to ``sys.argv[1]``.

    The output starts with a ``uid,data`` header.  Each following row holds
    a uid from ``fetch_users`` and the JSON encoding of the matrix that
    ``generate_matrix`` builds from
    ``area_by_uid_stop(uid, db, get_business_logs)``.

    Raises:
        IndexError: if no output path was passed on the command line.
    """
    # The file is now managed by a with-block so it is closed on every exit
    # path, not only after the last user succeeds.  newline='' is the mode
    # the csv module requires for writer targets (assuming DictWriter is
    # csv.DictWriter).
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            locations = area_by_uid_stop(uid, db, get_business_logs)
            matrix = generate_matrix(locations)
            writer.writerow({
                'uid': uid,
                'data': json.dumps(matrix),
            })
def test_generate_matrix(self):
    """Check ``ppm.generate_matrix`` on two days of stops against a fixed expected dict."""
    # First day: three stops at three distinct locations.
    day_one = {
        "date": "01",
        "locations": [
            {
                "duration": 423.26666666666665,
                "start_time": "20131201001835",
                "end_time": "20131201072151",
                "location": "116.21832 40.02880",
            },
            {
                "duration": 513.2166666666667,
                "start_time": "20131201095115",
                "end_time": "20131201182428",
                "location": "116.35075 39.92321",
            },
            {
                "duration": 241.7,
                "start_time": "20131201194838",
                "end_time": "20131201235020",
                "location": "116.21863 40.01969",
            },
        ],
    }
    # Second day: two stops, the first at a location already seen on day one.
    day_two = {
        "date": "02",
        "locations": [
            {
                "duration": 442.23333333333335,
                "start_time": "20131202000048",
                "end_time": "20131202072302",
                "location": "116.21863 40.01969",
            },
            {
                "duration": 602.3333333333334,
                "start_time": "20131202091812",
                "end_time": "20131202192032",
                "location": "116.34819 39.92131",
            },
        ],
    }
    daily_stops = [day_one, day_two]

    # One row per location, kept as literal lists so they can be compared
    # element by element with the original fixture.
    expected = {
        '116.35075 39.92321': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        '116.34819 39.92131': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        '116.21832 40.02880': [0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        '116.21863 40.01969': [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
    }

    actual = ppm.generate_matrix(daily_stops)
    self.assertEqual(actual, expected)
def tag_proba_matrix(uid):
    """Build the per-tag matrix for *uid* and return it as a response.

    The user's stops are turned into a per-location matrix by
    ``generate_matrix``; each location's row is then distributed over that
    location's cleaned tags.  The resulting ``tag -> 48-element list``
    dict is serialized with ``dumps`` and wrapped by ``make_response``.

    Raises ``KeyError`` if a location in the matrix has no semantic row.
    """
    stops = _location_by_uid_stop(uid)
    location_matrix = generate_matrix(stops)

    # location -> cleaned tag counts
    tags_by_location = {
        entry['location']: clean_tags(entry['tags'], 5)
        for entry in fetch_semantic_data(list(location_matrix.keys()))
    }

    per_tag = {}
    for place, slot_values in location_matrix.items():
        counts = tags_by_location[place]
        total = sum(counts.values())
        if total != 0:  # locations with no tag weight contribute nothing
            for tag, count in counts.items():
                tag_row = per_tag.setdefault(tag, [0] * 48)
                for slot in range(48):
                    tag_row[slot] += (slot_values[slot] * count + 0.001) / (total + 0.001)

    return make_response(dumps(per_tag))
def most_proba_locations_holiday(uid):
    """Return a response with the pretty-printed most probable holiday locations of *uid*.

    Pipeline: ``_location_by_uid_stop_holiday`` -> ``generate_matrix`` ->
    ``get_most_proba_locations`` -> ``pretty_print_most_proba_locations``,
    with the final value serialized by ``dumps`` and wrapped by
    ``make_response``.
    """
    holiday_stops = _location_by_uid_stop_holiday(uid)
    matrix = generate_matrix(holiday_stops)
    top_locations = get_most_proba_locations(matrix)
    printable = pretty_print_most_proba_locations(top_locations)
    payload = dumps(printable)
    return make_response(payload)
def proba_matrix_workday(uid):
    """Return a response holding the matrix built from the workday stops of *uid*.

    The stops come from ``_location_by_uid_stop_workday``; the matrix from
    ``generate_matrix`` is serialized with ``dumps`` and wrapped by
    ``make_response``.
    """
    workday_stops = _location_by_uid_stop_workday(uid)
    matrix = generate_matrix(workday_stops)
    payload = dumps(matrix)
    return make_response(payload)
def district_proba_matrix(uid):
    """Return a response holding the matrix of *uid* built from district data.

    ``area_by_uid_stop`` is called with ``fetch_uid_district_data`` as its
    ``area_func``; the matrix from ``generate_matrix`` is serialized with
    ``dumps`` and wrapped by ``make_response``.
    """
    district_stops = area_by_uid_stop(uid, area_func=fetch_uid_district_data)
    matrix = generate_matrix(district_stops)
    payload = dumps(matrix)
    return make_response(payload)
def semantic_proba_matrix(uid):
    """Return a response holding the matrix of *uid* built from business data.

    ``area_by_uid_stop`` is called with ``fetch_uid_business_data`` as its
    ``area_func``; the matrix from ``generate_matrix`` is serialized with
    ``dumps`` and wrapped by ``make_response``.
    """
    business_stops = area_by_uid_stop(uid, area_func=fetch_uid_business_data)
    matrix = generate_matrix(business_stops)
    payload = dumps(matrix)
    return make_response(payload)