def run():
    """Write one CSV row per user containing that user's tag matrix as JSON.

    The output path is read from ``sys.argv[1]``. The file gets a header row
    with the columns ``uid`` and ``data``; ``data`` holds ``json.dumps`` of a
    dict mapping each tag to a list of 48 accumulated values.

    Raises:
        IndexError: if no output path was given on the command line.
        KeyError: if ``fetch_semantic_data`` returns no row for a location
            that is present in the generated matrix.
    """
    # NOTE(review): SOURCE contains a second definition of ``run`` after this
    # one; if both live in the same module the later one wins at import time.

    # The context manager closes (and flushes) the file even when a fetch or
    # the matrix computation raises part-way through the user loop.
    # newline='' is what the csv module asks for on files handed to a writer;
    # assumes DictWriter is csv.DictWriter -- TODO confirm.
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            data = fetch_user_location_logs(uid, db)
            locations = merge_locations(data)
            # Maps location -> sequence indexed 0..47 below (presumably one
            # value per time slot -- TODO confirm against generate_matrix).
            matrix = generate_matrix(locations)

            semantic_data = fetch_semantic_data(list(matrix.keys()))
            semantic_dict = {}
            for row in semantic_data:
                # Second argument 5 is passed through to clean_tags unchanged;
                # its meaning is defined there.
                semantic_dict[row['location']] = clean_tags(row['tags'], 5)

            tag_matrix = {}
            for location, proba in matrix.items():
                # Deliberately a hard lookup: a location without semantic
                # data raises KeyError, as it always has.
                tag_dict = semantic_dict[location]
                tag_weight = sum(tag_dict.values())
                if tag_weight == 0:
                    # Nothing to distribute for this location.
                    continue
                for tag, cnt in tag_dict.items():
                    slots = tag_matrix.setdefault(tag, [0] * 48)
                    for i in range(48):
                        # 0.001 is added to both numerator and denominator
                        # (looks like smoothing -- confirm intent).
                        slots[i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)

            writer.writerow({'uid': uid, 'data': json.dumps(tag_matrix)})
def run():
    """Export every user's tag matrix to the CSV file named by ``sys.argv[1]``.

    A header row (``uid``, ``data``) is written first, then one row per user
    yielded by ``fetch_users``. The ``data`` column is the JSON encoding of a
    dict that maps each tag to a 48-element list of accumulated weights.

    Raises:
        IndexError: if the script was started without an output path.
        KeyError: if a location in the generated matrix has no entry in the
            rows returned by ``fetch_semantic_data``.
    """
    # NOTE(review): SOURCE also contains an earlier, equivalent definition of
    # ``run``; if both live in the same module this one replaces it.

    # Using ``with`` guarantees the file is flushed and closed even if one of
    # the fetches or the accumulation below raises for some user.
    # newline='' follows the csv module's guidance for writer file objects;
    # assumes DictWriter is csv.DictWriter -- TODO confirm.
    with open(sys.argv[1], 'w', newline='') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            data = fetch_user_location_logs(uid, db)
            locations = merge_locations(data)
            # location -> sequence read at indices 0..47 below (presumably
            # per-time-slot values -- TODO confirm against generate_matrix).
            matrix = generate_matrix(locations)

            semantic_data = fetch_semantic_data(list(matrix.keys()))
            semantic_dict = {}
            for row in semantic_data:
                # The literal 5 is forwarded to clean_tags as-is; see that
                # helper for what it controls.
                semantic_dict[row['location']] = clean_tags(row['tags'], 5)

            tag_matrix = {}
            for location, proba in matrix.items():
                # Hard lookup on purpose: missing semantic data surfaces as
                # KeyError rather than being skipped silently.
                tag_dict = semantic_dict[location]
                tag_weight = sum(tag_dict.values())
                if tag_weight == 0:
                    # No tag weight at this location, so it contributes nothing.
                    continue
                for tag, cnt in tag_dict.items():
                    slots = tag_matrix.setdefault(tag, [0] * 48)
                    for i in range(48):
                        # Same 0.001 offset on numerator and denominator
                        # (looks like smoothing -- confirm intent).
                        slots[i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)

            writer.writerow({
                'uid': uid,
                'data': json.dumps(tag_matrix),
            })
def tag_proba_matrix(uid):
    """Build the tag matrix for ``uid`` and return it wrapped in a response.

    The locations from ``_location_by_uid_stop(uid)`` are turned into a
    per-location matrix, each location's cleaned tags are looked up, and the
    per-location values are spread over the tags in proportion to each tag's
    count. The resulting ``{tag: [48 values]}`` dict is serialized with
    ``dumps`` and passed to ``make_response``.

    Raises:
        KeyError: if a location in the matrix has no row in the data
            returned by ``fetch_semantic_data``.
    """
    per_location = generate_matrix(_location_by_uid_stop(uid))

    tags_by_location = {}
    for record in fetch_semantic_data(list(per_location.keys())):
        tags_by_location[record['location']] = clean_tags(record['tags'], 5)

    result = {}
    for place, values in per_location.items():
        tag_counts = tags_by_location[place]
        total = sum(tag_counts.values())
        if total == 0:
            # Location carries no tag weight; it adds nothing to any tag.
            continue
        denominator = total + 0.001
        for tag_name, count in tag_counts.items():
            slots = result.setdefault(tag_name, [0] * 48)
            for idx in range(48):
                slots[idx] += (values[idx] * count + 0.001) / denominator

    return make_response(dumps(result))