def compute_vector(track, db_conn):
    """Combine the vectors of a track's tags into a single feature tuple.

    Iterates over the (tag, count) pairs in ``track['tags']``, looks each tag
    up with ``get_vector`` and adds its vector, scaled by
    ``gw * log1p(count) / log(2)``, to a running sum that starts as a list of
    zeros of length ``get_dimensions(db_conn)``. Tags for which ``get_vector``
    returns a ``None`` vector are skipped.

    Returns the sum divided by its Euclidean norm as a tuple, or the
    unscaled sum as a tuple when the norm is not positive.
    """
    # Assumes log / log1p are the math-module functions, which makes the
    # count factor a base-2 logarithm of (1 + count) -- TODO confirm.
    ln_two = log(2)
    total = [0] * get_dimensions(db_conn)
    for tag_name, occurrences in track['tags']:
        tag_vec, global_weight = get_vector(db_conn, tag_name)
        if curr_missing(tag_vec) if False else tag_vec is None:
            continue
        scale = global_weight * log1p(float(occurrences)) / ln_two
        # zip() stops at the shorter sequence, exactly as before.
        total = [scale * component + acc
                 for component, acc in zip(tag_vec, total)]
    length = sqrt(sum(value * value for value in total))
    if norm_is_positive(length) if False else length > 0:
        return tuple(value / length for value in total)
    return tuple(total)
def compute_vector(track, db_conn):
    """Return the weighted, length-normalised sum of a track's tag vectors.

    ``track['tags']`` is iterated as (tag, count) pairs. For each tag,
    ``get_vector(db_conn, tag)`` supplies a vector and a weight ``gw``; when
    the vector is ``None`` the tag is ignored. Otherwise the vector is
    multiplied by ``gw * log1p(count) / log(2)`` and added to the accumulator,
    which is initialised to ``get_dimensions(db_conn)`` zeros.

    The result is a tuple: the accumulator divided by its Euclidean norm if
    that norm is greater than zero, else the accumulator unchanged.
    """
    accumulator = [0] * get_dimensions(db_conn)
    for name, freq in track['tags']:
        tag_vector, tag_weight = get_vector(db_conn, name)
        if tag_vector is not None:
            factor = tag_weight * log1p(float(freq)) / log(2)
            blended = []
            # Pairing via zip() keeps the original truncate-to-shorter rule.
            for component, running in zip(tag_vector, accumulator):
                blended.append(factor * component + running)
            accumulator = blended
    squared_sum = sum([entry * entry for entry in accumulator])
    magnitude = sqrt(squared_sum)
    if not magnitude > 0:
        return tuple(accumulator)
    return tuple([entry / magnitude for entry in accumulator])
def tag_features(tag):
    """Look up ``tag`` with ``get_vector`` on the module-level ``conn``.

    The lookup is made with ``normalize=False`` and its result is returned
    unchanged.
    """
    features = get_vector(conn, tag, normalize=False)
    return features
#!/usr/bin/env python
"""Read tag feature vectors from the database."""

import argparse
import math
import sqlite3
import struct
import sys

from libunison.utils import DB_PATH, get_vector, print_vector


def _parse_args():
    """Parse the command line.

    Accepts a positional ``tag`` and an optional ``--db`` path that defaults
    to ``DB_PATH``. The module docstring is used as the help description.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('tag')
    parser.add_argument('--db', default=DB_PATH)
    return parser.parse_args()


if __name__ == '__main__':
    args = _parse_args()
    # Decode the UTF-8 command-line argument and discard case.
    tag = unicode(args.tag, encoding='utf-8').lower()
    conn = sqlite3.connect(args.db)
    try:
        vector, weight = get_vector(conn, tag)
    finally:
        # The connection is only needed for the lookup; release it even if
        # the lookup raises.
        conn.close()
    if vector is None:
        # Parenthesised single-string form prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("Tag not found.")
        sys.exit(0)
    print_vector(vector, weight)