コード例 #1
0
ファイル: actions.py プロジェクト: jackleg/wizlang
 def __init__(self,
              preloaded_actor=None,
              subsampling=False,
              fast=False,
              test=True):
     """Populate the word <-> index lookup tables on this instance.

     Without a ``preloaded_actor`` the tables come from
     ``data/freebase.words``: a pickled copy (``<path>.pickle``) is
     loaded when it exists, otherwise the tables are built with
     ``veclib.get_words`` and pickled so later start-ups skip the
     rebuild.  With a ``preloaded_actor`` its tables are shared by
     reference and nothing is read from disk.

     Args:
         preloaded_actor: object exposing ``aw2i`` and ``ai2w``.  Any
             falsy value means "load from disk".
         subsampling: accepted, but not read by the loading logic below.
         fast: accepted, but not read by the loading logic below.
         test: accepted, but not read by the loading logic below.
     """
     if not preloaded_actor:
         trained = "data"
         # The fullwiki word list used to be assigned here as well, but it
         # was overwritten straight away; freebase.words is the path that
         # has always been in effect.
         fnw = '%s/freebase.words' % trained
         cache_path = fnw + '.pickle'
         # NOTE(review): loading of the Wikipedia title maps (wc2t / wt2c)
         # was permanently disabled behind ``if False:`` and has been
         # dropped; this constructor never set those attributes.
         if os.path.exists(cache_path):
             # SECURITY: unpickling can execute arbitrary code -- only
             # load cache files written by this application.
             # Binary mode + ``with`` so the handle is always closed.
             with open(cache_path, 'rb') as cache_file:
                 self.aw2i, self.ai2w = cPickle.load(cache_file)
         else:
             self.aw2i, self.ai2w = veclib.get_words(fnw)
             with open(cache_path, 'wb') as cache_file:
                 cPickle.dump([self.aw2i, self.ai2w], cache_file)
         # Parenthesised form prints the same text under Python 2.
         print(" done with aw2i")
     else:
         # Reuse the already-loaded tables (shared, not copied).
         self.aw2i = preloaded_actor.aw2i
         self.ai2w = preloaded_actor.ai2w
コード例 #2
0
ファイル: actions.py プロジェクト: EricSchles/wizlang
 def __init__(self, preloaded_actor=None, subsampling=False, 
              fast=False, test=True):
     """Load (or borrow) the word/index mappings used by this object.

     When ``preloaded_actor`` is falsy, the mappings for
     ``data/freebase.words`` are read from the ``.pickle`` file next to
     it if that file exists; otherwise ``veclib.get_words`` builds them
     and the result is pickled to cut down on IO for later runs.  When
     a ``preloaded_actor`` is supplied, its ``aw2i`` and ``ai2w``
     objects are assigned to this instance as-is (no copy is made).

     Args:
         preloaded_actor: object providing ``aw2i`` and ``ai2w``
             attributes, or a falsy value to load from disk.
         subsampling: accepted, but not read by the loading logic below.
         fast: accepted, but not read by the loading logic below.
         test: accepted, but not read by the loading logic below.
     """
     if not preloaded_actor:
         trained = "data"
         # Only this word list was ever effective: the earlier fullwiki
         # assignment was overwritten before being used.
         fnw = '%s/freebase.words' % trained
         pickle_path = fnw + '.pickle'
         # NOTE(review): the wc2t / wt2c (Wikipedia title) loading code
         # sat behind ``if False:`` and could never run, so it has been
         # removed; these attributes were never set here.
         if os.path.exists(pickle_path):
             # SECURITY: pickle data is executable -- load only caches
             # produced by this application.
             # ``with`` closes the handle; pickles are opened in binary.
             with open(pickle_path, 'rb') as stream:
                 self.aw2i, self.ai2w = cPickle.load(stream)
         else:
             self.aw2i, self.ai2w = veclib.get_words(fnw)
             with open(pickle_path, 'wb') as stream:
                 cPickle.dump([self.aw2i, self.ai2w], stream)
         # Single-argument print(...) behaves identically in Python 2.
         print(" done with aw2i")
     else:
         # Share the tables that the other actor already holds.
         self.aw2i = preloaded_actor.aw2i
         self.ai2w = preloaded_actor.ai2w
コード例 #3
0
ファイル: backend.py プロジェクト: arjunmenon/wizlang
from utils import *
 
# Flask application; static files are served from the site root.
app = Flask(__name__,  static_folder='static',
            static_url_path='', template_folder='templates')

# Locations of the trained artefacts on the deployment host.
trained = "/home/ubuntu/data"
fnv = '%s/vectors.fullwiki.1000.s50.num.npy' % trained
fnw = '%s/vectors.fullwiki.1000.s50.words' % trained
ffb = '%s/freebase_types_and_fullwiki.1000.s50.words' % trained

# Load the vector library, then run veclib.normalize through veclib.split.
# NOTE(review): presumably split() applies the function chunk-wise to
# limit peak memory -- confirm in veclib.
avl = veclib.get_vector_lib(fnv)
#avl = veclib.normalize(avl)
avl = veclib.split(veclib.normalize, avl)

# Word <-> index tables: reuse the pickled copy when present, otherwise
# build them from the word list and cache them for the next start-up.
if os.path.exists(fnw + '.pickle'):
    # SECURITY: unpickling can execute arbitrary code -- only load cache
    # files written by this application.
    with open(fnw + '.pickle', 'rb') as pickled:
        aw2i, ai2w = cPickle.load(pickled)
else:
    aw2i, ai2w = veclib.get_words(fnw)
    with open(fnw + '.pickle', 'wb') as pickled:
        cPickle.dump([aw2i, ai2w], pickled)

# Optional truncation: set frac to a value in (0, 1] to keep only the
# leading fraction of the vectors.  Disabled while frac is None.
frac = None
if frac:
    # Capture the row count BEFORE slicing; afterwards shape[0] == end and
    # there would be nothing left to iterate over.  (The previous code
    # passed the whole shape tuple to range(), which raises TypeError.)
    total = avl.shape[0]
    end = int(total * frac)
    avl = avl[:end]
    for i in range(end, total):
        # NOTE(review): ai2w[i].pop() only works if each ai2w entry is a
        # mutable container; if entries are plain words this probably
        # wants ai2w.pop(i) -- confirm against veclib.get_words.
        del aw2i[ai2w[i].pop()]

@app.route('/farthest/<raw_query>')
#@json_exception
def farthest(raw_query='{"args":["iphone", "ipad", "ipod", "walkman"]}'):
    """Given a list of arguments, calculate the full N^2 distance matrix
    and return the item farthest away. The total distance of a node is
    just its distance to all the other nodes, taken separately.

    raw_query is the trailing path segment of /farthest/<raw_query>; its
    default is a JSON object string holding an "args" list."""
    print 'QUERY'
コード例 #4
0
ファイル: backend.py プロジェクト: hellcoderz/wizlang
from utils import *
 
# Flask application; static assets are exposed at the URL root.
app = Flask(__name__,  static_folder='static',
            static_url_path='', template_folder='templates')

# Paths to the trained data files on the server.
trained = "/home/ubuntu/data"
fnv = '%s/vectors.fullwiki.1000.s50.num.npy' % trained
fnw = '%s/vectors.fullwiki.1000.s50.words' % trained
ffb = '%s/freebase_types_and_fullwiki.1000.s50.words' % trained

# Read the vector library and pass veclib.normalize through veclib.split.
# NOTE(review): split() presumably applies normalize in pieces to keep
# memory bounded -- verify in veclib.
avl = veclib.get_vector_lib(fnv)
#avl = veclib.normalize(avl)
avl = veclib.split(veclib.normalize, avl)

# Word <-> index lookup tables, cached on disk as a pickle beside the
# word list so they are only built once.
if os.path.exists(fnw + '.pickle'):
    # SECURITY: pickle files are executable data -- load only caches that
    # this application produced.
    with open(fnw + '.pickle', 'rb') as cache_file:
        aw2i, ai2w = cPickle.load(cache_file)
else:
    aw2i, ai2w = veclib.get_words(fnw)
    with open(fnw + '.pickle', 'wb') as cache_file:
        cPickle.dump([aw2i, ai2w], cache_file)

# Optional truncation to the leading fraction of the vectors; inactive
# while frac is None.
frac = None
if frac:
    # Remember the original row count first: after the slice below
    # avl.shape[0] equals end.  range() also needs an int here, not the
    # shape tuple the earlier code handed it (TypeError).
    total = avl.shape[0]
    end = int(total * frac)
    avl = avl[:end]
    for i in range(end, total):
        # NOTE(review): ai2w[i].pop() requires each ai2w entry to be a
        # mutable container; if entries are plain words the intent was
        # likely ai2w.pop(i) -- confirm against veclib.get_words.
        del aw2i[ai2w[i].pop()]

@app.route('/farthest/<raw_query>')
#@json_exception
def farthest(raw_query='{"args":["iphone", "ipad", "ipod", "walkman"]}'):
    """Given a list of arguments, calculate the full N^2 distance matrix
    and return the item farthest away. The total distance of a node is
    just its distance to all the other nodes, taken separately.

    raw_query is the trailing path segment of /farthest/<raw_query>; its
    default is a JSON object string holding an "args" list."""
    print 'QUERY'