def __init__(self):
    """Set up the helper, the source matrices, their blend and the NL tools.

    Attributes assigned, in order:
      helper          -- a fresh ``DivsiHelper`` instance
      cnet_normalized -- result of ``conceptnet_2d_from_db('en')``; no
                         further normalization call is applied here
      affectwn_raw    -- object loaded via ``get_picklecached_thing`` from
                         'data/divsi/affectiveWNmatrix.pickle'
      affectWN        -- ``affectwn_raw.normalized()``
      analogySpace    -- ``.svd()`` of the ``Blend`` of affectWN and
                         cnet_normalized (in that order)
      EN_NL           -- result of ``get_nl('en')``
    """
    self.helper = DivsiHelper()

    # English ConceptNet matrix, fetched first so the load order is kept.
    conceptnet = conceptnet_2d_from_db('en')
    self.cnet_normalized = conceptnet

    # Affective WordNet matrix: keep both the raw and normalized forms.
    # NOTE(review): the path is relative, so it resolves against the
    # process's current working directory.
    raw_affect = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
    self.affectwn_raw = raw_affect
    normalized_affect = raw_affect.normalized()
    self.affectWN = normalized_affect

    # Blend the two matrices and factor the blend.
    blended = Blend([normalized_affect, conceptnet])
    self.analogySpace = blended.svd()

    self.EN_NL = get_nl('en')
def __init__(self, emoticon_file=path+'/data/emoticons.csv',
             affect_wordnet_file=path+'/data/affectiveWNmatrix.pickle'):
    """Load the emoticon table and build the affect/ConceptNet blend.

    Parameters:
      emoticon_file       -- path of a CSV file whose rows are
                             ``emoticon, meaning`` pairs.
      affect_wordnet_file -- path handed to ``get_picklecached_thing`` to
                             obtain the affective WordNet matrix.

    Attributes assigned:
      emoticon      -- dict mapping the UTF-8-decoded emoticon to its meaning
      emoticon_list -- ``self.emoticon.keys()``
      affectwn      -- ``.svd()`` of the ``Blend`` of the normalized affect
                       matrix and the ``conceptnet_2d_from_db('en')`` matrix
      nl            -- result of ``get_nl('en')``

    Raises:
      ValueError -- from tuple unpacking, if a CSV row does not have
                    exactly two fields.
    """
    # Build emoticon dictionary.  The file is opened in a ``with`` block so
    # the handle is closed deterministically, including when a malformed
    # row raises part-way through.
    self.emoticon = {}
    with open(emoticon_file, 'r') as emoticon_handle:
        for emoticon, meaning in csv.reader(emoticon_handle):
            # Keys are decoded from UTF-8; the meaning is stored as read.
            self.emoticon[emoticon.decode('utf-8')] = meaning
    self.emoticon_list = self.emoticon.keys()

    # Create blending of affect WordNet and ConceptNet
    cnet = conceptnet_2d_from_db('en')
    affectwn_raw = get_picklecached_thing(affect_wordnet_file)
    affectwn_normalized = affectwn_raw.normalized()
    theblend = Blend([affectwn_normalized, cnet])
    self.affectwn = theblend.svd()

    # Get natural language processing tool
    self.nl = get_nl('en')
from csc.conceptnet4.models import Concept
from csc.nl import get_nl

# --- Part 1: query the database for one concept -------------------------
# Look up the English concept 'dog' and print its first 20 forward
# assertions, framed by blank lines.
dog = Concept.get('dog', 'en')
print('')
print("here are the key associations with %s" % 'dog')
forward_assertions = dog.get_assertions_forward()[:20]
for fwd in forward_assertions:
    print(fwd)
print('')

# --- Part 2: SVD over a pickled tensor ----------------------------------
# Load the cached tensor and factor it, keeping k=100 components.
from csc.util.persist import get_picklecached_thing
tensor = get_picklecached_thing('tensor.gz')
svd = tensor.svd(k=100)

# Rank entries by dot product against the weighted U vector of the query
# word and keep the top 10.
myword = 'teach'
print(myword)
query_vec = svd.weighted_u_vec(myword)
scores = svd.u_dotproducts_with(query_vec)
most_associated = scores.top_items(10)
print('')
print('these words are most associated with %s' % myword)
for m in most_associated:
    print(m)

# predict properties of a word
import orange
import orngMDS
import numpy as np
from math import acos as _acos
from csc.divisi.tensor import DenseTensor
from csc.divisi.view import LabeledView
from csc.util.persist import get_picklecached_thing

# Load the cached matrix, normalize it and factor it.
cnet = get_picklecached_thing('cnet.pickle.gz')
aspace = cnet.normalized().svd()

# One row of the weighted U tensor per index 0..n-1; each row is passed
# through .hat() (presumably unit-length scaling -- confirm in divisi).
n = aspace.u.shape[0]
wut = aspace.weighted_u.tensor
vecs = (wut[i, :] for i in xrange(n))
normalized_vecs = [vec.hat() for vec in vecs]


def acos(x):
    """Return math.acos of x after clamping x into [-1, 1].

    Values above 1 are treated as 1 and values below -1 as -1, so the
    underlying acos never sees an out-of-domain argument (presumably to
    absorb rounding error in the products below).
    """
    if x > 1:
        x = 1
    elif x < -1:
        x = -1
    return _acos(x)


concept_labels = aspace.weighted_u.label_list(0)

# Fill the lower triangle (including the diagonal) of the symmetric matrix
# with the clamped arc cosine of each pairwise product.
print('dist')
distance = orange.SymMatrix(n)
for i in range(n):
    for j in range(i + 1):
        pair_product = normalized_vecs[i] * normalized_vecs[j]
        distance[i, j] = acos(pair_product)
import orange
import orngMDS
import numpy as np
from math import acos as _acos
from csc.divisi.tensor import DenseTensor
from csc.divisi.view import LabeledView
from csc.util.persist import get_picklecached_thing

# Load the cached matrix, normalize it and factor it.
cnet = get_picklecached_thing('cnet.pickle.gz')
aspace = cnet.normalized().svd()

# One row of the weighted U tensor per index 0..n-1; each row is passed
# through .hat() (presumably unit-length scaling -- confirm in divisi).
n = aspace.u.shape[0]
wut = aspace.weighted_u.tensor
vecs = (wut[i, :] for i in xrange(n))
normalized_vecs = [vec.hat() for vec in vecs]


def acos(x):
    """Return math.acos of x after clamping x into [-1, 1].

    Values above 1 are treated as 1 and values below -1 as -1, so the
    underlying acos never sees an out-of-domain argument (presumably to
    absorb rounding error in the products below).
    """
    if x > 1:
        x = 1
    elif x < -1:
        x = -1
    return _acos(x)


concept_labels = aspace.weighted_u.label_list(0)

# Fill the lower triangle (including the diagonal) of the symmetric matrix
# with the clamped arc cosine of each pairwise product.
print('dist')
distance = orange.SymMatrix(n)
for i in range(n):
    for j in range(i + 1):
        pair_product = normalized_vecs[i] * normalized_vecs[j]
        distance[i, j] = acos(pair_product)

# Hand the distance matrix to Orange's MDS.
print('setup')
mds = orngMDS.MDS(distance)
# vector can in fact be a matrix of many vectors # Dimensions: # vector = (m x ndim) or possibly just (ndim), # with ndim = K from the SVD # dist = (m x N) # means = (N) # mdsarray_sharp = (N x k) dist = (compute_distances(vector, self.landmarks) - self.means) / 2 return np.dot(dist, -self.mdsarray_sharp) def aspace_mds(): from csc.conceptnet.analogyspace import conceptnet_2d_from_db cnet = conceptnet_2d_from_db('en') aspace = cnet.normalized().svd(k=100) labels = cnet.label_list(0) ptmatrix = data(aspace.u) ptmatrix *= data(aspace.svals) proj = mds(ptmatrix) result = proj.project(data(aspace.u)) return LabeledView(DenseTensor(result), [labels, None]) if __name__ == '__main__': aspace_mds = get_picklecached_thing( "/Users/rspeer/code/luminoso/mds.pickle", aspace_mds)
# NOTE(review): the first statement below is the tail of a method whose
# `def` line lies before this excerpt; it is reproduced unchanged.
        # N and k are the two dimensions of mdsarray_sharp.
        self.N, self.k = self.mdsarray_sharp.shape

    def project(self, vector):
        """Map `vector` through the stored landmarks, means and MDS array.

        Computes ``compute_distances(vector, self.landmarks)``, subtracts
        ``self.means``, halves the result, and returns its ``np.dot`` with
        the negated ``self.mdsarray_sharp``.
        """
        # vector can in fact be a matrix of many vectors
        # Dimensions:
        # vector = (m x ndim) or possibly just (ndim),
        # with ndim = K from the SVD
        # dist = (m x N)
        # means = (N)
        # mdsarray_sharp = (N x k)
        dist = (compute_distances(vector, self.landmarks) - self.means)/2
        return np.dot(dist, -self.mdsarray_sharp)


def aspace_mds():
    """Project the English ConceptNet SVD through `mds` and label the rows.

    Steps, as written below:
      1. Load the matrix with ``conceptnet_2d_from_db('en')`` and take
         ``normalized().svd(k=100)`` of it.
      2. Scale ``data(aspace.u)`` in place by ``data(aspace.svals)`` and
         build a projection object with ``mds(...)`` from the result.
      3. Call ``proj.project(data(aspace.u))``.

    Returns a ``LabeledView`` wrapping ``DenseTensor(result)`` whose first
    axis carries ``cnet.label_list(0)`` and whose second axis is unlabeled
    (``None``).
    """
    # Imported locally rather than at module level.
    from csc.conceptnet.analogyspace import conceptnet_2d_from_db
    cnet = conceptnet_2d_from_db('en')
    aspace = cnet.normalized().svd(k=100)
    labels = cnet.label_list(0)
    ptmatrix = data(aspace.u)
    # NOTE(review): `*=` is an in-place multiply.  If `data()` hands back a
    # view of aspace.u rather than a copy, the `data(aspace.u)` passed to
    # `project` below is already scaled -- confirm which is intended.
    ptmatrix *= data(aspace.svals)
    proj = mds(ptmatrix)
    result = proj.project(data(aspace.u))
    return LabeledView(DenseTensor(result), [labels, None])


if __name__ == '__main__':
    # Rebinds the module-level name `aspace_mds` from the function to
    # whatever get_picklecached_thing returns (presumably the cached or
    # freshly computed result -- confirm against csc.util.persist).
    # The cache location is an absolute, machine-specific path.
    aspace_mds = get_picklecached_thing("/Users/rspeer/code/luminoso/mds.pickle", aspace_mds)