def get_yelp_blend():
    """Blend the ConceptNet tensor with the two Yelp tensors.

    The ConceptNet tensor is fetched through ``get_picklecached_thing`` with
    ``conceptnet_with_custom_identities`` as its generator function.  The two
    Yelp tensors are fetched with no generator function, then printed.

    Returns whatever ``run_3blend_simple`` returns for the three tensors.
    """
    conceptnet = get_picklecached_thing(
        'cnet.pickle', conceptnet_with_custom_identities, 'ConceptNet')

    # To regenerate these next two, run yelp_matrix_creation.py
    yelp_sources = (
        ('yelp-tfidf.pickle', 'Yelp TFIDF'),
        ('yelp_cats_and_price.pickle', 'Yelp Cats'),
    )
    yelp_tensors = [get_picklecached_thing(fname, None, label)
                    for fname, label in yelp_sources]

    # Show both Yelp tensors (words first, then categories) before blending.
    for tensor in yelp_tensors:
        print(tensor)

    yelp_words, yelp_cats = yelp_tensors
    return run_3blend_simple(conceptnet, yelp_words, yelp_cats, None, 0.03)
def example():
    """Print highly ranked concepts that are not already in ConceptNet.

    In the context of a corpus of healthcare-related messages (not publicly
    provided), this asks: what do people want?
    """
    health = get_picklecached_thing('health.pickle.gz', None)
    svd = run_cnet_blend(health, 'health_blend')
    svd.summarize(10)

    # A labeled vector of similarities to the feature person\Desires.
    vec = predict_concepts(svd, ('left', 'Desires', 'person'))

    # The list of concepts in ConceptNet.
    concepts = cnet.label_list(0)

    # Walk the top-ranked results looking for concepts that *aren't* in
    # ConceptNet.
    for item in vec.top_items(5000):
        name = item[0]

        # A result made up entirely of stopwords is skipped; any() stops at
        # the first non-stopword, like the break in a manual scan.
        has_content = any(not en.nl.is_stopword(word)
                          for word in name.split(' '))
        if not has_content or name in concepts:
            continue

        # Found one: print it along with its score.
        print("%s %s" % (name, item[1]))
cnettensor = get_picklecached_thing('cnet.pickle', conceptnet_with_custom_identities, 'ConceptNet') # To regenerate these next two, run yelp_matrix_creation.py yelpwordstensor = get_picklecached_thing('yelp-tfidf.pickle', None, 'Yelp TFIDF') yelpcatstensor = get_picklecached_thing('yelp_cats_and_price.pickle', None, 'Yelp Cats') print yelpwordstensor print yelpcatstensor return run_3blend_simple(cnettensor, yelpwordstensor, yelpcatstensor, None, 0.03) if __name__ == '__main__': svd = get_picklecached_thing('yelp-cnet-blend.pickle', get_yelp_blend) export_svdview(svd, "/csc/svdview/data/yelp-tweek.tsv") if False: ### ### Old blending stuff ### def find_factor_from_SVD(t1, t2): sigma1 = t1.svals[0:10] a = sigma1[0] sigma2 = t2.svals[0:10] b = sigma2[0] return float(a / (a + b)) def normalize_and_copy_three(tensor):
def get_thing(fname, func=None):
    """Fetch a pickle-cached object stored under a local file name.

    ``fname`` is passed through ``local_file`` and the result is handed to
    ``get_picklecached_thing`` together with ``func``.

    Returns whatever ``get_picklecached_thing`` returns.
    """
    path = local_file(fname)
    return get_picklecached_thing(path, func)
def _get_size_blend():
    """Return the baked, normalized blend of the size matrix and ConceptNet.

    Both inputs are fetched through ``get_picklecached_thing`` from files
    under ``FILEPATH``: the size matrix with ``_make_size_matrix`` as its
    generator function, and ConceptNet with a generator that calls
    ``conceptnet_2d_from_db('en')``.
    """
    def cached(filename, generator):
        # Every cache file sits directly under FILEPATH.
        return get_picklecached_thing(FILEPATH + os.sep + filename, generator)

    def english_conceptnet():
        return conceptnet_2d_from_db('en')

    size_matrix = cached('sizematrix.pickle.gz', _make_size_matrix)
    conceptnet = cached('cnet.pickle.gz', english_conceptnet)

    blend = Blend([size_matrix, conceptnet])
    return blend.normalized(mode=[0, 1]).bake()
def _make_size_matrix():
    """Build a concept-by-feature matrix from ``size_similarities.physnet``.

    Each line of the file is evaluated as a Python expression; from the
    resulting value ``entry`` the triple ``(entry[0], entry[1][1],
    entry[1][2])`` is recorded with a weight of 40.

    Returns the result of ``ConceptByFeatureMatrix.from_triples`` applied to
    the collected ``(triple, weight)`` pairs.
    """
    matrixlist = []
    # The with-block closes the file even when a line fails to evaluate.
    with open("size_similarities.physnet") as sizefile:
        for line in sizefile:
            # SECURITY NOTE(review): eval() executes arbitrary code found in
            # the data file, so this must only be run on a trusted file.
            # Consider ast.literal_eval if every line is a plain literal.
            entry = eval(line)
            matrixlist.append(((entry[0], entry[1][1], entry[1][2]), 40))
    return ConceptByFeatureMatrix.from_triples(matrixlist)


def _get_size_blend():
    """Return the baked, normalized blend of the size matrix and ConceptNet.

    Both inputs are fetched through ``get_picklecached_thing`` from files
    under ``FILEPATH``.
    """
    sizes = get_picklecached_thing(FILEPATH + os.sep + 'sizematrix.pickle.gz',
                                   _make_size_matrix)
    cnet = get_picklecached_thing(FILEPATH + os.sep + 'cnet.pickle.gz',
                                  lambda: conceptnet_2d_from_db('en'))
    size_blend = Blend([sizes, cnet]).normalized(mode=[0, 1]).bake()
    return size_blend


# Module-level state: the size blend and its 100-dimensional SVD.
sizeblend = get_picklecached_thing(FILEPATH + os.sep + 'sizeblend.pickle.gz',
                                   _get_size_blend)
sizesvd = sizeblend.svd(k=100)
#cnet_norm = conceptnet_2d_from_db('en').normalized()
#rawsvd = cnet_norm.svd()


def get_similar_size_examples(concept):
    """Return ``sizesvd.u_angles_to`` for the concept's normalized vector.

    Returns None when ``concept`` is not in ``sizeblend``.
    """
    if concept not in sizeblend:
        return None
    concept_vector = sizesvd.weighted_u[concept, :].hat()
    like_concepts = sizesvd.u_angles_to(concept_vector)
    return like_concepts


def get_similar_raw_examples(concept):
    """Compute ``rawsvd.u_angles_to`` for the concept's normalized vector.

    NOTE(review): the only visible assignment to ``rawsvd`` is in the
    commented-out lines above, so unless it is defined elsewhere this raises
    NameError when called.  The visible text of this function also has no
    return statement -- confirm against the full file.
    """
    concept_vector = rawsvd.weighted_u[concept, :].hat()
    like_concepts = rawsvd.u_angles_to(concept_vector)
def _get_color_blend():
    """Return the baked, normalized blend of the color matrix and ConceptNet.

    The color matrix is fetched through ``get_picklecached_thing`` with
    ``_make_color_matrix`` as its generator function; ConceptNet is fetched
    with a generator that calls ``conceptnet_2d_from_db('en')``.  Both cache
    files sit under ``FILEPATH``.
    """
    def english_conceptnet():
        return conceptnet_2d_from_db('en')

    color_path = FILEPATH + os.sep + 'colormatrix.pickle.gz'
    color_matrix = get_picklecached_thing(color_path, _make_color_matrix)

    conceptnet_path = FILEPATH + os.sep + 'cnet.pickle.gz'
    conceptnet = get_picklecached_thing(conceptnet_path, english_conceptnet)

    # Blend, normalize with mode=[0, 1], then bake.
    normalized = Blend([color_matrix, conceptnet]).normalized(mode=[0, 1])
    return normalized.bake()
if word == '': continue print color, word matrixlist.append(((word, 'HasColor', color), 10)) matrixlist.append(((word, 'HasProperty', 'colorful'), 10)) matrixlist.append(((word, 'HasProperty', color), 10)) matrixlist.append(((color, 'HasColor', color), 50)) matrixlist.append(((color, 'HasProperty', color), 50)) return ConceptByFeatureMatrix.from_triples(matrixlist) def _get_color_blend(): colors = get_picklecached_thing(FILEPATH+os.sep+'colormatrix.pickle.gz', _make_color_matrix) cnet = get_picklecached_thing(FILEPATH+os.sep+'cnet.pickle.gz', lambda: conceptnet_2d_from_db('en')) colorblend = Blend([colors, cnet]).normalized(mode=[0,1]).bake() return colorblend colorblend = get_picklecached_thing(FILEPATH+os.sep+'colorblend.pickle.gz', _get_color_blend) thesvd = colorblend.svd(k=100) colorful_concepts = thesvd.u.label_list(0) #print thesvd.summarize(10) colorful_vec = thesvd.v[('right', u'HasProperty', u'colorful'), :] colorlist = ['blue', 'black', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'] rgb = {'blue': (0,0,255), 'black': (0,0,0), 'brown': (139, 69, 19), 'green': (0, 255, 0), 'grey': (100,100,100), 'orange': (255, 165,0), 'pink': (255,105,180), 'purple': (160, 32, 240), 'red': (255,0,0), 'white': (255, 255, 255), 'yellow': (255,255,0)} #colorvecs = [(x, thesvd.weighted_u[x,:]) for x in colorlist] colorvecs = [(x, thesvd.weighted_u[x,:]*.1 + thesvd.weighted_v[('right', 'HasColor', x),:]) for x in colorlist] def how_colorful(word, thesvd): wordvc = thesvd.weighted_u[word,:] return wordvc.hat() * make_category(thesvd, concepts=rgb.keys()) def _get_color_mix(adhoc, thesvd): vec_dict = {}
cnet[key] = 0 return cnet def get_yelp_blend(): cnettensor = get_picklecached_thing('cnet.pickle', conceptnet_with_custom_identities, 'ConceptNet') # To regenerate these next two, run yelp_matrix_creation.py yelpwordstensor = get_picklecached_thing('yelp-tfidf.pickle', None, 'Yelp TFIDF') yelpcatstensor = get_picklecached_thing('yelp_cats_and_price.pickle', None, 'Yelp Cats') print yelpwordstensor print yelpcatstensor return run_3blend_simple(cnettensor, yelpwordstensor, yelpcatstensor, None, 0.03) if __name__ == '__main__': svd = get_picklecached_thing('yelp-cnet-blend.pickle', get_yelp_blend) export_svdview(svd, "/csc/svdview/data/yelp-tweek.tsv") if False: ### ### Old blending stuff ### def find_factor_from_SVD(t1, t2): sigma1 = t1.svals[0:10] a = sigma1[0] sigma2 = t2.svals[0:10] b = sigma2[0]
def run_cnet_blend(other, FILENAME='blend'):
    """Blend ``other`` via ``make_blend``, take the SVD, and export its ``u``.

    The blend is fetched through ``get_picklecached_thing`` from
    ``FILENAME + '.pickle.gz'``, with a generator function that calls
    ``make_blend(other)``.  The ``u`` matrix of the SVD is passed to
    ``write_packed`` under the name ``FILENAME`` with an identity ``unstem``
    function.

    Returns the SVD of the blend.
    """
    def build_blend():
        return make_blend(other)

    def identity(label):
        # Labels are handed to write_packed unchanged.
        return label

    cache_name = FILENAME + '.pickle.gz'
    blend = get_picklecached_thing(cache_name, build_blend)
    svd = blend.svd()
    write_packed(svd.u, FILENAME, unstem=identity)
    return svd
from csc.conceptnet4.models import *
from csc.conceptnet4.analogyspace import *
from csc.divisi.util import get_picklecached_thing
from csc.divisi.blend import Blend
from csc.divisi.export_svdview import write_packed

# English ConceptNet, fetched through the pickle cache; the generator
# function calls conceptnet_2d_from_db('en').
cnet = get_picklecached_thing('cnet.pickle.gz',
                              lambda: conceptnet_2d_from_db('en'))


def make_blend(other):
    """Return a ``Blend`` of the module-level ``cnet`` with ``other``."""
    members = [cnet, other]
    return Blend(members)


def run_cnet_blend(other, FILENAME='blend'):
    """Blend ``other`` via ``make_blend``, take the SVD, and export its ``u``.

    The blend is fetched through ``get_picklecached_thing`` from
    ``FILENAME + '.pickle.gz'``.  The ``u`` matrix of the SVD is passed to
    ``write_packed`` under the name ``FILENAME`` with an identity ``unstem``
    function.

    Returns the SVD of the blend.
    """
    def build_blend():
        return make_blend(other)

    def identity(label):
        return label

    blend = get_picklecached_thing(FILENAME + '.pickle.gz', build_blend)
    svd = blend.svd()
    write_packed(svd.u, FILENAME, unstem=identity)
    return svd


def example():
    # In the context of this corpus of healthcare-related messages (not
    # publicly provided), what do people want?
    health = get_picklecached_thing('health.pickle.gz', None)
    svd = run_cnet_blend(health, 'health_blend')
    svd.summarize(10)

    # Make a labeled vector of similarities to the feature person\Desires.
    desires_feature = ('left', 'Desires', 'person')
    vec = predict_concepts(svd, desires_feature)

    # Get the list of concepts in ConceptNet.
    concepts = cnet.label_list(0)
    # NOTE(review): the visible text of this function stops here; confirm
    # the remainder against the full file.