예제 #1
0
def get_yelp_blend():
    """Collect the three input tensors and run the three-way blend on them.

    The ConceptNet, Yelp TF-IDF and Yelp category tensors are each obtained
    through ``get_picklecached_thing``. The two Yelp tensors are printed, then
    all three are handed to ``run_3blend_simple`` together with ``None`` and
    ``0.03``; whatever that call returns is returned unchanged.
    """
    conceptnet = get_picklecached_thing(
        'cnet.pickle', conceptnet_with_custom_identities, 'ConceptNet')

    # To regenerate these next two, run yelp_matrix_creation.py
    # (only ``None`` is supplied here in place of a generator function).
    yelp_words = get_picklecached_thing(
        'yelp-tfidf.pickle', None, 'Yelp TFIDF')
    yelp_cats = get_picklecached_thing(
        'yelp_cats_and_price.pickle', None, 'Yelp Cats')

    # A single parenthesised argument prints exactly like the bare statement.
    print(yelp_words)
    print(yelp_cats)

    return run_3blend_simple(conceptnet, yelp_words, yelp_cats, None, 0.03)
예제 #2
0
def get_yelp_blend():
    """Collect the three input tensors and run the three-way blend on them.

    The ConceptNet, Yelp TF-IDF and Yelp category tensors are each obtained
    through ``get_picklecached_thing``. The two Yelp tensors are printed, then
    all three are handed to ``run_3blend_simple`` together with ``None`` and
    ``0.03``; whatever that call returns is returned unchanged.
    """
    conceptnet = get_picklecached_thing(
        'cnet.pickle', conceptnet_with_custom_identities, 'ConceptNet')

    # To regenerate these next two, run yelp_matrix_creation.py
    # (only ``None`` is supplied here in place of a generator function).
    yelp_words = get_picklecached_thing(
        'yelp-tfidf.pickle', None, 'Yelp TFIDF')
    yelp_cats = get_picklecached_thing(
        'yelp_cats_and_price.pickle', None, 'Yelp Cats')

    # A single parenthesised argument prints exactly like the bare statement.
    print(yelp_words)
    print(yelp_cats)

    return run_3blend_simple(conceptnet, yelp_words, yelp_cats, None, 0.03)
예제 #3
0
def example():
    # In the context of this corpus of healthcare-related messages (not
    # publicly provided), what do people want?
    
    health = get_picklecached_thing('health.pickle.gz', None)
    svd = run_cnet_blend(health, 'health_blend')
    svd.summarize(10)

    # Make a labeled vector of similarities to the feature person\Desires.
    vec = predict_concepts(svd, ('left', 'Desires', 'person'))

    # Get the list of concepts in ConceptNet.
    concepts = cnet.label_list(0)

    # Look for concepts that *aren't* in ConceptNet and are ranked highly
    # in the vector of results.
    for item in vec.top_items(5000):
        
        # skip over stopwords
        contentful = False
        for word in item[0].split(' '):
            if not en.nl.is_stopword(word):
                contentful = True
                break

        if contentful and item[0] not in concepts:
            # found one. print it along with its score.
            print item[0], item[1]
예제 #4
0
    cnettensor = get_picklecached_thing('cnet.pickle',
                                        conceptnet_with_custom_identities,
                                        'ConceptNet')
    # To regenerate these next two, run yelp_matrix_creation.py
    yelpwordstensor = get_picklecached_thing('yelp-tfidf.pickle', None,
                                             'Yelp TFIDF')
    yelpcatstensor = get_picklecached_thing('yelp_cats_and_price.pickle', None,
                                            'Yelp Cats')
    print yelpwordstensor
    print yelpcatstensor
    return run_3blend_simple(cnettensor, yelpwordstensor, yelpcatstensor, None,
                             0.03)


# Script entry point: only runs when this file is executed directly.
if __name__ == '__main__':
    # get_yelp_blend is passed as the second argument; presumably it is only
    # called when 'yelp-cnet-blend.pickle' is not already cached -- confirm
    # against get_picklecached_thing.
    svd = get_picklecached_thing('yelp-cnet-blend.pickle', get_yelp_blend)
    # Hand the result to export_svdview with a hard-coded absolute output path.
    export_svdview(svd, "/csc/svdview/data/yelp-tweek.tsv")

if False:
    ###
    ### Old blending stuff
    ###

    def find_factor_from_SVD(t1, t2):
        """Return ``a / (a + b)`` as a float.

        ``a`` and ``b`` are the first entries of ``t1.svals`` and
        ``t2.svals`` respectively (each taken from the ``[0:10]`` slice).
        """
        lead1 = t1.svals[0:10][0]
        lead2 = t2.svals[0:10][0]
        total = lead1 + lead2
        return float(lead1 / total)

    def normalize_and_copy_three(tensor):
예제 #5
0
def get_thing(fname, func=None):
    """Call ``get_picklecached_thing`` on the ``local_file`` form of ``fname``.

    ``fname`` is first passed through ``local_file``; that result and ``func``
    are forwarded to ``get_picklecached_thing``, whose return value is
    returned unchanged.
    """
    cache_path = local_file(fname)
    return get_picklecached_thing(cache_path, func)
예제 #6
0
def _get_size_blend():
    """Blend the size matrix with ConceptNet and return the baked result.

    Both inputs are obtained through ``get_picklecached_thing`` using files
    under ``FILEPATH``; the blend is normalized with ``mode=[0,1]`` and then
    baked.
    """
    def _load_conceptnet():
        return conceptnet_2d_from_db('en')

    sizes = get_picklecached_thing(
        FILEPATH + os.sep + 'sizematrix.pickle.gz', _make_size_matrix)
    cnet = get_picklecached_thing(
        FILEPATH + os.sep + 'cnet.pickle.gz', _load_conceptnet)

    blended = Blend([sizes, cnet])
    normalized = blended.normalized(mode=[0, 1])
    return normalized.bake()
예제 #7
0
def _make_size_matrix():
    """Build the size matrix from ``size_similarities.physnet``.

    The file is opened relative to the current working directory.  Each line
    is evaluated as a Python expression ``entry``; the triple
    ``(entry[0], entry[1][1], entry[1][2])`` is recorded with a fixed value
    of 40.

    Returns:
        The result of ``ConceptByFeatureMatrix.from_triples`` applied to the
        collected ``(triple, 40)`` pairs.

    Raises:
        IOError: if the data file cannot be opened.
    """
    matrixlist = []
    sizefile = open("size_similarities.physnet")
    # try/finally guarantees the handle is closed even when a line fails to
    # evaluate or index; the handle used to be left open.
    try:
        for line in sizefile:
            # SECURITY NOTE(review): eval() executes whatever the data file
            # contains.  Only use with trusted files; consider
            # ast.literal_eval if every line is a plain literal.
            entry = eval(line)
            matrixlist.append(((entry[0], entry[1][1], entry[1][2]), 40))
    finally:
        sizefile.close()
    return ConceptByFeatureMatrix.from_triples(matrixlist)
    
    
def _get_size_blend():
    """Blend the size matrix with ConceptNet and return the baked result.

    Both inputs are obtained through ``get_picklecached_thing`` using files
    under ``FILEPATH``; the blend is normalized with ``mode=[0,1]`` and then
    baked.
    """
    def _load_conceptnet():
        return conceptnet_2d_from_db('en')

    sizes = get_picklecached_thing(
        FILEPATH + os.sep + 'sizematrix.pickle.gz', _make_size_matrix)
    cnet = get_picklecached_thing(
        FILEPATH + os.sep + 'cnet.pickle.gz', _load_conceptnet)

    blended = Blend([sizes, cnet])
    normalized = blended.normalized(mode=[0, 1])
    return normalized.bake()
    
# Module-level setup: these statements run when the module is imported.
# _get_size_blend is passed as the second argument; presumably it is only
# invoked when the pickle is not already cached -- confirm against
# get_picklecached_thing.
sizeblend = get_picklecached_thing(FILEPATH+os.sep+'sizeblend.pickle.gz', _get_size_blend)
# SVD of the blend, requested with k=100.
sizesvd = sizeblend.svd(k=100)

# NOTE(review): `rawsvd` is only defined in the commented-out lines below,
# yet get_similar_raw_examples still refers to it.
#cnet_norm = conceptnet_2d_from_db('en').normalized()
#rawsvd = cnet_norm.svd()

def get_similar_size_examples(concept):
    """Return ``sizesvd.u_angles_to`` for the concept's normalized row.

    Returns ``None`` when ``concept`` is not in ``sizeblend``.  Otherwise the
    concept's row of ``sizesvd.weighted_u`` is passed through ``hat()`` and
    handed to ``sizesvd.u_angles_to``.
    """
    if concept in sizeblend:
        direction = sizesvd.weighted_u[concept, :].hat()
        return sizesvd.u_angles_to(direction)
    return None

def get_similar_raw_examples(concept):
    concept_vector = rawsvd.weighted_u[concept,:].hat()
    like_concepts = rawsvd.u_angles_to(concept_vector)
예제 #8
0
def _get_color_blend():
    """Blend the color matrix with ConceptNet and return the baked result.

    Both inputs are obtained through ``get_picklecached_thing`` using files
    under ``FILEPATH``; the blend is normalized with ``mode=[0,1]`` and then
    baked.
    """
    def _load_conceptnet():
        return conceptnet_2d_from_db('en')

    colors = get_picklecached_thing(
        FILEPATH + os.sep + 'colormatrix.pickle.gz', _make_color_matrix)
    cnet = get_picklecached_thing(
        FILEPATH + os.sep + 'cnet.pickle.gz', _load_conceptnet)

    blended = Blend([colors, cnet])
    normalized = blended.normalized(mode=[0, 1])
    return normalized.bake()
예제 #9
0
            if word == '': continue
            print color, word
            matrixlist.append(((word, 'HasColor', color), 10))
            matrixlist.append(((word, 'HasProperty', 'colorful'), 10))
            matrixlist.append(((word, 'HasProperty', color), 10))
        matrixlist.append(((color, 'HasColor', color), 50))
        matrixlist.append(((color, 'HasProperty', color), 50))
    return ConceptByFeatureMatrix.from_triples(matrixlist)
              
def _get_color_blend():
    """Blend the color matrix with ConceptNet and return the baked result.

    Both inputs are obtained through ``get_picklecached_thing`` using files
    under ``FILEPATH``; the blend is normalized with ``mode=[0,1]`` and then
    baked.
    """
    def _load_conceptnet():
        return conceptnet_2d_from_db('en')

    colors = get_picklecached_thing(
        FILEPATH + os.sep + 'colormatrix.pickle.gz', _make_color_matrix)
    cnet = get_picklecached_thing(
        FILEPATH + os.sep + 'cnet.pickle.gz', _load_conceptnet)

    blended = Blend([colors, cnet])
    normalized = blended.normalized(mode=[0, 1])
    return normalized.bake()

# Module-level setup: these statements run when the module is imported.
# _get_color_blend is passed as the second argument; presumably it is only
# invoked when the pickle is not already cached -- confirm against
# get_picklecached_thing.
colorblend = get_picklecached_thing(FILEPATH+os.sep+'colorblend.pickle.gz', _get_color_blend)
# SVD of the blend, requested with k=100.
thesvd = colorblend.svd(k=100)
# Labels along index 0 of the SVD's u matrix.
colorful_concepts = thesvd.u.label_list(0)
#print thesvd.summarize(10)
# Row of v for the feature ('right', 'HasProperty', 'colorful').
colorful_vec = thesvd.v[('right', u'HasProperty', u'colorful'), :]
# Color names handled by this module; the same names are the keys of `rgb`.
colorlist = ['blue', 'black', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow']
# Color name -> 3-tuple of ints (presumably red, green, blue components,
# going by the variable name; every value shown lies in 0..255).
rgb = {'blue': (0,0,255), 'black': (0,0,0), 'brown': (139, 69, 19), 'green': (0, 255, 0), 'grey': (100,100,100), 'orange': (255, 165,0), 'pink': (255,105,180), 'purple': (160, 32, 240), 'red': (255,0,0), 'white': (255, 255, 255), 'yellow': (255,255,0)}
#colorvecs = [(x, thesvd.weighted_u[x,:]) for x in colorlist]
# One (name, vector) pair per color: the color's weighted_u row scaled by 0.1
# plus the weighted_v row of its ('right', 'HasColor', name) feature.
colorvecs = [(x, thesvd.weighted_u[x,:]*.1 + thesvd.weighted_v[('right', 'HasColor', x),:]) for x in colorlist]

def how_colorful(word, thesvd):
    """Multiply the word's normalized row by the color category.

    The row of ``thesvd.weighted_u`` for ``word`` is passed through ``hat()``
    and multiplied by ``make_category(thesvd, concepts=rgb.keys())``; the
    product is returned.
    """
    unit_vector = thesvd.weighted_u[word, :].hat()
    color_category = make_category(thesvd, concepts=rgb.keys())
    return unit_vector * color_category

def _get_color_mix(adhoc, thesvd):
    vec_dict = {}
예제 #10
0
            cnet[key] = 0
    return cnet

def get_yelp_blend():
    """Collect the three input tensors and run the three-way blend on them.

    The ConceptNet, Yelp TF-IDF and Yelp category tensors are each obtained
    through ``get_picklecached_thing``. The two Yelp tensors are printed, then
    all three are handed to ``run_3blend_simple`` together with ``None`` and
    ``0.03``; whatever that call returns is returned unchanged.
    """
    conceptnet = get_picklecached_thing(
        'cnet.pickle', conceptnet_with_custom_identities, 'ConceptNet')

    # To regenerate these next two, run yelp_matrix_creation.py
    # (only ``None`` is supplied here in place of a generator function).
    yelp_words = get_picklecached_thing(
        'yelp-tfidf.pickle', None, 'Yelp TFIDF')
    yelp_cats = get_picklecached_thing(
        'yelp_cats_and_price.pickle', None, 'Yelp Cats')

    # A single parenthesised argument prints exactly like the bare statement.
    print(yelp_words)
    print(yelp_cats)

    return run_3blend_simple(conceptnet, yelp_words, yelp_cats, None, 0.03)

# Script entry point: only runs when this file is executed directly.
if __name__ == '__main__':
    # get_yelp_blend is passed as the second argument; presumably it is only
    # called when 'yelp-cnet-blend.pickle' is not already cached -- confirm
    # against get_picklecached_thing.
    svd = get_picklecached_thing('yelp-cnet-blend.pickle', get_yelp_blend)
    # Hand the result to export_svdview with a hard-coded absolute output path.
    export_svdview(svd, "/csc/svdview/data/yelp-tweek.tsv")




if False:
    ###
    ### Old blending stuff
    ###

    def find_factor_from_SVD(t1, t2):
        sigma1 = t1.svals[0:10]
        a = sigma1[0]
        sigma2 = t2.svals[0:10]
        b = sigma2[0]
예제 #11
0
파일: utils.py 프로젝트: imclab/twittermap
def get_thing(fname, func=None):
    """Call ``get_picklecached_thing`` on the ``local_file`` form of ``fname``.

    ``fname`` is first passed through ``local_file``; that result and ``func``
    are forwarded to ``get_picklecached_thing``, whose return value is
    returned unchanged.
    """
    cache_path = local_file(fname)
    return get_picklecached_thing(cache_path, func)
예제 #12
0
def run_cnet_blend(other, FILENAME='blend'):
    """Blend ``other`` via ``make_blend``, take its SVD, and export ``u``.

    The blend is obtained through ``get_picklecached_thing`` under the name
    ``FILENAME + '.pickle.gz'``, with a zero-argument builder that calls
    ``make_blend(other)``.  The SVD's ``u`` is written with ``write_packed``
    under ``FILENAME`` using an identity ``unstem`` function, and the SVD is
    returned.
    """
    def _build_blend():
        return make_blend(other)

    def _identity(label):
        return label

    cache_name = FILENAME + '.pickle.gz'
    blend = get_picklecached_thing(cache_name, _build_blend)
    result = blend.svd()
    write_packed(result.u, FILENAME, unstem=_identity)
    return result
예제 #13
0
from csc.conceptnet4.models import *
from csc.conceptnet4.analogyspace import *
from csc.divisi.util import get_picklecached_thing
from csc.divisi.blend import Blend
from csc.divisi.export_svdview import write_packed

# Module-level ConceptNet matrix; this statement runs at import time.  The
# lambda calls conceptnet_2d_from_db('en'); presumably it is only invoked when
# 'cnet.pickle.gz' is not already cached -- confirm against
# get_picklecached_thing.
cnet = get_picklecached_thing('cnet.pickle.gz', lambda: conceptnet_2d_from_db('en'))

def make_blend(other):
    """Return a ``Blend`` of the module-level ``cnet`` followed by ``other``."""
    components = [cnet, other]
    return Blend(components)

def run_cnet_blend(other, FILENAME='blend'):
    """Blend ``other`` via ``make_blend``, take its SVD, and export ``u``.

    The blend is obtained through ``get_picklecached_thing`` under the name
    ``FILENAME + '.pickle.gz'``, with a zero-argument builder that calls
    ``make_blend(other)``.  The SVD's ``u`` is written with ``write_packed``
    under ``FILENAME`` using an identity ``unstem`` function, and the SVD is
    returned.
    """
    def _build_blend():
        return make_blend(other)

    def _identity(label):
        return label

    cache_name = FILENAME + '.pickle.gz'
    blend = get_picklecached_thing(cache_name, _build_blend)
    result = blend.svd()
    write_packed(result.u, FILENAME, unstem=_identity)
    return result

def example():
    # In the context of this corpus of healthcare-related messages (not
    # publicly provided), what do people want?
    
    health = get_picklecached_thing('health.pickle.gz', None)
    svd = run_cnet_blend(health, 'health_blend')
    svd.summarize(10)

    # Make a labeled vector of similarities to the feature person\Desires.
    vec = predict_concepts(svd, ('left', 'Desires', 'person'))

    # Get the list of concepts in ConceptNet.
    concepts = cnet.label_list(0)