Esempio n. 1
0
 def __init__(self):
     """Load English NLP tools and build an AnalogySpace from ConceptNet.

     Side effects: hits the ConceptNet database and runs a k=100 SVD,
     which can take noticeable time and memory.
     """
     self.en_nl = get_nl('en')
     self.normalizer = LemmatizedEuroNL('en')
     # 2-D concept/feature matrix pulled straight from the ConceptNet DB.
     self.cnet = conceptnet_2d_from_db('en')
     # Truncated SVD ("AnalogySpace") with 100 singular components.
     self.analogyspace = self.cnet.svd(k=100)
Esempio n. 2
0
def run_analogy_space_lang(lang):
    """Build, factor, and atomically save the AnalogySpace for *lang*.

    Loads the 2-D ConceptNet tensor for *lang*, normalizes it, pickles it
    to ``tensor_filename(lang)``, runs a k=100 SVD, and saves the SVD to
    ``svd_filename(lang)``.  Each file is written to a ``*_new`` path and
    then renamed into place so readers never see a half-written file.
    """
    # Open the tensor output first so a bad path fails before the
    # expensive database load.
    tensor_name = tensor_filename(lang)
    tensor_name_new = tensor_name + '_new'
    tensor_file = GzipFile(tensor_name_new, 'wb')

    svd_name = svd_filename(lang)
    svd_name_new = svd_name + '_new'

    # Load matrix
    logging.info('Loading %s' % lang)
    cnet_2d = conceptnet_2d_from_db(lang, identities=IDENTITIES, cutoff=CUTOFF)
    logging.info('Normalize %r' % cnet_2d)
    cnet_2d = cnet_2d.normalized()

    # Save tensor.  Close in a finally-block so the gzip stream is always
    # flushed and the handle never leaks, even if pickling raises; the
    # rename still only happens on success.
    logging.info('Save tensor as %s' % tensor_name)
    try:
        pickle.dump(cnet_2d, tensor_file, -1)
    finally:
        tensor_file.close()
    os.rename(tensor_name_new, tensor_name)

    logging.info('Running SVD')
    svd = cnet_2d.svd(k=100)

    # Save SVD; save_pytables manages its own file handle.
    logging.info('Save as %s' % svd_name)
    svd.save_pytables(svd_name_new)
    os.rename(svd_name_new, svd_name)
Esempio n. 3
0
 def __init__(self):
     """Build a blended AnalogySpace from affective WordNet and ConceptNet."""
     self.helper = DivsiHelper()
     # NOTE(review): despite the name, .normalized() is never called on this
     # tensor -- confirm whether the blend is meant to use raw ConceptNet.
     self.cnet_normalized = conceptnet_2d_from_db('en')
     # Pickle-cached affective WordNet matrix loaded from disk.
     self.affectwn_raw = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
     self.affectWN = self.affectwn_raw.normalized()
     # Blend the two tensors and factor with the default number of components.
     self.analogySpace = Blend([self.affectWN, self.cnet_normalized]).svd()
     
     
     self.EN_NL = get_nl('en')
Esempio n. 4
0
def run_analogy_space_lang(lang):
    """Build, factor, and atomically save the AnalogySpace for *lang*.

    Loads the 2-D ConceptNet tensor for *lang*, normalizes it, pickles it
    to ``tensor_filename(lang)``, runs a k=100 SVD, and pickles the SVD to
    ``svd_filename(lang)``.  Each file is written to a ``*_new`` path and
    renamed into place so readers never see a half-written file.
    """
    # Load matrix
    logging.info('Loading %s' % lang)
    cnet_2d = conceptnet_2d_from_db(lang, identities=IDENTITIES, cutoff=CUTOFF)
    logging.info('Normalize %r' % cnet_2d)
    cnet_2d = cnet_2d.normalized()

    # Save tensor.  The original never closed the GzipFile, relying on GC
    # to flush the gzip stream before os.rename -- unsafe on interpreters
    # without refcounting.  `with` closes (and flushes) deterministically.
    fn = tensor_filename(lang)
    logging.info('Save tensor as %s' % fn)
    with GzipFile(fn + '_new', 'wb') as tensor_file:
        pickle.dump(cnet_2d, tensor_file, -1)
    os.rename(fn + '_new', fn)

    logging.info('Running SVD')
    svd = cnet_2d.svd(k=100)

    # Save SVD, again write-then-rename for atomic replacement.
    fn = svd_filename(lang)
    logging.info('Save as %s' % fn)
    with open(fn + '_new', 'wb') as svd_file:
        pickle.dump(svd, svd_file, -1)
    os.rename(fn + '_new', fn)
Esempio n. 5
0
metric = svd.get_ahat(('dog', ('right', 'IsA', 'pet')))
print ''
print 'here is a measure of the likelihood that a %s is a %s (high numbers show likelihood): %s' %('dog', 'pet', str(metric))

# ...Is a hammer a pet?
metric = svd.get_ahat(('hammer', ('right', 'IsA', 'pet')))
print ''
print 'here is a measure of the likeleiood that a %s is a %s (high numbers show likelihood): %s' %('hammer', 'pet', str(metric))


#get rid of large objects we don't need
tensor = svd = None

#build a 2d analgy space
from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
cnet = conceptnet_2d_from_db('en')
analogyspace = cnet.svd(k=50)

#test some conceptually similar and different things
cow = analogyspace.weighted_u['cow',:]
horse = analogyspace.weighted_u['horse',:]
pencil = analogyspace.weighted_u['pencil',:]
cowVersusHorse = cow.hat().dot(horse.hat())
pencilVerusHorse = pencil.hat().dot(horse.hat())
print ''
print 'on a normalized scale (%s to %s) %s is related to %s:  %s' %('-1.0', '+1.0', 'cow', 'horse', str(cowVersusHorse))
print ''
print 'on a normalized scale (%s to %s) %s is related to %s:  %s' %('-1.0', '+1.0', 'pencil', 'horse', str(pencilVerusHorse))

#find out what a pencil is related to; 
#get a normalized tensor and do pca; 
Esempio n. 6
0
        obj2 = p[1][2]
        sys.stdout.write("\"%s\" \"%s\"" % (obj1, obj2))
        while True:
            response = getch()
            if response == 'z':
                sys.stdout.write("  same\n")
                outfile.write("%s \n" % str(p))
                break
            elif response == 'm':
                sys.stdout.write("  different\n")
                break
            elif response == 'q':
                return

from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
cnet = conceptnet_2d_from_db('en')

similar_size_relations = filter(lambda k: k[1][1] == "SimilarSize", cnet)

print "Similar size relations:"
print similar_size_relations[0:10]

print "'z' for same, 'm' for different"

get_answears(similar_size_relations)
#
#tests = ["test1", "test2", "test3"]
#
#results = get_answears(tests)
#
#print results
Esempio n. 7
0
def test_build_conceptnet():
    """Smoke-test the full build -> normalize(mode=0) -> SVD pipeline."""
    from csc.conceptnet4.analogyspace import conceptnet_2d_from_db

    cnet_tensor = conceptnet_2d_from_db('en')
    normalized = cnet_tensor.normalized(mode=0)
    decomposition = normalized.svd()
    decomposition.summarize(2)
Esempio n. 8
0
import numpy, feedparser, nltk, simplejson, itertools
from csc.divisi.forgetful_ccipca import CCIPCA
from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
import utils

# Punkt sentence tokenizer for splitting feed text into sentences.
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# ConceptNet tensor, built once (cutoff=10) and cached on disk by get_thing.
cthing = utils.get_thing('cnet.pickle.gz', lambda: conceptnet_2d_from_db('en', cutoff=10))
# NOTE(review): no builder is supplied here, so 'spice.pickle.gz' must
# already exist on disk -- confirm.
athing = utils.get_thing('spice.pickle.gz')
# All concept labels along mode 0, shared globally via the utils module.
utils.concepts = set(cthing.label_list(0))

class SocNOC(object):
    def __init__(self, transfreq=1, cnetfreq=2, spicefreq=10, k=20, filters=None):
        """Set up the incremental PCA (CCIPCA) and the input feature cycles.

        k is the number of principal components tracked; the *freq
        parameters presumably control how often each source is mixed in --
        their exact use lives in methods not visible here (confirm).
        """
        # amnesia/remembrance tune CCIPCA's forgetting behavior.
        self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0)
        self.filters = filters
        self.iteration = 0
        self.touchpoints = []
        self.categories = {}
        
        self.transfreq = transfreq
        # Endless cycles over the ConceptNet and "spice" feature streams.
        self.cnet = utils.feature_cycle(cthing)
        self.cnetfreq = cnetfreq
        self.spice = utils.feature_cycle(athing)
        self.spicefreq = spicefreq
    
    def process_feed(self, feeds):
        """Process unlabeled feeds by tagging every entry with label None."""
        unlabeled = utils.make_tuples(feeds, None)
        self.process_labeled_feed(unlabeled)
    
    def process_labeled_feed(self, feeds):
        """Run each (item, label) pair from the feed list through the pipeline."""
        for item, label in self.process_feed_list(feeds):
            processed = self.process_feed_item(item)
            self.process_post(processed, label)
    
Esempio n. 9
0
    try:
        concepts = Concept.get(w, 'en')
        out.append(w)
        for a in concepts.get_assertions()[:20]:
            out.append("     " + str(a))
        out.append('')     
    except:
        pass

'''Create a pca based on the normalized relationships between every item on the list
   and every other item known to the database (row-wise normalization).  This produces a scale-
   free assay of how similar other things are.  In other words, we ask what concepts are most
   similar to each of our key words.  The max is 1.0.
   '''
pca_axes = 20  # number of axes (singular components) we'll estimate
cnet = conceptnet_2d_from_db('en')   # the database we're drawing from
# Normalize the tensor we already loaded instead of hitting the database a
# second time (the original called conceptnet_2d_from_db twice).
cnet_norm = cnet.normalized()
analogyspace = cnet_norm.svd(k=pca_axes)   # PCA on the normalized data

out.append(' ')
out.append('*********associative strength normalized (scale -1.0 to 1.0 ) ************')
out.append(' ')
# All ordered pairs of key words.
mypairs = [uc for uc in xcombinations(mywords, 2)]
# Accumulators for the pair-similarity report built in the loop below.
outarr = []
tmparr = []
grouparr = []
lasttopic = mypairs[0][0]
allarr = []
out.append('')
for p in mypairs:
    a = None; b = None; a_versus_b = None; thistopic = p[0]
    aword=p[0]; bword = p[1]
    try:       
        a= analogyspace.weighted_u[aword,:]
Esempio n. 10
0
import numpy, feedparser, nltk, simplejson, itertools
from csc.divisi.forgetful_ccipca import CCIPCA
from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
import utils

# Punkt sentence tokenizer for splitting feed text into sentences.
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# ConceptNet tensor, built once (cutoff=10) and cached on disk by get_thing.
cthing = utils.get_thing('cnet.pickle.gz',
                         lambda: conceptnet_2d_from_db('en', cutoff=10))
# NOTE(review): no builder is supplied here, so 'spice.pickle.gz' must
# already exist on disk -- confirm.
athing = utils.get_thing('spice.pickle.gz')
# All concept labels along mode 0, shared globally via the utils module.
utils.concepts = set(cthing.label_list(0))


class SocNOC(object):
    def __init__(self,
                 transfreq=1,
                 cnetfreq=2,
                 spicefreq=10,
                 k=20,
                 filters=None):
        """Set up the incremental PCA (CCIPCA) and the input feature cycles.

        k is the number of principal components tracked; the *freq
        parameters presumably control how often each source is mixed in --
        their exact use lives in methods not visible here (confirm).
        """
        # amnesia/remembrance tune CCIPCA's forgetting behavior.
        self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0)
        self.filters = filters
        self.iteration = 0
        self.touchpoints = []
        self.categories = {}

        self.transfreq = transfreq
        # Endless cycles over the ConceptNet and "spice" feature streams.
        self.cnet = utils.feature_cycle(cthing)
        self.cnetfreq = cnetfreq
        self.spice = utils.feature_cycle(athing)
        self.spicefreq = spicefreq
Esempio n. 11
0
def test_build_conceptnet():
    """Smoke test: ConceptNet loads, normalizes along mode 0, and factors."""
    from csc.conceptnet4.analogyspace import conceptnet_2d_from_db

    raw_tensor = conceptnet_2d_from_db('en')
    svd_result = raw_tensor.normalized(mode=0).svd()
    svd_result.summarize(2)