def __init__(self):
    """Load the English NL tools and build a 100-axis AnalogySpace from ConceptNet."""
    # NOTE: earlier revisions loaded a pickled tensor instead:
    #self.tensor = get_picklecached_thing('tensor.gz')
    #self.svd = self.tensor.svd(k=50)
    language = 'en'
    self.en_nl = get_nl(language)
    self.normalizer = LemmatizedEuroNL(language)
    concept_tensor = conceptnet_2d_from_db(language)
    self.cnet = concept_tensor
    self.analogyspace = concept_tensor.svd(k=100)
def run_analogy_space_lang(lang):
    """Build, normalize, SVD, and persist the ConceptNet matrix for ``lang``.

    The tensor is pickled to a gzip file and the SVD saved via pytables.
    Both outputs are written to a ``*_new`` temp path and renamed into
    place only after a successful write, so a crash never clobbers the
    previous good file.
    """
    # Open the tensor output file up front so filesystem errors surface
    # before the expensive database load.
    tensor_name = tensor_filename(lang)
    tensor_name_new = tensor_name + '_new'
    tensor_file = GzipFile(tensor_name_new, 'wb')
    # FIX: close the gzip stream even if loading/normalizing/pickling
    # raises -- previously an exception leaked the open file handle.
    try:
        svd_name = svd_filename(lang)
        svd_name_new = svd_name + '_new'
        # Load matrix
        logging.info('Loading %s' % lang)
        cnet_2d = conceptnet_2d_from_db(lang, identities=IDENTITIES, cutoff=CUTOFF)
        logging.info('Normalize %r' % cnet_2d)
        cnet_2d = cnet_2d.normalized()
        # Save tensor (highest pickle protocol)
        logging.info('Save tensor as %s' % tensor_name)
        pickle.dump(cnet_2d, tensor_file, -1)
    finally:
        tensor_file.close()
    # Rename only after a fully-flushed write.
    os.rename(tensor_name_new, tensor_name)
    logging.info('Running SVD')
    svd = cnet_2d.svd(k=100)
    # Save SVD
    logging.info('Save as %s' % svd_name)
    svd.save_pytables(svd_name_new)
    os.rename(svd_name_new, svd_name)
def __init__(self):
    """Blend ConceptNet with an affective-WordNet matrix into one AnalogySpace."""
    self.EN_NL = get_nl('en')
    self.helper = DivsiHelper()
    self.cnet_normalized = conceptnet_2d_from_db('en')
    affect_raw = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
    self.affectwn_raw = affect_raw
    self.affectWN = affect_raw.normalized()
    # SVD of the blend of both normalized matrices.
    blended = Blend([self.affectWN, self.cnet_normalized])
    self.analogySpace = blended.svd()
def run_analogy_space_lang(lang):
    """Build, normalize, SVD, and pickle the ConceptNet matrix for ``lang``.

    Both outputs are written to a ``*_new`` temp path and renamed into
    place after the write completes, so the previous good file is never
    clobbered by a partial write.
    """
    # Load matrix
    logging.info('Loading %s' % lang)
    cnet_2d = conceptnet_2d_from_db(lang, identities=IDENTITIES, cutoff=CUTOFF)
    logging.info('Normalize %r' % cnet_2d)
    cnet_2d = cnet_2d.normalized()
    # Save tensor
    fn = tensor_filename(lang)
    logging.info('Save tensor as %s' % fn)
    # FIX: the GzipFile was created inline and never closed, so the gzip
    # trailer was only written when the GC happened to finalize it --
    # possibly after os.rename, leaving a truncated archive.  Close it
    # explicitly before renaming.
    tensor_file = GzipFile(fn + '_new', 'wb')
    try:
        pickle.dump(cnet_2d, tensor_file, -1)
    finally:
        tensor_file.close()
    os.rename(fn + '_new', fn)
    logging.info('Running SVD')
    svd = cnet_2d.svd(k=100)
    # Save SVD
    fn = svd_filename(lang)
    logging.info('Save as %s' % fn)
    # FIX: same unclosed-file issue for the SVD pickle.
    svd_file = open(fn + '_new', 'wb')
    try:
        pickle.dump(svd, svd_file, -1)
    finally:
        svd_file.close()
    os.rename(fn + '_new', fn)
metric = svd.get_ahat(('dog', ('right', 'IsA', 'pet'))) print '' print 'here is a measure of the likelihood that a %s is a %s (high numbers show likelihood): %s' %('dog', 'pet', str(metric)) # ...Is a hammer a pet? metric = svd.get_ahat(('hammer', ('right', 'IsA', 'pet'))) print '' print 'here is a measure of the likeleiood that a %s is a %s (high numbers show likelihood): %s' %('hammer', 'pet', str(metric)) #get rid of large objects we don't need tensor = svd = None #build a 2d analgy space from csc.conceptnet4.analogyspace import conceptnet_2d_from_db cnet = conceptnet_2d_from_db('en') analogyspace = cnet.svd(k=50) #test some conceptually similar and different things cow = analogyspace.weighted_u['cow',:] horse = analogyspace.weighted_u['horse',:] pencil = analogyspace.weighted_u['pencil',:] cowVersusHorse = cow.hat().dot(horse.hat()) pencilVerusHorse = pencil.hat().dot(horse.hat()) print '' print 'on a normalized scale (%s to %s) %s is related to %s: %s' %('-1.0', '+1.0', 'cow', 'horse', str(cowVersusHorse)) print '' print 'on a normalized scale (%s to %s) %s is related to %s: %s' %('-1.0', '+1.0', 'pencil', 'horse', str(pencilVerusHorse)) #find out what a pencil is related to; #get a normalized tensor and do pca;
# NOTE(review): recovered from a flattened line.  The opening statements are
# the tail of a function (presumably get_answears) whose header is outside
# this chunk; the indentation below is a best guess -- confirm against the
# original layout.
        obj2 = p[1][2]
        sys.stdout.write("\"%s\" \"%s\"" % (obj1, obj2))
        # Block until the user classifies the pair:
        # 'z' = same (also logged to outfile), 'm' = different, 'q' = quit.
        while True:
            response = getch()
            if response == 'z':
                sys.stdout.write(" same\n")
                outfile.write("%s \n" % str(p))
                break
            elif response == 'm':
                sys.stdout.write(" different\n")
                break
            elif response == 'q':
                return

from csc.conceptnet4.analogyspace import conceptnet_2d_from_db

cnet = conceptnet_2d_from_db('en')
# Keep only assertions whose relation (second element of the feature tuple)
# is "SimilarSize".  Python 2 filter returns a list here.
similar_size_relations = filter(lambda k: k[1][1] == "SimilarSize", cnet)
print "Similar size relations:"
print similar_size_relations[0:10]
print "'z' for same, 'm' for different"
get_answears(similar_size_relations)
# #tests = ["test1", "test2", "test3"]
# #results = get_answears(tests)
# #print results
def test_build_conceptnet():
    """Smoke test: build the English ConceptNet tensor, normalize along
    mode 0, take the SVD, and print a two-axis summary."""
    from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
    cnet_tensor = conceptnet_2d_from_db('en')
    decomposition = cnet_tensor.normalized(mode=0).svd()
    decomposition.summarize(2)
# NOTE(review): recovered from a flattened line; statement boundaries
# reconstructed, tokens unchanged.  The class may continue past this chunk
# (process_feed_list / process_feed_item / process_post are not shown).
import numpy, feedparser, nltk, simplejson, itertools
from csc.divisi.forgetful_ccipca import CCIPCA
from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
import utils

# Sentence tokenizer (not referenced in this chunk; presumably used later).
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# Cached ConceptNet 2-D tensor, built on first use via the lambda factory.
cthing = utils.get_thing('cnet.pickle.gz', lambda: conceptnet_2d_from_db('en', cutoff=10))
# Cached "spice" tensor; no factory is supplied, so the pickle is assumed to
# already exist -- TODO confirm.
athing = utils.get_thing('spice.pickle.gz')
# Publish the known concept labels (mode-0 labels of the ConceptNet tensor).
utils.concepts = set(cthing.label_list(0))


class SocNOC(object):
    def __init__(self, transfreq=1, cnetfreq=2, spicefreq=10, k=20, filters=None):
        # Incremental PCA keeping k components.
        self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0)
        self.filters = filters
        self.iteration = 0
        self.touchpoints = []
        self.categories = {}
        # *freq values look like mixing frequencies for the auxiliary
        # streams; their semantics live in code outside this chunk.
        self.transfreq = transfreq
        self.cnet = utils.feature_cycle(cthing)
        self.cnetfreq = cnetfreq
        self.spice = utils.feature_cycle(athing)
        self.spicefreq = spicefreq

    def process_feed(self, feeds):
        # Unlabeled convenience wrapper: tags every feed with label None.
        self.process_labeled_feed(utils.make_tuples(feeds, None))

    def process_labeled_feed(self, feeds):
        # process_feed_list / process_feed_item / process_post are defined
        # outside this chunk.
        for current, word in self.process_feed_list(feeds):
            self.process_post(self.process_feed_item(current), word)
# NOTE(review): recovered from a flattened line.  The leading try-block is
# the tail of a loop (apparently iterating `w` over `mywords`) whose header
# is outside this chunk, and the final try-block continues past it.
try:
    concepts = Concept.get(w, 'en')
    out.append(w)
    for a in concepts.get_assertions()[:20]:
        out.append(" " + str(a))
    out.append('')
except:
    # NOTE(review): bare except silently skips words that fail lookup (or
    # any other error) -- consider narrowing to the lookup exception.
    pass

'''Create a pca based on the normalized relationships between every item on the list and every other item known to the database (row-wise normalization). This produces a scale- free assay of how similar other things are. In other words, we ask what concepts are most similar to each of our key words. The max is 1.0. '''
pca_axes = 20  #this is the number of axes we'll estimate
cnet = conceptnet_2d_from_db('en')  #this is the database we're drawing from
cnet_norm = conceptnet_2d_from_db('en').normalized()  #this normalizes the data
analogyspace = cnet_norm.svd(k=pca_axes)  #this conducts the pca on the normalized data
out.append(' ')
out.append('*********associateive strength normalized (scale -1.0 to 1.0 ) ************')
out.append(' ')
# All ordered 2-element combinations of the key words.
mypairs = []
for uc in xcombinations(mywords,2):
    mypairs.append(uc)
outarr=[];tmparr=[];
grouparr= [];lasttopic = mypairs[0][0];allarr=[]
out.append('')
for p in mypairs:
    a = None; b = None; a_versus_b = None;
    thistopic = p[0]
    aword=p[0]; bword = p[1]
    # Look up each word's vector in weighted-U space; the except clause for
    # this try lies beyond this chunk.
    try:
        a= analogyspace.weighted_u[aword,:]
# NOTE(review): recovered from a flattened line; statement boundaries
# reconstructed, tokens unchanged.  The class likely continues past this
# chunk (compare the longer variant of SocNOC elsewhere in this file's
# history), so only __init__ is visible here.
import numpy, feedparser, nltk, simplejson, itertools
from csc.divisi.forgetful_ccipca import CCIPCA
from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
import utils

# Sentence tokenizer (not referenced in this chunk; presumably used later).
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# Cached ConceptNet 2-D tensor, built on first use via the lambda factory.
cthing = utils.get_thing('cnet.pickle.gz', lambda: conceptnet_2d_from_db('en', cutoff=10))
# Cached "spice" tensor; no factory is supplied, so the pickle is assumed to
# already exist -- TODO confirm.
athing = utils.get_thing('spice.pickle.gz')
# Publish the known concept labels (mode-0 labels of the ConceptNet tensor).
utils.concepts = set(cthing.label_list(0))


class SocNOC(object):
    def __init__(self, transfreq=1, cnetfreq=2, spicefreq=10, k=20, filters=None):
        # Incremental PCA keeping k components.
        self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0)
        self.filters = filters
        self.iteration = 0
        self.touchpoints = []
        self.categories = {}
        # *freq values look like mixing frequencies for the auxiliary
        # streams; their semantics live in code outside this chunk.
        self.transfreq = transfreq
        self.cnet = utils.feature_cycle(cthing)
        self.cnetfreq = cnetfreq
        self.spice = utils.feature_cycle(athing)
        self.spicefreq = spicefreq