import os
import numpy as np
from assoc_space import AssocSpace


def merge_vector_spaces(subspace_dir, mergers):
    merged = None
    for sourceA, sourceB, target in mergers:
        print('Merging: %s + %s -> %s' % (sourceA, sourceB, target))
        spaceA = AssocSpace.load_dir(os.path.join(subspace_dir, sourceA))
        spaceB = AssocSpace.load_dir(os.path.join(subspace_dir, sourceB))

        # On the first step, we want to keep all the axes from merging
        # subparts. Through most of the merging, we want to maintain that
        # number of axes. At the end, we want to go back to the original
        # number of axes.
        #
        # For example, when we are merging 300-dimensional spaces, the
        # intermediate merge results will have 600 dimensions, and the final
        # result will have 300 dimensions again.
        #
        # We don't refer to the number of axes in spaceB in this code, because
        # we're assuming all the sub-parts have equal numbers of axes.
        if target.startswith('part'):
            k = spaceA.k * 2
        elif target == 'merged_complete':
            k = spaceA.k // 2
        else:
            k = spaceA.k

        merged = spaceA.merged_with(spaceB, k=k)
        del spaceA
        del spaceB
        merged.save_dir(os.path.join(subspace_dir, target))

    # After the final merge, drop rows whose vectors are nearly zero.
    magnitudes = (merged.u ** 2).sum(1)
    good_indices = np.flatnonzero(magnitudes >= 1e-5)
    filtered = merged[good_indices]
    filtered.save_dir(os.path.join(subspace_dir, 'merged_filtered'))
    return filtered
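For context, here is a sketch of how this function might be invoked, assuming a hypothetical plan that merges four 300-dimensional subspaces pairwise; the directory names are illustrative, not from the original project. Under the k logic above, the two 'part' targets come out 600-dimensional and the final 'merged_complete' space drops back to 300 dimensions:

# Hypothetical merge plan (names and layout are assumptions for illustration).
# Each pair of 300-d subspaces merges into a 600-d 'part' space; the last
# step merges the two parts into a 300-d 'merged_complete' space.
mergers = [
    ('subspace0', 'subspace1', 'part0'),
    ('subspace2', 'subspace3', 'part1'),
    ('part0', 'part1', 'merged_complete'),
]
filtered = merge_vector_spaces('data/assoc/subspaces', mergers)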
def optimizeAllAndInferConceptsModelTwo(assocDir):
    ## load assocSpace
    assocSpace = AssocSpace.load_dir(assocDir)

    ## targets and image-indices dictionary
    targetsToImageIndicesAndWeights = {}  # target-word -> [(index, weight_i), ...]
    targetsToCentralities = {}            # target-word -> centrality-score
    loadTargetWordsFromAllImages(targetsToCentralities,
                                 targetsToImageIndicesAndWeights)

    # Model
    m = Model("psl2")
    variables = set()
    targets = {}
    loadDecisionVariablesForTargets(m, targets, variables,
                                    targetsToImageIndicesAndWeights)

    ## TODO: populate the rules
    objective = LinExpr()
    objective = createObjective(m, targets, variables, objective, assocSpace,
                                targetsToCentralities,
                                targetsToImageIndicesAndWeights)
    m.update()
    m.setObjective(objective)

    # The objective is to minimize the costs
    m.modelSense = GRB.MINIMIZE

    # Update model to integrate new variables
    m.update()

    m.optimize()
    m.write('out2.lp')
    m.write('out2.sol')

    outputFile = open(sys.argv[1] + sys.argv[2] + "_inferred.txt", "w")
    printSolution(m, targets, outputFile)
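The helpers here (loadTargetWordsFromAllImages, loadDecisionVariablesForTargets, createObjective, printSolution) are project-specific and not shown. As a self-contained sketch of the same Gurobi pattern the function follows, declaring bounded continuous variables, accumulating a weighted LinExpr objective, and minimizing it, assuming gurobipy is installed; the target words and weights below are made up:

from gurobipy import Model, GRB, LinExpr

# Minimal sketch of the declare/accumulate/minimize pattern used above.
# Variable names and weights are illustrative, not from the project.
m = Model("psl_sketch")
weights = {"dog": 0.8, "leash": 0.3, "park": 0.5}
decision = {t: m.addVar(lb=0.0, ub=1.0, name="x_" + t) for t in weights}
m.update()  # integrate the new variables into the model

objective = LinExpr()
for t, w in weights.items():
    # Penalize turning each target off, in proportion to its weight.
    objective += w * (1.0 - decision[t])

m.setObjective(objective, GRB.MINIMIZE)
m.optimize()
for t, var in decision.items():
    print(t, var.x)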
def load_assoc():
    """
    Load the association matrix. Requires the open source Python package
    'assoc_space'.
    """
    global commonsense_assoc
    if commonsense_assoc:
        return commonsense_assoc
    commonsense_assoc = AssocSpace.load_dir(ASSOC_DIR)
    return commonsense_assoc
def load(self):
    if self.assoc is not None:
        return
    try:
        from assoc_space import AssocSpace
        self.assoc = AssocSpace.load_dir(self.path)
    except ImportError:
        raise MissingAssocSpace("The assoc_space package is not installed.")
    except ZeroDivisionError:
        raise MissingAssocSpace("The space of term associations could not "
                                "be loaded.")
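MissingAssocSpace is defined elsewhere in the project. A minimal stand-in, assuming it is a plain Exception subclass, together with a hypothetical caller that degrades gracefully when the association space is unavailable:

# Stand-in definition, assuming MissingAssocSpace is a plain Exception
# subclass (the real class lives elsewhere in the project).
class MissingAssocSpace(Exception):
    pass

def safe_load(space):
    # Hypothetical caller: treat a missing association space as a soft failure.
    try:
        space.load()
        return True
    except MissingAssocSpace as err:
        print("Warning:", err)
        return False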
def test_dir_round_trip():
    assoc = AssocSpace.from_entries(ENTRIES, k=3)
    assoc.save_dir('/tmp/assoc_test')
    assoc2 = AssocSpace.load_dir('/tmp/assoc_test')
    eq_(assoc, assoc2)
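ENTRIES is a module-level constant defined elsewhere in the test file, and eq_ presumably comes from nose.tools. Assuming from_entries takes a list of (value, label_a, label_b) triples describing weighted associations, a plausible stand-in with made-up values would be:

# Hypothetical stand-in for the module-level ENTRIES constant:
# (value, label_a, label_b) triples of weighted term associations.
ENTRIES = [
    (4.0, 'apple', 'red'),
    (1.0, 'apple', 'fruit'),
    (2.0, 'banana', 'fruit'),
    (0.5, 'red', 'fire'),
]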
# This snippet starts partway through a similarity-lookup function; the
# enclosing 'def' line is not included in the source.
if not os.path.isfile(sortedIndicesFileName):
    sim = assocSpace.assoc.dot(assocSpace.row_named("/c/en/" + word))
    indices = np.argsort(sim)[::-1]
    np.savez_compressed(sortedIndicesFileName, indices[:1000])
    sim_first1k = np.array([sim[index] for index in indices[:1000]])
    np.savez_compressed(simFileName, sim_first1k)

# np.load on a .npz archive returns an NpzFile; the unnamed arrays saved
# above are stored under the key 'arr_0'.
sim = np.load(simFileName)['arr_0']                # top-1000 similarities, by rank
indices = np.load(sortedIndicesFileName)['arr_0']  # label indices, by rank
data = []
for rank, index in enumerate(indices):
    if len(data) == limit:
        break
    if filterEnglishWords(names[index]):
        # sim is ordered by rank, so the score for names[index] is sim[rank].
        data.append((names[index], sim[rank]))
return data


minSimilarity = -1
maxSimilarity = 1
minCentrality = -0.00188222
maxCentrality = 0.00324597

assocDir = "../conceptnet5/data/assoc/assoc-space-5.4"
assocSpace = AssocSpace.load_dir(assocDir)
names = assocSpace.labels

word2vec_model = models.word2vec.Word2Vec.load_word2vec_format(
    '../../../DATASETS/GoogleNews-vectors-negative300.bin', binary=True)
word2vec_model.init_sims(replace=True)

TOO_RARE_WORD_CODE = -3
NOT_FOUND_IN_CORPUS_CODE = -2
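The caching above relies on NumPy's .npz round trip, which stores an unnamed array under the default key 'arr_0'; a minimal self-contained illustration (the /tmp path is arbitrary):

import numpy as np

arr = np.array([3, 1, 2])
np.savez_compressed('/tmp/cache_demo.npz', arr)   # saved under 'arr_0'
loaded = np.load('/tmp/cache_demo.npz')['arr_0']  # retrieve by default key
assert (loaded == arr).all()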
def main(dir):
    assoc = AssocSpace.load_dir(dir)
    test(assoc)
def test_dir_round_trip():
    assoc = AssocSpace.from_entries(entries, 3)
    assoc.save_dir('/tmp/assoc_test')
    assoc2 = AssocSpace.load_dir('/tmp/assoc_test')
    eq_(assoc, assoc2)
from assoc_space import AssocSpace
import sys
import threading
import math


def computeNormalizedValue(value, maxV, minV, addOne=False):
    # Min-max normalization; addOne shifts numerator and denominator by 1
    # to keep the result strictly positive.
    if addOne:
        return (value - minV + 1) / (maxV - minV + 1)
    return (value - minV) / (maxV - minV)


if len(sys.argv) < 4:
    print("python conceptnetAssocSpace.py <seedsfile> <targetfile> "
          "<AssocSpaceDirectory>")
    sys.exit()

assocSpace = AssocSpace.load_dir(sys.argv[3])
words = []
minSimilarity = -0.358846
maxSimilarity = 0.999747
minCentrality = -0.00188222
maxCentrality = 0.00324597

with open(sys.argv[1], "r") as f:
    i = 0
    for line in f:
        if line.startswith("##"):
            continue
        words = line.split("\t")
        word1 = "/c/en/" + words[0].strip()
        with open(sys.argv[2], "r") as f2:
            for line in f2:
                if line.startswith("##"):
                    continue  # snippet is truncated here in the source
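As a quick worked example of computeNormalizedValue using the similarity bounds above (the raw score 0.5 is made up): (0.5 - (-0.358846)) / (0.999747 - (-0.358846)) ≈ 0.632.

# Usage sketch with the module's similarity bounds; 0.5 is a made-up score.
norm = computeNormalizedValue(0.5, maxSimilarity, minSimilarity)
print(round(norm, 3))  # 0.632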