Ejemplo n.º 1
0
def merge_vector_spaces(subspace_dir, mergers):
    """Merge pairs of saved AssocSpace subspaces according to a merge plan.

    Parameters
    ----------
    subspace_dir : str
        Directory containing the saved subspaces; each merge result is
        saved back into this directory as well.
    mergers : sequence of (str, str, str)
        Triples ``(sourceA, sourceB, target)`` naming the two input
        subspace directories and the directory name to save the merged
        result under.

    Returns
    -------
    The final merged space, filtered down to rows with non-negligible
    magnitude, after saving it under ``merged_filtered``.

    Raises
    ------
    ValueError
        If `mergers` is empty — otherwise we would fall through to
        ``merged.u`` with ``merged`` still None and crash with an opaque
        AttributeError.
    """
    if not mergers:
        raise ValueError("mergers must contain at least one merge step")

    merged = None
    for sourceA, sourceB, target in mergers:
        print('Merging: %s + %s -> %s' % (sourceA, sourceB, target))
        spaceA = AssocSpace.load_dir(os.path.join(subspace_dir, sourceA))
        spaceB = AssocSpace.load_dir(os.path.join(subspace_dir, sourceB))

        # On the first step, we want to keep all the axes from merging
        # subparts.  Through most of the merging, we want to maintain that
        # number of axes.  At the end, we want to go back to the original
        # number of axes.
        #
        # For example, when we are merging 300-dimensional spaces, the
        # intermediate merge results will have 600 dimensions, and the final
        # result will have 300 dimensions again.
        #
        # We don't refer to the number of axes in spaceB in this code,
        # because we're assuming all the sub-parts have equal numbers of
        # axes.
        if target.startswith('part'):
            k = spaceA.k * 2
        elif target == 'merged_complete':
            k = spaceA.k // 2
        else:
            k = spaceA.k

        merged = spaceA.merged_with(spaceB, k=k)
        # Free the (large) inputs before the next iteration to keep peak
        # memory down.
        del spaceA
        del spaceB
        merged.save_dir(os.path.join(subspace_dir, target))

    # Drop rows whose vectors are essentially zero; they carry no signal.
    magnitudes = (merged.u ** 2).sum(1)
    good_indices = np.flatnonzero(magnitudes >= 1e-5)
    filtered = merged[good_indices]
    filtered.save_dir(os.path.join(subspace_dir, 'merged_filtered'))
    return filtered
def optimizeAllAndInferConceptsModelTwo(assocDir):
    """Build and solve the Gurobi 'psl2' model inferring target concepts.

    Parameters
    ----------
    assocDir : str
        Directory of the saved AssocSpace used to score concept
        similarity inside the objective.

    Side effects
    ------------
    Writes the model to ``out2.lp``, the solution to ``out2.sol``, and the
    inferred concepts to ``sys.argv[1] + sys.argv[2] + "_inferred.txt"``.
    """
    # Load the association space used to score concept similarity.
    assocSpace = AssocSpace.load_dir(assocDir)

    # target-word -> [(image_index, weight), ...]
    targetsToImageIndicesAndWeights = {}
    # target-word -> centrality score
    targetsToCentralities = {}
    loadTargetWordsFromAllImages(targetsToCentralities,
                                 targetsToImageIndicesAndWeights)

    # Build the optimization model and its decision variables.
    m = Model("psl2")
    variables = set()
    targets = {}
    loadDecisionVariablesForTargets(m, targets, variables,
                                    targetsToImageIndicesAndWeights)

    ## TODO: populate the rules
    objective = LinExpr()
    objective = createObjective(m, targets, variables, objective, assocSpace,
                                targetsToCentralities,
                                targetsToImageIndicesAndWeights)

    m.update()
    m.setObjective(objective)

    # The objective is to minimize the costs
    m.modelSense = GRB.MINIMIZE

    # Update model to integrate new variables, then solve and dump results.
    m.update()
    m.optimize()
    m.write('out2.lp')
    m.write('out2.sol')

    # Fix: the original opened this file and never closed it; a context
    # manager guarantees the handle is flushed and closed.
    with open(sys.argv[1] + sys.argv[2] + "_inferred.txt", "w") as outputFile:
        printSolution(m, targets, outputFile)
Ejemplo n.º 3
0
def load_assoc():
    """
    Load the association matrix. Requires the open source Python package
    'assoc_space'.

    The loaded space is cached in the module-level variable
    ``commonsense_assoc`` so repeated calls do not reload it from disk.
    """
    global commonsense_assoc
    if commonsense_assoc:
        return commonsense_assoc
    # Fix: removed the unused local `dirname`; load directly from ASSOC_DIR.
    commonsense_assoc = AssocSpace.load_dir(ASSOC_DIR)
    return commonsense_assoc
Ejemplo n.º 4
0
def load_assoc():
    """
    Load the association matrix. Requires the open source Python package
    'assoc_space'.

    Caches the result in the module-level ``commonsense_assoc`` so the
    directory is only read once per process.
    """
    global commonsense_assoc
    if commonsense_assoc:
        return commonsense_assoc
    # Fix: dropped the dead local `dirname` (assigned but never used).
    commonsense_assoc = AssocSpace.load_dir(ASSOC_DIR)
    return commonsense_assoc
Ejemplo n.º 5
0
    def load(self):
        """Lazily load the AssocSpace at ``self.path`` into ``self.assoc``.

        No-op if ``self.assoc`` is already set.  Raises MissingAssocSpace
        when the assoc_space package is not importable, or when loading
        the saved space fails with a ZeroDivisionError.
        """
        if self.assoc is not None:
            return

        try:
            from assoc_space import AssocSpace
            self.assoc = AssocSpace.load_dir(self.path)
        except ImportError:
            raise MissingAssocSpace("The assoc_space package is not installed.")
        except ZeroDivisionError:
            # NOTE(review): presumably raised inside load_dir while
            # normalizing a degenerate space — TODO confirm against the
            # assoc_space internals before relying on this.
            raise MissingAssocSpace("The space of term associations could not "
                                    "be loaded.")
Ejemplo n.º 6
0
    def load(self):
        """Lazily load the AssocSpace at ``self.path`` into ``self.assoc``.

        Returns immediately if the space is already loaded.  Translates
        both a missing assoc_space package and a ZeroDivisionError during
        loading into the domain-specific MissingAssocSpace error.
        """
        if self.assoc is not None:
            return

        try:
            from assoc_space import AssocSpace
            self.assoc = AssocSpace.load_dir(self.path)
        except ImportError:
            raise MissingAssocSpace(
                "The assoc_space package is not installed.")
        except ZeroDivisionError:
            # NOTE(review): ZeroDivisionError is assumed to come from
            # load_dir's internal normalization — confirm before changing.
            raise MissingAssocSpace("The space of term associations could not "
                                    "be loaded.")
Ejemplo n.º 7
0
def test_dir_round_trip():
    """A saved AssocSpace loads back equal to the original (round trip).

    Fix: use a unique temporary directory instead of the hard-coded
    '/tmp/assoc_test', which races under parallel test runs and leaks
    files between runs; the directory is always removed afterwards.
    """
    import shutil
    import tempfile

    assoc = AssocSpace.from_entries(ENTRIES, k=3)
    tmpdir = tempfile.mkdtemp()
    try:
        assoc.save_dir(tmpdir)
        assoc2 = AssocSpace.load_dir(tmpdir)
        eq_(assoc, assoc2)
    finally:
        shutil.rmtree(tmpdir)
Ejemplo n.º 8
0
    if not os.path.isfile(sortedIndicesFileName):
        sim = assocSpace.assoc.dot(assocSpace.row_named("/c/en/" + word))
        indices = np.argsort(sim)[::-1]
        np.savez_compressed(sortedIndicesFileName, indices[:1000])
        sim_first1k = np.array([sim[index] for index in indices[:1000]])
        np.savez_compressed(simFileName, sim_first1k)

    sim = np.load(simFileName)
    indices = np.load(sortedIndicesFileName)
    data = []
    for index in indices:
        if len(data) == limit:
            break
        if filterEnglishWords(names[index]):
            data.append((names[index], sim[index]))
    return data


# --- Module-level configuration and shared resources for this script ---

# Bounds used to min-max normalize similarity scores.
minSimilarity = -1
maxSimilarity = 1
# Bounds for centrality scores.
# NOTE(review): these constants look hard-coded from a previous run of the
# pipeline — confirm they still match the loaded space.
minCentrality = -0.00188222
maxCentrality = 0.00324597

# Load the ConceptNet 5.4 association space and keep its row labels handy
# for translating matrix indices back to concept names.
assocDir = "../conceptnet5/data/assoc/assoc-space-5.4"
assocSpace = AssocSpace.load_dir(assocDir)
names = assocSpace.labels

# Load the pre-trained Google News word2vec vectors (gensim); init_sims
# with replace=True normalizes the vectors in place to reduce memory use.
word2vec_model = models.word2vec.Word2Vec.load_word2vec_format(
    '../../../DATASETS/GoogleNews-vectors-negative300.bin', binary=True)
word2vec_model.init_sims(replace=True)

# Sentinel return codes for words that cannot be scored.
TOO_RARE_WORD_CODE = -3
NOT_FOUND_IN_CORPUS_CODE = -2
Ejemplo n.º 9
0
def main(dir):
    """Load the AssocSpace saved under *dir* and run the test routine on it."""
    loaded_space = AssocSpace.load_dir(dir)
    test(loaded_space)
Ejemplo n.º 10
0
def main(dir):
    """Run the test routine against the AssocSpace stored in *dir*."""
    test(AssocSpace.load_dir(dir))
Ejemplo n.º 11
0
def test_dir_round_trip():
    """Saving an AssocSpace to disk and loading it back yields an equal space.

    Fix: replace the hard-coded '/tmp/assoc_test' with a fresh temporary
    directory (safe under parallel runs) that is always cleaned up.
    """
    import shutil
    import tempfile

    assoc = AssocSpace.from_entries(entries, 3)
    tmpdir = tempfile.mkdtemp()
    try:
        assoc.save_dir(tmpdir)
        assoc2 = AssocSpace.load_dir(tmpdir)
        eq_(assoc, assoc2)
    finally:
        shutil.rmtree(tmpdir)
from assoc_space import AssocSpace
import sys
import threading
import math


def computeNormalizedValue(value, maxV, minV, addOne=False):
    """Min-max normalize *value* over the range [minV, maxV].

    When *addOne* is true, both the numerator and denominator are shifted
    by one (add-one smoothing), so a value equal to minV maps to
    1 / (range + 1) instead of 0.
    """
    shift = 1 if addOne else 0
    return (value - minV + shift) / (maxV - minV + shift)


# Usage guard: the script requires a seeds file, a target file, and the
# directory of the saved AssocSpace.
if len(sys.argv) < 4:
    # Fix: the bare Python 2 `print` statement is a syntax error under
    # Python 3; the parenthesized form prints the identical text on both.
    print("python conceptnetAssocSpace.py <seedsfile> <targetfile> <AssocSpaceDirectory>")
    sys.exit()

assocSpace = AssocSpace.load_dir(sys.argv[3])
words = []

# Observed bounds used to min-max normalize similarity and centrality.
# NOTE(review): hard-coded from a prior run — confirm against the space.
minSimilarity = -0.358846
maxSimilarity = 0.999747
minCentrality = -0.00188222
maxCentrality = 0.00324597
with open(sys.argv[1], "r") as f:
    i = 0
    for line in f:
        if line.startswith("##"):
            continue
        words = line.split("\t")
        word1 = "/c/en/" + words[0].strip()
        with open(sys.argv[2], "r") as f2:
            for line in f2:
                if line.startswith("##"):