def get_analogy_blend(self):
    """Blend the documents matrix with the other loaded matrices.

    Returns a (theblend, study_concepts) pair: the blended matrix and the
    set of row labels (concepts) to study. If the squished document matrix
    is empty (None), the blend uses only the other matrices and the study
    concepts come from the blend's own row labels.
    """
    # NOTE(review): the original built a list filtered to '.smat' names and
    # then immediately overwrote it with *all* matrices; the overwrite was
    # the effective behavior, so the dead filtering step has been removed.
    # list() keeps the '+' concatenation below working on py2 and py3.
    other_matrices = list(self.other_matrices.values())

    # Find concepts used in at least 3 documents.
    # (The original comment said "at least twice" but the code checks >= 3;
    # the comment has been corrected to match the actual behavior.)
    docs = self.get_documents_matrix()
    concept_counts = docs.col_op(len)
    valid_concepts = set(
        concept
        for concept, count in concept_counts.to_sparse().named_items()
        if count >= 3
    )

    # Extract the relevant concepts from the doc matrix; transpose it so
    # it's concepts vs. documents. Reuses `docs` instead of calling
    # get_documents_matrix() a second time (assumed idempotent — TODO
    # confirm it has no per-call side effects).
    concept_indices = [docs.col_index(c) for c in valid_concepts]
    # NOTE: canonical documents can affect the stats this way.
    # Is there a clean way to fix this?
    doc_matrix = docs[:, concept_indices].T.squish()

    if doc_matrix is None:
        theblend = blend(other_matrices)
        study_concepts = set(theblend.row_labels)
    else:
        theblend = blend([doc_matrix] + other_matrices)
        study_concepts = set(doc_matrix.row_labels)
    return theblend, study_concepts
def get_analogy_blend(self):
    """Blend the documents matrix with the other loaded matrices.

    Returns a (theblend, study_concepts) pair: the blended matrix and the
    set of row labels (concepts) to study. When the squished document
    matrix is None, only the other matrices are blended and the study
    concepts are taken from the blend itself.
    """
    # NOTE(review): a list filtered to '.smat' names was built here and then
    # immediately clobbered by self.other_matrices.values(); the clobbering
    # assignment was the effective behavior, so the dead filter is gone.
    # list() keeps '+' concatenation below valid on both py2 and py3.
    other_matrices = list(self.other_matrices.values())

    # Find concepts used in at least 3 documents.
    # (Comment corrected: the original said "at least twice" while the
    # code checks count >= 3.)
    docs = self.get_documents_matrix()
    concept_counts = docs.col_op(len)
    valid_concepts = set(
        concept
        for concept, count in concept_counts.to_sparse().named_items()
        if count >= 3
    )

    # Extract the relevant concepts from the doc matrix and transpose it
    # so it's concepts vs. documents. `docs` is reused rather than calling
    # get_documents_matrix() again (assumed idempotent — TODO confirm).
    concept_indices = [docs.col_index(c) for c in valid_concepts]
    # NOTE: canonical documents can affect the stats this way.
    # Is there a clean way to fix this?
    doc_matrix = docs[:, concept_indices].T.squish()

    if doc_matrix is None:
        theblend = blend(other_matrices)
        study_concepts = set(theblend.row_labels)
    else:
        theblend = blend([doc_matrix] + other_matrices)
        study_concepts = set(doc_matrix.row_labels)
    return theblend, study_concepts
def get_assoc_blend(self):
    """Blend the documents' association matrix with loaded assoc matrices.

    Only matrices whose name ends in '.assoc.smat' participate, and each
    must be square. Returns (theblend, study_concepts): the blended matrix
    and the set of concept row labels to study.

    Raises:
        ValueError: if any selected association matrix is not square.
    """
    doc_matrix = self.get_documents_assoc()
    self._step('Blending...')

    assoc_matrices = []
    for name, matrix in self.other_matrices.items():
        # use association matrices only
        # (unless we figure out how to do both kinds of blending)
        if not name.endswith('.assoc.smat'):
            continue
        n_rows, n_cols = matrix.shape[0], matrix.shape[1]
        if n_rows != n_cols:
            raise ValueError("The matrix %s is not square" % name)
        assoc_matrices.append(matrix)

    if doc_matrix is None:
        # No document matrix: blend the assoc matrices alone and take the
        # study concepts from the blend's own row labels.
        theblend = blend(assoc_matrices)
        study_concepts = set(theblend.row_labels)
    else:
        theblend = blend([doc_matrix] + assoc_matrices)
        study_concepts = set(doc_matrix.row_labels)
    return theblend, study_concepts
def make_blend(thefile):
    """Blend ConceptNet with the matrix stored in `thefile` and persist it.

    Loads the English ConceptNet matrix and the matrix in `thefile`,
    normalizes both, blends them 90/10, takes the SVD, and reconstructs a
    similarity matrix. The result is stored under the file's base name in
    the `pd` store and also returned.

    Args:
        thefile: path of a divisi2-loadable matrix file.

    Returns:
        The reconstructed similarity operator.
    """
    # Hoisted: the base name was computed twice in the original.
    base_name = thefile.split('.')[0]

    conceptnet = divisi2.network.conceptnet_matrix('en').normalize_all()
    thegame = divisi2.load(thefile).normalize_all()
    blended_matrix = blend([conceptnet, thegame], [0.9, 0.1])
    u, s, v = blended_matrix.svd()
    # NOTE(review): an experimental `offset=1.5` argument was left
    # commented out here in the original; kept disabled.
    similarity = divisi2.reconstruct_similarity(u, s)

    pd.mkdir(base_name)
    pd[base_name]['blend'] = similarity
    return similarity
obj_list.append(obj) # weighted_triple = (rel_triple, weight) weighted_relations.append(rel_triple) print len(weighted_relations) #print len(obj_list) obj_list = set(obj_list) print len(obj_list) matrix = divisi2.make_sparse(weighted_relations) #print matrix # ConceptNet Matrix A = divisi2.network.conceptnet_matrix('en') A_concept_axes, A_axis_weights, A_feature_axes = A.svd(k=100) blended_matrix = blend([matrix, A]) concept_axes, axis_weights, feature_axes = blended_matrix.svd(k=100) common_objects = list(set(obj_list).intersection(A.row_labels)) print len(A.row_labels) # Save embeddings for ConceptNet cnet_object_embeddings = np.array( [A_concept_axes.row_named(obj) for obj in common_objects]) np.save('cnet_object_embeddings.npy', cnet_object_embeddings) # Save embeddings for Blended Matrix blended_object_embeddings = np.array( [concept_axes.row_named(obj) for obj in common_objects]) np.save('blended_object_embeddings.npy', blended_object_embeddings) v = []