def buildMatrix(csv_filename, pickle_name, languages=None, cutoff=2):
    """Build a sparse matrix from a CSV file of assertions and save it.

    The CSV file is handed to ``extractAsserts`` together with
    ``languages``; the result goes through ``formTriples`` and
    ``divisi2.make_sparse``, is reduced with ``squish(cutoff)`` and is
    finally written out with ``divisi2.save``.

    Args:
        csv_filename: Path of the CSV file to read (opened in binary mode).
        pickle_name: Destination passed to ``divisi2.save``.
        languages: Language codes forwarded to ``extractAsserts``.
            Omitting it (or passing ``None``) is equivalent to ``['en']``.
        cutoff: Value forwarded to the matrix's ``squish`` method.
            NOTE(review): presumably the minimum number of entries a
            row/column needs to be kept -- confirm against divisi2.

    Returns:
        None. The matrix is only written to ``pickle_name``.
    """
    if languages is None:
        # A fresh list per call: a list literal in the signature would be
        # one shared object that any callee could mutate for later calls.
        languages = ['en']

    # The context manager closes the file even when extractAsserts raises.
    with open(csv_filename, 'rb') as csv_file:
        asserts_dict = extractAsserts(csv_file, languages)

    triples = formTriples(asserts_dict)
    matrix = divisi2.make_sparse(triples)
    matrix = matrix.squish(cutoff)
    divisi2.save(matrix, pickle_name)
# NOTE(review): this first append looks like the tail of a loop that begins
# above this excerpt -- confirm its indentation against the full file.
weighted_relations.append(rel_triple)

# Add one (weight, object, subject) triple per entry of q, all with the
# same fixed weight, and remember every object label that was seen.
for idx in range(len(q)):
    entry = q[idx]
    obj = entry[2]
    subj = entry[0]
    weight = 4.0
    rel_triple = (weight, obj, subj)
    obj_list.append(obj)
    weighted_relations.append(rel_triple)

print(len(weighted_relations))

# Collapse the object labels to their distinct values.
obj_list = set(obj_list)
print(len(obj_list))

matrix = divisi2.make_sparse(weighted_relations)

# ConceptNet matrix and its rank-100 SVD.
A = divisi2.network.conceptnet_matrix('en')
A_concept_axes, A_axis_weights, A_feature_axes = A.svd(k=100)

# Blend the local matrix with ConceptNet and factor the result as well.
blended_matrix = blend([matrix, A])
concept_axes, axis_weights, feature_axes = blended_matrix.svd(k=100)

# Objects that occur both in the local data and as ConceptNet row labels.
common_objects = list(set(obj_list).intersection(A.row_labels))
print(len(A.row_labels))

# Save embeddings for ConceptNet: one row of A_concept_axes per common object.
cnet_object_embeddings = np.array(
    [A_concept_axes.row_named(obj) for obj in common_objects]
)
from conceptnet.models import *
import divisi2
import os
import codecs

# Build a concept-by-feature sparse matrix from every file in the current
# directory whose name starts with 'conceptnet_zh_', then save it.
# Each non-empty line must split on ', ' into exactly four fields:
# user, frame_id, concept1, concept2 (anything else raises ValueError).
sparse_pieces = []
for filename in os.listdir('.'):
    if not filename.startswith('conceptnet_zh_'):
        continue
    # The context manager closes each input file as soon as it has been
    # read (or if a line fails to parse) instead of leaving the handle open.
    with codecs.open(filename, encoding='utf-8', errors='replace') as infile:
        for line in infile:
            line = line.strip()
            if not line:
                continue
            parts = line.split(', ')
            user, frame_id, concept1, concept2 = parts
            # Frame is presumably supplied by the star import above; one
            # lookup per line resolves the frame id to its relation.
            relation = Frame.objects.get(id=int(frame_id)).relation
            left_feature = u"%s\\%s" % (concept1, relation)
            right_feature = u"%s/%s" % (relation, concept2)
            # Every assertion yields two entries of value 1: concept1 is
            # paired with the right feature, concept2 with the left one.
            sparse_pieces.append((1, concept1, right_feature))
            sparse_pieces.append((1, concept2, left_feature))

matrix = divisi2.make_sparse(sparse_pieces)
divisi2.save(matrix, 'feature_matrix_zh.smat')
def test_set_matrix():
    """Check that a matrix stored with ``set`` reads back equal to ``matrix.get()``."""
    # Build a sparse matrix from the module-level data and store it in a
    # freshly constructed OurSparseMatrix.
    built = make_sparse(data)
    holder = OurSparseMatrix()
    holder.set(built)

    # Compare the module-level matrix with what the new holder returns.
    expected = matrix.get()
    actual = holder.get()
    assert_equal(expected, actual)
def test_equal():
    """Check that ``matrix.get()`` equals a sparse matrix built from ``data``."""
    built = make_sparse(data)
    stored = matrix.get()
    assert_equal(stored, built)