def conceptnet_assoc(lang):
    """
    Return the concept-by-concept association matrix for `lang`.

    The matrix is loaded from 'data:matrices/conceptnet_assoc_<lang>.gz'.
    If that load raises IOError, the matrix is instead built with
    sparse_matrix() from the graph stored at
    'data:graphs/conceptnet_<lang>.graph.gz', saved to the matrix path,
    and returned.
    """
    import divisi2

    matrix_path = 'data:matrices/conceptnet_assoc_%s.gz' % lang
    try:
        return divisi2.load(matrix_path)
    except IOError:
        # The saved matrix could not be read: rebuild it from the graph
        # and save it so that the next call can load it directly.
        graph_path = 'data:graphs/conceptnet_%s.graph.gz' % lang
        concept_graph = divisi2.load(graph_path)
        assoc = sparse_matrix(concept_graph, 'concepts', 'concepts', 3)
        divisi2.save(assoc, matrix_path)
        return assoc
def conceptnet_matrix(lang):
    """
    Return the concept-by-feature matrix for `lang`.

    The matrix is loaded from 'data:matrices/conceptnet_<lang>.gz'. If
    that load raises IOError, the matrix is instead built with
    sparse_matrix() from the graph stored at
    'data:graphs/conceptnet_<lang>.graph.gz', saved to the matrix path,
    and returned.
    """
    import divisi2

    matrix_path = 'data:matrices/conceptnet_%s.gz' % lang
    try:
        # load from the included pickle file
        return divisi2.load(matrix_path)
    except IOError:
        # The saved matrix could not be read: rebuild it from the graph
        # and save it so that the next call can load it directly.
        graph_path = 'data:graphs/conceptnet_%s.graph.gz' % lang
        concept_graph = divisi2.load(graph_path)
        built = sparse_matrix(concept_graph, 'concepts', 'features', 3)
        divisi2.save(built, matrix_path)
        return built
def __init__(self, matrix_path=data_path+'feature_matrix_zh.smat'):
    """
    Load the feature and association matrices and build the operators.

    `matrix_path` is the file passed to divisi2.load() for the feature
    matrix. The association matrix is always loaded from
    data_path + 'assoc_matrix_zh.smat'. Both matrices are normalized with
    normalize_all() and decomposed with svd(k=100).

    Sets self.A, self.concept_axes, self.feature_axes, self.sim,
    self.predict, self.assoc and self.spread.
    """
    # AnalogySpace
    raw_features = divisi2.load(matrix_path)
    self.A = raw_features.normalize_all()
    concept_axes, axis_weights, feature_axes = self.A.svd(k=100)
    self.concept_axes = concept_axes
    self.feature_axes = feature_axes
    self.sim = divisi2.reconstruct_similarity(
        concept_axes, axis_weights, post_normalize=False)
    self.predict = divisi2.reconstruct(
        concept_axes, axis_weights, feature_axes)

    # Fast spreading activation
    raw_assoc = divisi2.load(data_path+'assoc_matrix_zh.smat')
    self.assoc = raw_assoc.normalize_all()
    # Only the first two SVD results are passed to the activation operator.
    assoc_axes, assoc_weights, _ = self.assoc.svd(k=100)
    self.spread = divisi2.reconstruct_activation(assoc_axes, assoc_weights)
def create_graph(matrix_path, dim_list, node_type):
    """
    Build a graph object of the requested kind from a saved matrix.

    Parameters:
        matrix_path: path handed to divisi2.load() to read the matrix.
        dim_list: passed through unchanged to the graph constructor.
        node_type: 'concepts' selects ConceptGraph, 'assertions' selects
            AssertionGraph.

    Returns:
        ConceptGraph(matrix, dim_list) or AssertionGraph(matrix, dim_list).

    Raises:
        ValueError: if `node_type` is neither 'concepts' nor 'assertions'.
            ValueError is a subclass of Exception, so callers that catch
            Exception keep working.
    """
    # Validate the cheap argument first so that a bad node type is reported
    # without paying for (or failing in) the matrix load.
    if node_type not in ('concepts', 'assertions'):
        raise ValueError("unrecognized node type: [%s]" % (node_type, ))

    matrix = divisi2.load(matrix_path)
    if node_type == 'concepts':
        return ConceptGraph(matrix, dim_list)
    return AssertionGraph(matrix, dim_list)
def get_tag_matrix(self):
    """
    Get the matrix of all tags in a particular study.

    Returns self._tag_matrix when that attribute exists; otherwise loads
    the file named by self.filename_in_dir('tags.dmat') with divisi2.
    The loaded matrix is not stored on the instance by this method.
    """
    if not hasattr(self, '_tag_matrix'):
        tag_file = self.filename_in_dir('tags.dmat')
        return divisi2.load(tag_file)
    return self._tag_matrix
def __init__(self):
    """
    Load the ConceptNet matrix and prepare its similarity operator.

    The matrix is read from a fixed absolute path, decomposed with
    svd(k=100), and the first two SVD results are passed to
    divisi2.reconstruct_similarity() with post_normalize=True.
    """
    # Get the original sparse matrix
    source_path = "/opt/work/emotion_analysis/data_source/conceptnet_en.pickle"
    self.__conceptnet = divisi2.load(source_path)

    # Get the matrix after svd
    concept_axes, axis_weights, feature_axes = self.__conceptnet.svd(k=100)
    self.__concept_axes = concept_axes
    self.__axis_weights = axis_weights
    self.__feature_axes = feature_axes

    # Get the similarity operator
    self.__sim = divisi2.reconstruct_similarity(
        self.__concept_axes,
        self.__axis_weights,
        post_normalize=True,
    )
def conceptnet5_assoc():
    """
    Return the ConceptNet 5.1.1 association matrix.

    The matrix is loaded from 'data:matrices/conceptnet_assoc_5.1.1.gz'.
    If that load raises IOError, the matrix is instead built from the
    links file 'data:graphs/conceptnet-5.1.1-sparse-links.csv', saved to
    the matrix path, and returned.
    """
    import divisi2

    matrix_path = 'data:matrices/conceptnet_assoc_5.1.1.gz'
    try:
        return divisi2.load(matrix_path)
    except IOError:
        # The saved matrix could not be read: rebuild it from the links file.
        links_file = divisi2.fileIO.data_filename(
            'data:graphs/conceptnet-5.1.1-sparse-links.csv')
        link_entries = divisi2.dataset.conceptnet5_links(links_file)
        assoc = divisi2.SparseMatrix.from_named_entries(link_entries)
        divisi2.save(assoc, matrix_path)
        return assoc
def add_concepts(matrix_path=data_path + "feature_matrix_zh.smat"):
    """
    Insert one row into the `concept` table for every row label of a matrix.

    The matrix is loaded from `matrix_path` with divisi2.load(). For each
    row label, an INSERT statement carrying the label, a word count and
    an assertion count is sent through Database().query_db().
    """
    matrix = divisi2.load(matrix_path)
    database = Database()
    for concept in matrix.row_labels:
        # Join the items of the label with spaces, then count the
        # whitespace-separated pieces.
        pieces = " ".join(concept).split()
        num_word = len(pieces)
        # Number of keys in the named row for this concept.
        row = matrix.row_named(concept)
        num_assertion = len(row.keys())
        # NOTE(review): the label is interpolated straight into the SQL text,
        # so a label containing a single quote would break the statement.
        # Confirm whether query_db() supports bound parameters.
        values = (concept, num_word, num_assertion)
        cmd = (
            u"INSERT INTO concept (name, num_word, num_assertion) \
            VALUES ('%s', %d, %d)"
            % values
        )
        database.query_db(cmd)
def __init__(self):
    """
    Read the ConceptNet matrix from disk and build a similarity operator.

    Stores the loaded matrix, the three results of its svd(k=100), and
    the operator returned by divisi2.reconstruct_similarity() (called
    with post_normalize=True) on private attributes of the instance.
    """
    # Get the original sparse matrix
    pickle_path = '/opt/work/emotion_analysis/data_source/conceptnet_en.pickle'
    self.__conceptnet = divisi2.load(pickle_path)

    # Get the matrix after svd
    svd_parts = self.__conceptnet.svd(k=100)
    (self.__concept_axes,
     self.__axis_weights,
     self.__feature_axes) = svd_parts

    # Get the similarity operator
    similarity = divisi2.reconstruct_similarity(
        self.__concept_axes, self.__axis_weights, post_normalize=True)
    self.__sim = similarity
def evaluate_assertions(self, filename):
    """
    Evaluate the predictions that this matrix makes against a matrix of
    test data.

    The test matrix is loaded from `filename`. Every test entry whose row
    label is in self.row_labels and whose column label is in
    self.col_labels contributes a (test value, predicted value) pair,
    where the prediction comes from self.entry_named(). The two value
    lists are then compared by how consistently they order each pair of
    positions.

    Returns a 3-tuple: the rescaled agreement count of the test values
    against the predictions, the same count for the test values against
    themselves, and the quotient of the two normalized scores.

    Raises ZeroDivisionError when no pair of test values is strictly
    ordered (for example, when fewer than two entries overlap).

    This is kind of deprecated in favor of evaluate_ranking(), which does
    it more generally.
    """
    def direction(first, second):
        # +1 when first < second, -1 when first > second, 0 otherwise.
        if first < second:
            return 1
        if first > second:
            return -1
        return 0

    def order_compare(reference, candidate):
        assert len(reference) == len(candidate)
        size = len(reference)
        agreement = 0.0
        pairs = 0
        for lo in xrange(size):
            for hi in xrange(lo + 1, size):
                ref_dir = direction(reference[lo], reference[hi])
                if ref_dir == 0:
                    # Pairs that the reference does not order are skipped.
                    continue
                pairs += 1
                # +1 if the candidate orders the pair the same way, -1 if
                # it orders it the opposite way, 0 if it does not order it.
                agreement += ref_dir * direction(candidate[lo], candidate[hi])
        # Shift from the [-pairs, pairs] range to [0, pairs]; dividing by
        # `pairs` below then moves the result onto the 0-1 scale.
        agreement += (pairs - agreement) / 2.0
        return (float(agreement) / pairs, agreement, pairs)

    import divisi2
    testdata = divisi2.load(filename)
    known_rows = self.row_labels
    known_cols = self.col_labels
    expected = []
    predicted = []
    for value, row_label, col_label in testdata.named_entries():
        if row_label not in known_rows or col_label not in known_cols:
            continue
        expected.append(value)
        predicted.append(self.entry_named(row_label, col_label))

    ratio, score, _ = order_compare(expected, predicted)
    best_ratio, best_score, _ = order_compare(expected, expected)
    return score, best_score, ratio / best_ratio
def get_doc_matrix(self, study_name='all'):
    """
    Get the matrix of all documents in a particular study.

    The matrix is loaded with divisi2 from the file that
    self.filename_in_dir() returns for `study_name` + '.dmat'.
    """
    doc_file = self.filename_in_dir(study_name+'.dmat')
    return divisi2.load(doc_file)