def tags_similar_to_vector(self, vec):
    """
    Take in a category vector and return a weighted vector of associated
    tags in the study. You can run the `top_items()` method of this vector
    to get the most associated tags.
    """
    return divisi2.dot(self.get_tag_matrix().normalize_rows(offset=1.0), vec)
def docs_similar_to_vector(self, vec, study='all'):
    """
    Take in a category vector and return a weighted vector of associated
    documents in the study. You can run the `top_items()` method of this
    vector to get the most associated documents.
    """
    return divisi2.dot(self.get_doc_matrix(study).normalize_rows(offset=1.0), vec)
def terms_similar_to_vector(self, vec):
    """
    Take in a category vector and return a weighted vector of associated
    terms. You can run the `top_items()` method of this vector to get the
    most associated terms.
    """
    return divisi2.dot(self.assoc.left, vec)
import time
import divisi2

def learn_iter(mat):
    # NUM_LEARN_ITER, NUM_USERS, NUM_MOVIES and learn() are assumed to be
    # defined elsewhere in this module; learn() returns the factor matrices
    # (user rows, axis weights, movie rows) for the current ratings matrix.
    print "Starting learning process..."
    for i in range(NUM_LEARN_ITER):
        user_mat, axis_weights, movie_mat = learn(mat)
        # Reconstruct the learning matrix here, one (user, movie) entry at a
        # time, from the user and movie rows.
        for j in range(NUM_USERS):
            for k in range(NUM_MOVIES):
                mat[j, k] = divisi2.dot(user_mat[j, :], movie_mat[k, :])
    print "Learning process complete."
    start_time = time.time()
    predictions = divisi2.reconstruct(user_mat, axis_weights, movie_mat)
    print "Matrix reconstruction (elapsed time: %f s)." % (time.time() - start_time)
    return predictions
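# A minimal sketch (not from the original code) of what the rank-k
# reconstruction above amounts to: each predicted rating is the dot product
# of a user row and a movie row, weighted by the axis (singular value)
# weights. The arrays below are hypothetical NumPy stand-ins for the factors
# returned by learn().
import numpy as np

user_mat = np.random.rand(4, 2)      # hypothetical: 4 users x 2 latent axes
axis_weights = np.array([2.0, 0.5])  # hypothetical axis weights
movie_mat = np.random.rand(3, 2)     # hypothetical: 3 movies x 2 latent axes

# Entry (j, k) of the reconstruction: sum over axes of U[j,a] * S[a] * V[k,a]
pred_jk = np.dot(user_mat[1, :] * axis_weights, movie_mat[2, :])
full_pred = np.dot(user_mat * axis_weights, movie_mat.T)
assert abs(full_pred[1, 2] - pred_jk) < 1e-9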
def domain_terms_similar_to_vector(self, vec):
    """
    Take in a category vector and return a weighted vector of associated
    terms, leaving out ones that only appear in common sense background
    knowledge. You can run the `top_items()` method of this vector to get
    the most associated terms.
    """
    # FIXME: this way of finding domain concepts is such a hack.
    mask = np.zeros((len(self.priority),), 'b')
    for i, item in enumerate(self.priority.items):
        if (self.priority.priority.has_key(i)
                and self.priority.priority[i] < 1e6):
            mask[i] = True
    return divisi2.multiply(
        divisi2.dot(self.assoc.left.normalize_rows(offset=1.0), vec),
        mask
    )
def vector_from_terms(self, terms):
    """
    Get a category vector representing the given set of weighted terms,
    expressed as (term, weight) tuples. This will apply TF-IDF weighting.
    """
    total_weight = 0.0
    for _, weight in terms:
        total_weight += abs(weight)
    vec = divisi2.DenseVector(
        np.zeros((len(self.priority),)),
        labels=self.priority
    )
    for term, weight in terms:
        if term in self.priority:
            index = self.priority.index(term)
            tfidf_weight = (weight * self.get_term_idf(term)
                            * self.database.normalized_relevance(term))
            vec[index] = tfidf_weight / total_weight
    category = divisi2.dot(vec, self.assoc.left)
    return category
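# A hypothetical usage sketch (not part of the original code), assuming
# `model` is an object exposing the methods above: build a category vector
# from a few weighted terms, then ask for the most associated terms and tags.
# The example terms and weights are made up.
category = model.vector_from_terms([('coffee', 1.0), ('espresso', 0.5)])
print model.terms_similar_to_vector(category).top_items(10)
print model.tags_similar_to_vector(category).top_items(10)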
def _cosine(self, v1, v2):
    return float(divisi2.dot(v1, v2) / (norm(v1) * norm(v2)))
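# A small standalone check (added as illustration, not from the original code)
# of the cosine formula used by _cosine above, written with plain NumPy.
import numpy as np
from numpy.linalg import norm

v1 = np.array([1.0, 0.0, 1.0])
v2 = np.array([1.0, 1.0, 0.0])
cosine = float(np.dot(v1, v2) / (norm(v1) * norm(v2)))  # 0.5 for these vectors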