Example #1
    def tags_similar_to_vector(self, vec):
        """
        Takes in a category vector and returns a weighted vector of
        associated tags in the study. You can run the `top_items()`
        method of this vector to get the most associated tags.
        """
        return divisi2.dot(self.get_tag_matrix().normalize_rows(offset=1.0), vec)
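A minimal usage sketch, assuming an analysis object `analysis` that exposes these methods and using `vector_from_terms()` (Example #6) to build the category vector; the terms and weights are made up:

# Hypothetical usage: build a category vector, then rank tags against it.
vec = analysis.vector_from_terms([('teamwork', 1.0), ('deadline', 0.5)])
tag_vec = analysis.tags_similar_to_vector(vec)
print(tag_vec.top_items(10))  # the 10 most strongly associated tags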
Example #2
    def docs_similar_to_vector(self, vec, study='all'):
        """
        Takes in a category vector and returns a weighted vector of
        associated documents in the study. You can run the `top_items()`
        method of this vector to get the most associated documents.
        """
        return divisi2.dot(self.get_doc_matrix(study).normalize_rows(offset=1.0), vec)
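The same pattern applied to documents; the `study` keyword narrows the search to one named sub-study instead of 'all'. The object and study name below are hypothetical:

# Rank documents within a single named study.
vec = analysis.vector_from_terms([('teamwork', 1.0)])
doc_vec = analysis.docs_similar_to_vector(vec, study='interviews')
print(doc_vec.top_items(5))  # the 5 most associated documents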
Example #3
    def terms_similar_to_vector(self, vec):
        """
        Takes in a category vector and returns a weighted vector of
        associated terms. You can run the `top_items()` method of this vector
        to get the most associated terms.
        """
        return divisi2.dot(self.assoc.left, vec)
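Here `self.assoc.left` is presumably the left factor of the study's association matrix, so no per-row normalization is applied. Usage follows the same shape as the previous examples (names hypothetical):

# Spread activation from a single seed concept into related terms.
seed = analysis.vector_from_terms([('music', 1.0)])
print(analysis.terms_similar_to_vector(seed).top_items(20))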
Example #4
import time

import divisi2

# NUM_LEARN_ITER, NUM_USERS, NUM_MOVIES and learn() are defined elsewhere
# in the module.
def learn_iter(mat):
    print("Starting learning process...")
    for i in range(NUM_LEARN_ITER):
        user_mat, axis_weights, movie_mat = learn(mat)
        # Rebuild the learning matrix from the factor rows: entry (j, k) is
        # the dot product of user j's and movie k's factors. (Unlike
        # divisi2.reconstruct below, this simple product leaves out
        # axis_weights.)
        for j in range(NUM_USERS):
            for k in range(NUM_MOVIES):
                mat[j, k] = divisi2.dot(user_mat[j, :], movie_mat[k, :])
    print("Learning process complete.")
    start_time = time.time()
    predictions = divisi2.reconstruct(user_mat, axis_weights, movie_mat)
    print("Matrix reconstruction (elapsed time: %f s)." % (time.time() - start_time))
    return predictions
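`divisi2.reconstruct` returns a lazily evaluated low-rank matrix rather than a dense array. A sketch of using the result, assuming it can be indexed numerically like the input matrix (`rating_matrix` and the indices are hypothetical):

# Entries of the reconstructed matrix serve as predicted ratings.
predictions = learn_iter(rating_matrix)
print(predictions[3, 7])  # predicted rating of movie 7 by user 3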
Example #5
    def domain_terms_similar_to_vector(self, vec):
        """
        Takes in a category vector and returns a weighted vector of
        associated terms, leaving out ones that appear only in common
        sense background knowledge.

        You can run the `top_items()` method of this vector
        to get the most associated terms.
        """
        # FIXME: this way of finding domain concepts is such a hack.
        mask = np.zeros((len(self.priority),), 'b')
        for i, item in enumerate(self.priority.items):
            if (i in self.priority.priority and
                self.priority.priority[i] < 1e6):
                mask[i] = True
        return divisi2.multiply(
            divisi2.dot(self.assoc.left.normalize_rows(offset=1.0), vec),
            mask
        )
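The mask keeps only terms whose priority value marks them as part of the study's own vocabulary; `divisi2.multiply` then zeroes out everything else elementwise, so background-only terms can never reach the top of the ranking. A sketch under the same assumptions as the earlier examples:

vec = analysis.vector_from_terms([('interface', 1.0)])
domain_vec = analysis.domain_terms_similar_to_vector(vec)
print(domain_vec.top_items(10))  # background-knowledge-only terms score 0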
Example #6
    def vector_from_terms(self, terms):
        """
        Get a category vector representing the given set of weighted terms,
        expressed as (term, weight) tuples. This will apply TF-IDF weighting.
        """
        total_weight = 0.0
        for _, weight in terms:
            total_weight += abs(weight)

        vec = divisi2.DenseVector(
            np.zeros((len(self.priority),)),
            labels=self.priority
        )
        for term, weight in terms:
            if term in self.priority:
                index = self.priority.index(term)
                tfidf_weight = (weight * self.get_term_idf(term)
                                * self.database.normalized_relevance(term))
                vec[index] = tfidf_weight / total_weight
        category = divisi2.dot(vec, self.assoc.left)
        return category
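Because each weight is divided by the total absolute weight, only the relative magnitudes and signs of the inputs matter. A sketch with mixed signs (the terms are hypothetical):

# Positive weights pull the category toward a term, negative ones push away.
cat = analysis.vector_from_terms([('service', 2.0), ('price', -1.0)])
print(analysis.docs_similar_to_vector(cat).top_items(5))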
Example #7
    def _cosine(self, v1, v2):
        # Cosine similarity of two vectors; `norm` is assumed to be
        # numpy.linalg.norm.
        return float(divisi2.dot(v1, v2) / (norm(v1) * norm(v2)))
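A quick sanity check, assuming `norm` is `numpy.linalg.norm` and that `divisi2.dot` falls back to an ordinary dot product on plain NumPy arrays (`obj` is a hypothetical instance of the containing class):

import numpy as np
v1 = np.array([1.0, 2.0])
v2 = np.array([2.0, 4.0])  # parallel to v1
print(obj._cosine(v1, v2))  # ~1.0; orthogonal vectors would give 0.0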