Example #1
    def rank_relevant_docs_w2v(self, w2v_model, query_as_list, relevant_docs):
        # relevant_docs[0] is the posting dict of every query term;
        # relevant_docs[1] (unused here) maps each term to its count in the query.
        relevant_doc = relevant_docs[0]

        # Embed the query once (numpy as np and numpy.linalg.norm are assumed imported).
        qvector = self.get_embedding_w2v(w2v_model, query_as_list)
        tweet_id_data = {}
        tweet_id_CosSim = {}

        # Collect every tweet id that appears in the postings of the query terms.
        for posting in relevant_doc.values():
            for tweet_id in posting.keys():
                if tweet_id not in tweet_id_data:
                    tweet_id_data[tweet_id] = self.docs_dic[tweet_id]
                    tweet_id_CosSim[tweet_id] = 0.0

        # Cosine similarity between the query vector and each candidate tweet vector.
        norm_vec_q = norm(qvector)
        for tweet_id in tweet_id_data:
            vec = self.get_embedding_w2v(w2v_model, self.tweet_dic[tweet_id])
            tweet_id_CosSim[tweet_id] = np.dot(qvector, vec) / (norm_vec_q * norm(vec))

        # Return the tweet ids sorted by descending similarity.
        return sorted(tweet_id_CosSim, key=tweet_id_CosSim.get, reverse=True)
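The snippet depends on a get_embedding_w2v helper that is not shown. A minimal sketch, assuming it simply averages the word2vec vectors of the tokens known to the model; the helper name comes from the snippet, while the gensim wv lookup, the 300-dimension default (mirroring Example #9), and the zero-vector fallback are assumptions:

import numpy as np

def get_embedding_w2v(w2v_model, tokens, dim=300):
    # Hypothetical helper: average the embeddings of the tokens known to the model.
    vectors = [w2v_model.wv[token] for token in tokens if token in w2v_model.wv]
    if not vectors:
        return np.zeros(dim)  # no known token: fall back to the zero vector
    return np.mean(vectors, axis=0)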
Example #2
def Recognition1(img, m, A, Eigenfaces):
    # Project every centered training image (column of A) onto the eigenface space.
    Train_Number = Eigenfaces.shape[1]
    projected_columns = []
    for i in range(Train_Number):
        projected_columns.append(np.dot(np.transpose(Eigenfaces), A[:, i]))
    ProjectedImages = np.column_stack(projected_columns)

    # Flatten one channel of the test image into a column vector (MATLAB-style column order).
    temp = img[:, :, 1]
    row, col = temp.shape
    InImage = np.transpose(temp).reshape(row * col, 1)

    # Subtract the mean face and project the test image onto the eigenface space.
    Difference = InImage.astype(np.double) - m.astype(np.double)
    ProjectedTestImages = np.dot(np.transpose(Eigenfaces), Difference).ravel()

    # Squared Euclidean distance to every projected training image.
    Euc_dist = []
    for i in range(Train_Number):
        q = ProjectedImages[:, i]
        Euc_dist.append(norm(ProjectedTestImages - q) ** 2)

    # The closest training image gives the recognized identity.
    Recognized_index = Euc_dist.index(min(Euc_dist))
    OutputName = str(Recognized_index) + '.jpg'
    return OutputName


# Parameters and code needed for standalone module testing:
# img = cv2.imread("C:/Users/78111/Desktop/TestDatabase/9.jpg")
# T = CreateDatabase.CreateDatabase()
# m, A, Eigenfaces = EigenfaceCore(T)
# Recognition1(img, m, A, Eigenfaces)
Example #3
    def __init__(self, files, language='french'):
        self.parser = Parser(language=language,
                             default_remove_stopwords=True,
                             default_stem=True)
        self.files = {}
        self.vectors = {}
        # Retrieve each file's contents and tokenize it (title included).
        ParsedFile = namedtuple('ParsedFile',
                                'title content original_content uniq_words')
        for file in files:
            title, original_content = parse_course(file)
            content = self.parser.tokenize(original_content) + self.parser.tokenize(title)
            self.files[basename(file)[:-4]] = ParsedFile(
                title, content, original_content, set(content))
        # Set of all unique words, possibly reduced by stemming and stopword removal.
        word_list = set(word for file in self.files.values()
                        for word in file.uniq_words)
        number_of_documents = len(files)
        # Compute an (index, idf) pair for each word; this can take a while.
        self.words_index = {
            word: (index, number_of_documents / self.__count_docs(word))
            for (index, word) in enumerate(word_list)
        }
        for acronym, file in self.files.items():
            vector = [0] * len(self.words_index)
            for word in file.content:
                # Add the word's idf on every occurrence, which yields tf*idf.
                vector[self.words_index[word][0]] += self.words_index[word][1]
            self.vectors[acronym] = vector
        self.norms = Keydefaultdict(
            lambda acronym: norm(self.vectors[acronym]))
        self.cosines = Keydefaultdict(lambda acr_a: Keydefaultdict(
            lambda acr_b: self.__cosine(self.vectors[acr_a], self.vectors[acr_b],
                                        self.norms[acr_a], self.norms[acr_b])))
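The class relies on a Keydefaultdict that is not shown. Presumably it behaves like collections.defaultdict except that the factory is called with the missing key; a minimal sketch under that assumption:

from collections import defaultdict

class Keydefaultdict(defaultdict):
    # Hypothetical variant of defaultdict whose factory receives the missing key.
    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        value = self.default_factory(key)
        self[key] = value
        return value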
Example #4
    def fromLoop(cls, loop):
        """Returns a Model representing the loop"""
        # Get the necessary vectors.
        offset_v = [loop.r_anchor[0].__dict__[c] - loop.l_anchor[0].__dict__[c] for c in 'xyz']
        sse0_v = Model.__get_sse_vector(loop.l_anchor, loop.atoms[0])
        sse1_v = Model.__get_sse_vector(loop.r_anchor, loop.atoms[-1])

        sFrame = TransformFrame.createFromVectors(loop.l_anchor[0],
                                                  transform.Vec.from_array(offset_v),
                                                  transform.Vec.from_array(sse0_v))

        # Theta and phi are the angles between each SSE and the anchor-anchor vector.
        theta = arccos(dot(sse0_v, negative(offset_v)) / (norm(sse0_v) * norm(offset_v)))
        phi = arccos(dot(sse1_v, offset_v) / (norm(sse1_v) * norm(offset_v)))

        # Length of the anchor-anchor vector.
        anchor_dist = norm(offset_v)

        return Model([loop],
                     [Vec.from_array(sFrame.transformInto(atom)) for atom in loop.atoms],
                     theta, phi, anchor_dist,
                     [loop.l_type, loop.r_type],
                     Model.__gen_seq([loop.seq]), 1)
Example #5
def similarity(a, b):
    if int(a[1]) == int(b[1]):  # are the two cars on the same edge?
        # If so, return the Euclidean distance between them (dataframe columns 2:4 hold the coordinates).
        return norm(a[2:4] - b[2:4])
    else:
        # A very large value that keeps cars on different edges out of the same cluster.
        return 1000000
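A quick usage sketch, assuming each row is a numpy array with the edge id at index 1 and the x/y coordinates at indices 2-3 (the sample values are made up):

import numpy as np
from numpy.linalg import norm

car_a = np.array([7, 12, 3.0, 4.0])  # id, edge, x, y
car_b = np.array([9, 12, 6.0, 8.0])
car_c = np.array([4, 13, 6.0, 8.0])
print(similarity(car_a, car_b))  # same edge: Euclidean distance 5.0
print(similarity(car_a, car_c))  # different edge: 1000000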
Example #6
    def rank_relevant_docs(relevant_docs, norm_query, k=None):
        """
        This function provides a rank for each relevant document and sorts them by their scores.
        The score is the cosine similarity between each document's weight vector and the query's weight vector.
        :param relevant_docs: dictionary mapping each document that contains at least one query term to its weight vector.
        :param norm_query: weight vector of the query.
        :param k: number of most relevant docs to return, default to everything.
        :return: sorted list of documents by score
        """
        ranked_results = {}
        for doc in relevant_docs.keys():
            sum_wij_wiq = dot(relevant_docs[doc], norm_query)
            cos_similarity = sum_wij_wiq / (norm(relevant_docs[doc]) *
                                            norm(norm_query))
            ranked_results[doc] = cos_similarity

        ranked_results = sorted(ranked_results.items(),
                                key=lambda item: item[1],
                                reverse=True)

        if k is not None:
            ranked_results = ranked_results[:k]
        return [d[0] for d in ranked_results]
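A minimal usage sketch, assuming the method is reachable as a plain function and that every document and the query are already numpy weight vectors of the same length (the ids and values are made up):

import numpy as np

docs = {
    'doc1': np.array([0.0, 1.0, 2.0]),
    'doc2': np.array([1.0, 1.0, 0.0]),
}
query_vec = np.array([0.0, 1.0, 1.0])
print(rank_relevant_docs(docs, query_vec, k=1))  # ['doc1'], the higher-cosine document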
Example #7
def first_pc(a):
    """Return the first principal component of a via power iteration on a.dot(a.T)."""
    epsilon = 1e-10
    a = np.array(a, dtype=float)
    cov = a.dot(a.T)
    n = cov.shape[0]
    current_v = random_unit_vector(n)

    while True:
        last_v = current_v
        current_v = np.dot(cov, last_v)
        current_v = current_v / norm(current_v)

        # Converged once the direction stops changing between iterations.
        if abs(np.dot(current_v, last_v)) > 1 - epsilon:
            return current_v
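random_unit_vector is not shown; a minimal sketch, assuming it draws a random Gaussian vector and normalizes it:

import numpy as np
from numpy.linalg import norm

def random_unit_vector(n):
    # Hypothetical helper: a random n-dimensional direction of unit length.
    v = np.random.normal(size=n)
    return v / norm(v)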
Example #8
def extract_transformation(homography):
    """
    Take a 3x3 homography representing a truncated rigid transformation (where *z* is
    assumed to be zero so the third column of the rotation matrix is removed) and
    extract a `RigidTransformation` from it.
    """
    # It's easier to deal with columns
    homography = homography.T
    
    # Normalize to remove any scaling factor
    homography /= norm(homography[0])
    
    trans = np.empty((4,3))
    trans[:2] = homography[:2]
    trans[2]  = np.cross(homography[0], homography[1])
    trans[3]  = homography[2]
    trans = trans.T
    
    # Constrain the first three columns to be orthogonal
    # NOTE: The decomposition used is extremely unstable. For now, just skip this step and
    #   hope for the best.
    #trans[:,0:3] = nearest_rotation_matrix(trans[:,0:3])
    
    return RigidTransformation(matrix=trans)
Example #9
    def rank_relevant_docs(relevant_doc,
                           query,
                           indexer,
                           model_vector=None,
                           k=None):
        """
        This function provides a rank for each relevant document and sorts them by their scores.
        The score is a weighted cosine similarity between the query vector and each document vector,
        where both vectors are tf-idf weighted sums of word2vec term embeddings.
        :param relevant_doc: dict, key = doc_id, value = (number of query terms in the doc, [(term, count in doc)]).
        :param query: dict, key = query term, value = its frequency in the query.
        :param indexer: indexer holding the inverted index and per-document statistics.
        :param model_vector: word2vec model used to embed the terms.
        :param k: number of most relevant docs to return, default to everything.
        :return: sorted list of documents by score
        """
        # inverted_idx: term -> [line number in posting file, document frequency, total appearances in corpus]
        inverted_index = indexer.inverted_idx
        # documents_data: doc_id -> [max freq term, number of distinct words, number of words, ...]
        documents_data = indexer.documents_data

        idf_dict = {}  # term -> idf
        dict_vector_model = {}  # term -> vector representation from the model
        vector_query = numpy.zeros(300)

        # Build the query vector as a tf-idf weighted sum of term embeddings.
        for term in query:
            if term in model_vector.wv.vocab:
                term_vector = model_vector.wv.get_vector(term)
            else:  # the term does not exist in the model
                term_vector = numpy.zeros(300)
            dict_vector_model[term] = term_vector  # keep the vector of the term
            tf_q = query[term]
            df_term = inverted_index[term][1]
            total_docs = len(documents_data.keys())
            idf_term = math.log((total_docs / df_term), 2)
            idf_dict[term] = idf_term
            vector_query += tf_q * idf_term * term_vector

        # Build a tf-idf weighted embedding for every relevant document.
        dict_doc = {}  # doc_id -> sum over terms of (tf * idf * term vector)
        for doc_id in relevant_doc.keys():
            for term in relevant_doc[doc_id][1]:  # [(term, number of appearances in the doc)]
                term_name = term[0]
                tf_term = int(term[1]) / documents_data[doc_id][2]
                idf_term = float(idf_dict[term_name])
                tf_idf_vector = tf_term * idf_term * dict_vector_model[term_name]
                if doc_id not in dict_doc:
                    dict_doc[doc_id] = numpy.zeros(300)
                dict_doc[doc_id] += tf_idf_vector

        # Score each document with a weighted cosine similarity against the query vector.
        rank_cosine = []
        for doc_id, doc_as_vec in dict_doc.items():
            if not numpy.any(doc_as_vec) or not numpy.any(vector_query):  # zero vector: no meaningful score
                rank_cosine.append((0, doc_id, relevant_doc[doc_id][0]))
            else:
                dot = numpy.dot(vector_query, doc_as_vec)
                doc_weight = math.sqrt(float(documents_data[doc_id][3]))  # document weight from the index statistics
                normalization = norm(vector_query) * norm(doc_as_vec)
                cosine_sim = dot / (normalization * doc_weight)
                if cosine_sim < 0.33:  # drop documents with a low score
                    continue
                # (cosine similarity, doc_id, number of query terms shared with the doc)
                rank_cosine.append((cosine_sim, doc_id, relevant_doc[doc_id][0]))

        rank_list_sorted = sorted(rank_cosine, reverse=True)

        if k is not None:
            rank_list_sorted = rank_list_sorted[:k]
        return [d[1] for d in rank_list_sorted]
Example #10
def inspect_infeasible(f, h, x, ν, k):
    # Logging callback for iteration k of the infeasible-start Newton method: the two
    # residual norms, the norm of the full residual, and the current iterate.
    return np.concatenate([
        [norm(ConstrainedNewtonInfeasible.r_primal(f, h, x, ν)), norm(ConstrainedNewtonInfeasible.r_dual(h, x))],
        [norm(ConstrainedNewtonInfeasible.r(f, h)(x, ν))],
        x.T[0],
    ])
Example #11
def inspect_2d_infeasible(f, h, x, ν, k):
    # 2-D variant of the logging callback: objective value, iterate coordinates, and residual norms.
    return np.concatenate([
        [f(x)],
        *x,
        [norm(ConstrainedNewtonInfeasible.r_primal(f, h, x, ν)), norm(ConstrainedNewtonInfeasible.r_dual(h, x))]
    ])
Example #12
def cos_similarity(vec1, vec2):
    return dot(vec2, vec1) / (norm(vec2) * norm(vec1))
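A quick usage example with two arbitrary numpy vectors:

import numpy as np
from numpy import dot
from numpy.linalg import norm

v1 = np.array([1.0, 0.0, 1.0])
v2 = np.array([1.0, 1.0, 0.0])
print(cos_similarity(v1, v2))  # 0.5: dot product 1, both norms sqrt(2)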