def get_actors_tags_space(self):
        """Build the actor x tag TF-IDF object-feature matrix.

        Runs a q query through ``self.data_set.get_data_from_kdb`` and reads
        every result row positionally as ``(actorid, tag ids, timestamps,
        ...)``.  Tags and timestamps are accumulated per actor, fed to a fresh
        ``TfIdf`` instance (TF, time-weighted TF, then TF-IDF) and the result
        is written into a zero-initialised matrix by ``basics.fill_matrix``.

        Returns:
            A tuple ``(object_feature_matrix, allactors, alltags,
            actor_tag_map)`` where ``allactors`` is the key view of
            ``actor_tag_map`` and ``alltags`` is the combined tag list.
        """
        data_set = self.data_set
        tfidf = TfIdf()
        actor_tag_map = {}
        actor_timestamp_map = {}

        # PART 1 - get complete data set
        # Tags (with their timestamps) of every movie each actor played in.
        # actor_movie_rank is still selected but is not consumed below.
        actor_tag_query = "select actorid, tagid, timestamp,actor_movie_rank from (0!.moviedata.movie_actor) ij `movieid xgroup .moviedata.mltags"
        movie_tag_data = data_set.get_data_from_kdb(actor_tag_query)

        # An actor appears once per movie, so rows for the same actor are
        # concatenated into a single tag list / timestamp list.
        for elem in movie_tag_data:
            actor_id = elem[0]
            tags = np.array(elem[1]).tolist()
            timestamps = np.array(elem[2]).tolist()
            if actor_id in actor_tag_map:
                actor_tag_map[actor_id] += tags
                actor_timestamp_map[actor_id] += timestamps
            else:
                actor_tag_map[actor_id] = tags
                actor_timestamp_map[actor_id] = timestamps

        # PART 2 - TF-IDF
        # Plain TF per actor ("document").
        for actor_id, tags in actor_tag_map.items():
            tfidf.calculate_tf(actor_id, tags)

        # Normalise all timestamps together; they are passed on under the
        # 'rank' key and used as per-tag-occurrence weights.
        rank_data = {'rank': basics.combine_map_value_list(actor_timestamp_map)}
        normalized_rank = data_set.normalize_data(rank_data)['rank'].tolist()

        tfidf.calculate_timeweighted_tf(actor_tag_map, normalized_rank)
        tfidf.calculate_tfidf()

        # PART 3 - object-feature matrix
        # NOTE(review): unlike get_movie_genre_space, alltags is not
        # de-duplicated here, so the matrix gets one column per tag
        # occurrence -- confirm whether that is intended.
        alltags = basics.combine_map_value_list(actor_tag_map)
        allactors = actor_tag_map.keys()

        object_feature_matrix = np.zeros(shape=(len(allactors), len(alltags)))
        object_feature_matrix = basics.fill_matrix(object_feature_matrix, allactors, alltags, tfidf.tfidf)
        return (object_feature_matrix, allactors, alltags, actor_tag_map)
    def get_movie_genre_space(self):
        """Build the movie x genre TF-IDF object-feature matrix.

        Runs a q query through ``self.data_set.get_data_from_kdb`` and reads
        every result row positionally as ``(movieid, genres, timestamps)``.
        The first element of the genres cell is split on ``"|"`` to obtain the
        movie's genre list.  Genres and timestamps are accumulated per movie,
        fed to a fresh ``TfIdf`` instance (TF, time-weighted TF, then TF-IDF)
        and the result is written into a zero-initialised matrix by
        ``basics.fill_matrix``.

        Returns:
            A tuple ``(object_feature_matrix, allmovies, alltags,
            movie_tag_map)`` where ``allmovies`` is the key view of
            ``movie_tag_map`` and ``alltags`` is the list of unique genres.
        """
        data_set = self.data_set
        tfidf = TfIdf()
        movie_tag_map = {}
        movie_timestamp_map = {}

        # PART 1 - get complete data set
        # Genres of every rated movie together with its rating timestamps.
        movie_tag_query = "select movieid, genres, timestamp from (0!`movieid xgroup .moviedata.mlratings) ij `movieid xgroup .moviedata.mlmovies"
        movie_tag_data = data_set.get_data_from_kdb(movie_tag_query)

        # Add movie-genre and movie-timestamp relationships to the maps.
        for elem in movie_tag_data:
            movie_id = elem[0]
            timestamps = np.array(elem[2]).tolist()
            genres = np.array(elem[1]).tolist()[0].split("|")
            if movie_id in movie_tag_map:
                movie_tag_map[movie_id] += genres
                movie_timestamp_map[movie_id] += timestamps
            else:
                movie_tag_map[movie_id] = genres
                movie_timestamp_map[movie_id] = timestamps

        # PART 2 - TF-IDF
        # Plain TF per movie ("document").
        for movie_id, genres in movie_tag_map.items():
            tfidf.calculate_tf(movie_id, genres)

        # Normalise all timestamps together; they are passed on under the
        # 'rank' key and used as per-genre-occurrence weights.
        # NOTE(review): nothing here guarantees that a movie has as many
        # timestamps as genres, so the flattened timestamp list may not line
        # up one-to-one with the flattened genre list -- confirm.
        rank_data = {'rank': basics.combine_map_value_list(movie_timestamp_map)}
        normalized_rank = data_set.normalize_data(rank_data)['rank'].tolist()

        tfidf.calculate_timeweighted_tf(movie_tag_map, normalized_rank)
        tfidf.calculate_tfidf()

        # PART 3 - object-feature matrix: one row per movie, one column per
        # distinct genre.
        alltags = list(np.unique(basics.combine_map_value_list(movie_tag_map)))
        allmovies = movie_tag_map.keys()

        object_feature_matrix = np.zeros(shape=(len(allmovies), len(alltags)))
        object_feature_matrix = basics.fill_matrix(object_feature_matrix, allmovies, alltags, tfidf.tfidf)
        return (object_feature_matrix, allmovies, alltags, movie_tag_map)
    def calculate_timeweighted_tf(self, input_map, normalized_timestamp):
        """Add a time weight to the stored TF of every tag occurrence.

        The documents of ``input_map`` are walked in iteration order and their
        tag lists are flattened; the i-th flattened tag occurrence receives
        the i-th entry of ``normalized_timestamp``, which is added to
        ``self.tf[row][1][tag]`` where ``row`` is the position of the owning
        document in ``input_map``.

        Documents with an empty tag list are skipped without shifting the
        row alignment of the documents that follow them.  If
        ``normalized_timestamp`` is shorter than the number of tag
        occurrences, the remaining occurrences are left untouched.

        Args:
            input_map: mapping of document key -> list of tags.
            normalized_timestamp: sequence of weights, one per tag occurrence.

        NOTE(review): assumes ``self.tf`` holds one entry per key of
        ``input_map``, in the same order, whose element ``[1]`` maps tag ->
        term frequency -- verify against ``calculate_tf``.
        """
        # Pair every tag occurrence with the row of the document it belongs
        # to, so the row index never has to be tracked by counting.
        occurrences = (
            (row, tag)
            for row, doc in enumerate(input_map)
            for tag in input_map[doc]
        )
        for (row, tag), weight in zip(occurrences, normalized_timestamp):
            self.tf[row][1][tag] += weight
    def modify_matrix_on_relevance(self, object_feature_matrix, relevance_feedback, movie_tag_map, all_tags):
        """Re-weight tag columns of the matrix according to relevance feedback.

        A movie in ``relevance_feedback`` counts as relevant when its value is
        truthy and as irrelevant otherwise.  For each of the two groups, every
        tag carried by at least one movie of the group gets the weight

            log10( ((r + 0.5) / (G - r + 1))
                   / ((n - r + 0.5) / (N - G - n + r + 1)) )

        where ``N`` is the number of movies with feedback, ``G`` the size of
        the group, ``r`` the number of group movies carrying the tag and ``n``
        the number of all feedback movies carrying it.

        Weights of the relevant group are added to, and weights of the
        irrelevant group subtracted from, the tag's column.  Only cells that
        are non-zero at the time of the update are touched.  The two weight
        sets are kept separate so that the subtraction pass cannot undo the
        boost just given to relevant tags.

        Args:
            object_feature_matrix: row-indexable matrix (rows x tags),
                modified in place.
            relevance_feedback: mapping of movie -> relevance flag.
            movie_tag_map: mapping of movie -> list of tags; must contain
                every movie of ``relevance_feedback``.
            all_tags: list giving the column order of the matrix.

        Returns:
            The same ``object_feature_matrix`` object, after modification.

        Raises:
            KeyError: a movie with feedback is missing from ``movie_tag_map``.
            ValueError: a weighted tag is missing from ``all_tags``.
        """
        total = len(relevance_feedback)
        # Per-movie tag sets make the membership counts below cheap.
        tags_of = {movie: set(movie_tag_map[movie]) for movie in relevance_feedback}

        def group_weights(wanted):
            """Return {tag: weight} for the movies whose flag equals *wanted*."""
            members = [
                movie for movie in relevance_feedback
                if bool(relevance_feedback[movie]) == wanted
            ]
            group_size = len(members)
            weights = {}
            for tag in set().union(*(tags_of[movie] for movie in members)):
                in_group = sum(tag in tags_of[movie] for movie in members)
                overall = sum(tag in tags for tags in tags_of.values())
                first_term = (in_group + 0.5) / (group_size - in_group + 1)
                second_term = (overall - in_group + 0.5) / (
                    total - group_size - overall + in_group + 1
                )
                weights[tag] = math.log(first_term / second_term, 10)
            return weights

        def shift_columns(weights, sign):
            """Add ``sign * weight`` to every non-zero cell of each tag column."""
            for tag, weight in weights.items():
                column = all_tags.index(tag)
                for row in object_feature_matrix:
                    # Rows that do not carry the tag stay at zero.
                    if row[column] != 0:
                        row[column] += sign * weight

        shift_columns(group_weights(True), 1)
        shift_columns(group_weights(False), -1)
        return object_feature_matrix