Ejemplo n.º 1
    def generate_STM(self):
        """Build the sentence-term matrix.

        For every sentence in ``self.__sentenceList``, compute a weight
        vector over the features of ``self.__tot_weight_dict`` (the
        feature's weight when it occurs in the preprocessed sentence,
        else 0) and store it in ``self.__sentenceWeight_dict`` keyed by
        the raw sentence.
        """
        preprocessor = Preprocessor()
        for sentence in self.__sentenceList:
            preprocessed_words = preprocessor.preprocess_sentence(sentence)
            # BUGFIX: the original iterated `self.tot_weight_dict().keys()`,
            # calling the dict as if it were a method, while the loop body
            # subscripted the attribute `self.__tot_weight_dict` directly.
            # Use the attribute consistently for both iteration and lookup.
            sentence_weight = [
                self.__tot_weight_dict[feature] if feature in preprocessed_words else 0
                for feature in self.__tot_weight_dict
            ]
            self.__sentenceWeight_dict[sentence] = sentence_weight
Ejemplo n.º 2
import os

from graphs.node_ranker import NodeRanker
from sets.distributed_ranks import RankDistributor
from sets.intersections import Intersections
from sets.scorer import Scorer
from sets.size import Size
# NOTE(review): `Preprocessor` is used below but never imported; its module
# path is not visible from this file — add the correct import (e.g. the same
# module Example 1's class uses). TODO confirm.



# Score and rank sentences from every document in the input folder
# (intersection-based extractive summarisation pipeline).
# NOTE(review): hard-coded corpus path — consider taking it from argv/config.
input_path = '/home/animesh/T-Sum/Data sets/Inception/'
files = [f for f in os.listdir(input_path)
         if os.path.isfile(os.path.join(input_path, f))]

prep = Preprocessor()
sentence_list = prep.extract_sentences(files, input_path)

# Tokenise/normalise each sentence once up front (was a for-and-append loop).
preprocessed_words_in_each_sentence = [
    prep.preprocess_sentence(s) for s in sentence_list
]

size = Size()
intersections = Intersections()
scorer = Scorer()
# NOTE(review): `ranker` is constructed but never used below; kept in case
# NodeRanker's constructor has side effects — confirm and remove if not.
ranker = NodeRanker()
rank_counter_in_0_to_1 = RankDistributor()

size_of_sets = size.calculate_size_of_set(preprocessed_words_in_each_sentence)
# (method name `count_itersections_of_each_set` has a typo, but it is the
# public API of Intersections — do not rename here)
number_of_intersections_of_each_sentence = intersections.count_itersections_of_each_set(
    preprocessed_words_in_each_sentence)
scores = scorer.score_sentences(number_of_intersections_of_each_sentence, size_of_sets)

normalised_scores = scorer.normalise_score(scores)
distributed_ranks = rank_counter_in_0_to_1.distribute_ranks(normalised_scores)
# BUGFIX: `print distributed_ranks` is Python-2-only statement syntax;
# the call form works on both Python 2 and 3 for a single argument.
print(distributed_ranks)