Code example #1
File: summarizer.py — Project: animeshramesh/T-Sum
 def summarize(self, input_path):
     """Build and print an extractive summary of all documents under ``input_path``.

     Pipeline: preprocess the documents, weight terms (TF / inverse document
     frequency), build sentence vectors, combine a cosine-style ("sine")
     similarity matrix with a synonym-based dissimilarity matrix, rank the
     sentences graph-style, and print them in ranked order.

     Parameters
     ----------
     input_path : str
         Directory containing the plain-text documents to summarize.

     Side effects: prints the ranked sentences to stdout; returns None.
     """
     preprocessor = Preprocessor()
     feature_reducer = FeatureReducer()
     weights_handler = WeightsHandler()

     # Only plain files directly inside input_path; os.path.join is robust
     # even when input_path lacks a trailing separator.
     files = [f for f in os.listdir(input_path)
              if os.path.isfile(os.path.join(input_path, f))]

     preprocessed_list = preprocessor.preprocess(files, input_path)
     sentencelist = preprocessor.extract_sentences(files, input_path)

     # Feed the weights handler, prune low-value features, then build the
     # total-weight dictionary and the sentence-term matrix (STM).
     weights_handler.set_preprocessed_list(preprocessed_list)
     weights_handler.set_sentence_list(sentencelist)
     weights_handler.update_totfreq_dict()
     weights_handler.replace_totfreq_dict(
         feature_reducer.reduceFeatures(weights_handler.tot_freq_dict()))
     weights_handler.generate_inv_doc_freq_dict(preprocessed_list)
     weights_handler.generate_tot_weight_dict()
     weights_handler.generate_STM()

     # vector_dict maps each sentence to its weight vector.
     vector_dict = weights_handler.sentence_weight_dict()

     feature_reducer.remove_features_with_zero_weight(vector_dict)
     sentencelist_without_stopwords = preprocessor.remove_stop_words_from_sentencelist(sentencelist)

     # Pairwise vector similarity between sentences.
     sine_extractor = SineRelationExtractor()
     sine_matrix = sine_extractor.extract_sine_similarity(vector_dict)

     # Synonym groups drive the dissimilarity score; the final score matrix
     # is the elementwise combination of both matrices.
     synonym_assigner = SynonymAssigner()
     synonym_dict = synonym_assigner.assign_synonyms(sentencelist_without_stopwords)

     dissimilarity_scorer = DissimilarityScorer()
     dissimilarity_matrix = dissimilarity_scorer.assign_dissimilarity_score(
         synonym_dict, sentencelist_without_stopwords)
     final_score_matrix = dissimilarity_scorer.multiply_sine(dissimilarity_matrix, sine_matrix)

     # Rank sentences by their aggregate score and emit them in rank order.
     ranker = NodeRanker()
     sentence_scores = ranker.calculate_score_of_each_sentence(final_score_matrix)
     ranked_indices = ranker.rank_nodes(sentence_scores)

     for each_index in ranked_indices:
         # print() function: required for Python 3 compatibility
         # (the original used the Python-2 print statement).
         print(sentencelist[each_index])
Code example #2
File: testing.py — Project: animeshramesh/T-Sum
from utils.preprocessor import Preprocessor
import os
from sets.size import Size
from sets.intersections import Intersections
from sets.scorer import Scorer
from graphs.node_ranker import NodeRanker
from sets.distributed_ranks import RankDistributor



# Score sentences of the Inception data set by set intersections, then
# normalise the scores and distribute them into the 0-to-1 rank buckets.
input_path = '/home/animesh/T-Sum/Data sets/Inception/'
files = [name for name in os.listdir(input_path)
         if os.path.isfile(input_path + name)]

preprocessor = Preprocessor()
sentence_list = preprocessor.extract_sentences(files, input_path)

# One preprocessed-word collection per sentence, in sentence order.
preprocessed_words_in_each_sentence = [
    preprocessor.preprocess_sentence(sentence) for sentence in sentence_list
]

size_calculator = Size()
intersection_counter = Intersections()
sentence_scorer = Scorer()
node_ranker = NodeRanker()  # instantiated as in the original run (unused below)
rank_distributor = RankDistributor()

size_of_sets = size_calculator.calculate_size_of_set(preprocessed_words_in_each_sentence)
# NOTE: "itersections" is the project's own (typo'd) API name — do not "fix" it here.
number_of_intersections_of_each_sentence = intersection_counter.count_itersections_of_each_set(preprocessed_words_in_each_sentence)
scores = sentence_scorer.score_sentences(number_of_intersections_of_each_sentence, size_of_sets)

normalised_scores = sentence_scorer.normalise_score(scores)
distributed_ranks = rank_distributor.distribute_ranks(normalised_scores)