def sw_Process():
    """Run SW sampling over the sentences of test file '2008080814'.

    Reads the sentences and their reference segmentation, builds a
    SegmentationModel from the parameters loaded from
    'preprocessing/model_segmenter.txt' plus a Classifier created from
    the same path, links every sentence to its successor, and hands
    that graph to sw.sample.  plotter.save() is called once sampling
    returns.

    Returns:
        None.  The value returned by sw.sample is discarded.
    """
    model_path = 'preprocessing/model_segmenter.txt'

    sentences, reference_segments = readingfiles.read_testing_file('2008080814')
    transitions, length_prior, count_prior = (
        readingfiles.load_model_parameters(model_path))
    classifier = Classifier(model_path)
    model = SegmentationModel(
        sentences, transitions, length_prior, count_prior, classifier)
    plotter = Plotter(model, reference_segments)

    # One vertex per sentence; each edge joins a sentence to the next one.
    vertex_count = len(sentences)
    chain_edges = [[left, left + 1] for left in range(vertex_count - 1)]

    print('Start Sampling')
    sw.sample(
        vertex_count,
        chain_edges,
        model.calculate_Qe,
        model.target_evaluation_func,
        plotter.plot_callback,
        initial_clustering=None,
        monitor_statistics=model.calculate_energy)
    print('Converged.')
    plotter.save()
def _reform_by_multilevel_sw(self):
    """Reform the whole tree by running SW-Cuts at two successive levels.

    Each level builds an SW configuration from the previous level's
    vertex distributions and clustering, samples a new clustering with
    sw.sample, appends it to the topic tree, prints the hierarchy and
    saves a plot named 'multilevel_sw_<level>.png'.

    Returns:
        None.  Results are stored through self._initialize_tree and
        self._add_level_to_tree.
    """
    # Hard stop: the procedure currently ends after the second level.
    last_level = 2

    # Labels used when printing the hierarchy.
    document_labels = [doc.name for doc in self.corpus.documents]

    # Before level 1 there are no distributions yet, and every vertex
    # sits in one all-encompassing cluster.
    vertex_distributions = []
    clustering = [set(range(len(self.corpus)))]

    for level in range(1, last_level + 1):
        config = self._generate_next_sw_config(
            vertex_distributions, clustering, level)
        plotter = _Plotter(config)

        # Attach a ground truth only for levels that have one.
        if level <= len(GROUND_TRUTHS):
            plotter.set_ground_truth(GROUND_TRUTHS[level - 1])

        # The first level also creates the very bottom level of the tree.
        if level == 1:
            self._initialize_tree(config.vertex_distributions)
            self.topic_tree.print_hiearchy(labels=document_labels)

        # NOTE: a testing shortcut that substituted the ground truth for
        # the level-1 clustering used to live here (FIXME: it was meant
        # to be removed before launch).  It is disabled, so every level
        # is clustered by SW.
        clustering = sw.sample(
            config.graph_size,
            config.edges,
            config.edge_prob_func,
            config.target_eval_func,
            intermediate_callback=plotter.plot_callback,
            initial_clustering=None,
            monitor_statistics=config.monitor_statistics)
        vertex_distributions = config.vertex_distributions

        # Record the new clustering as a new level of the tree.
        combined = _combine_vertex_distributions_given_clustering(
            vertex_distributions, clustering)
        self._add_level_to_tree(clustering, combined)

        self.topic_tree.print_hiearchy(
            labels=document_labels,
            synthesize_title=True,
            vocabularies=self.corpus.vocabularies)
        plotter.save('multilevel_sw_{0}.png'.format(level))

        # NOTE: training a Classifier from the first-level clustering was
        # tried at this point and is currently disabled.
def sw_Process():
    """Sample a segmentation of test file '2008080814' with sw.sample.

    Steps, in order: read the sentences and the true segmentation, load
    the model parameters from 'preprocessing/model_segmenter.txt',
    create a Classifier from the same path, assemble the
    SegmentationModel and its Plotter, connect consecutive sentences
    with edges, run the sampler, then call plotter.save().

    Returns:
        None.  Whatever sw.sample returns is not kept.
    """
    parameter_file = 'preprocessing/model_segmenter.txt'

    testing_data = readingfiles.read_testing_file('2008080814')
    all_sentences, true_segment = testing_data

    parameters = readingfiles.load_model_parameters(parameter_file)
    transition_prob, length_prior, seg_num_prior = parameters

    segmentation_model = SegmentationModel(
        all_sentences,
        transition_prob,
        length_prior,
        seg_num_prior,
        Classifier(parameter_file))
    plotter = Plotter(segmentation_model, true_segment)

    # Sentence k is joined to sentence k + 1, so n sentences give n - 1
    # edges.
    sentence_count = len(all_sentences)
    neighbour_edges = [[k, k + 1] for k in range(sentence_count - 1)]

    print('Start Sampling')
    sw.sample(
        sentence_count, neighbour_edges,
        segmentation_model.calculate_Qe,
        segmentation_model.target_evaluation_func,
        plotter.plot_callback,
        initial_clustering=None,
        monitor_statistics=segmentation_model.calculate_energy)
    print('Converged.')
    plotter.save()
def SW_Process():
    """Run sw.sample on N * N vertices starting from alternating labels.

    Uses the surrounding scope's `N` and `edges`.  Vertex i starts with
    label i % 2, and the sampler is limited to two labels.

    Returns:
        None.  The value returned by sw.sample is discarded.
    """
    vertex_count = N * N
    model = Model()
    plotter = Plotter(model)

    # Initial labels alternate 0, 1, 0, 1, ... by vertex index.
    # (Passing None as the initial labeling is the disabled alternative.)
    alternating_labels = [index % 2 for index in range(vertex_count)]

    print('Start Sampling')
    sw.sample(
        vertex_count,
        edges,
        model.edge_prob,
        model.target_prob,
        plotter.plot_callback,
        initial_labeling=alternating_labels,
        max_labels=2)
def _reform_by_multilevel_sw(self):
    """Reform the whole tree by doing multi-level SW-Cuts.

    Runs two clustering levels.  At each level an SW configuration is
    generated from the previous level's vertex distributions and
    clustering, sw.sample produces the new clustering, and that
    clustering is added to the topic tree.  The hierarchy is printed and
    a plot is saved as 'multilevel_sw_<level>.png' after every level.

    Returns:
        None.
    """
    # Names of the documents, used as labels when printing the tree.
    labels = [document.name for document in self.corpus.documents]

    # Starting state: no vertex distributions, one cluster holding all
    # vertices.
    distributions = []
    clusters = [set(range(len(self.corpus)))]

    level = 0
    while True:
        level += 1
        config = self._generate_next_sw_config(distributions, clusters, level)
        plotter = _Plotter(config)

        # Only levels covered by GROUND_TRUTHS get a ground truth.
        has_ground_truth = len(GROUND_TRUTHS) >= level
        if has_ground_truth:
            plotter.set_ground_truth(GROUND_TRUTHS[level - 1])

        if level == 1:
            # Add the very bottom level of the tree.
            self._initialize_tree(config.vertex_distributions)
            self.topic_tree.print_hiearchy(labels=labels)

        # FIXME: a temporary shortcut for testing level 2 (use the ground
        # truth as the level-1 clustering) is disabled here; clustering
        # is always done by SW.
        clusters = sw.sample(
            config.graph_size, config.edges,
            config.edge_prob_func, config.target_eval_func,
            intermediate_callback=plotter.plot_callback,
            initial_clustering=None,
            monitor_statistics=config.monitor_statistics)
        distributions = config.vertex_distributions

        # Save the current clustering as a new level of the tree.
        self._add_level_to_tree(
            clusters,
            _combine_vertex_distributions_given_clustering(
                distributions, clusters))
        self.topic_tree.print_hiearchy(
            labels=labels,
            synthesize_title=True,
            vocabularies=self.corpus.vocabularies)
        plotter.save('multilevel_sw_{0}.png'.format(level))

        # NOTE: building a classifier from the first-level clustering
        # is disabled at this point.

        # Stop once the second level has been processed.
        if level >= 2:
            break