Example #1
0
def sw_Process():
    """Run SW sampling on testing file '2008080814' and save the plot.

    Loads the sentences and their true segmentation, builds a
    ``SegmentationModel`` from the segmenter parameter file, links each
    sentence to its immediate successor, and passes that chain of edges
    to ``sw.sample``. Progress is reported through the plotter callback,
    and the plotter is saved once sampling returns.
    """
    model_path = 'preprocessing/model_segmenter.txt'

    all_sentences, true_segment = readingfiles.read_testing_file('2008080814')
    transition_prob, length_prior, seg_num_prior = (
        readingfiles.load_model_parameters(model_path))
    classifier = Classifier(model_path)

    segmentation_model = SegmentationModel(
        all_sentences,
        transition_prob,
        length_prior,
        seg_num_prior,
        classifier,
    )
    plotter = Plotter(segmentation_model, true_segment)

    # The graph is a simple chain: sentence k is connected to sentence k + 1.
    node_number = len(all_sentences)
    edges = [[k, k + 1] for k in range(node_number - 1)]

    print('Start Sampling')
    sw.sample(
        node_number,
        edges,
        segmentation_model.calculate_Qe,
        segmentation_model.target_evaluation_func,
        plotter.plot_callback,
        initial_clustering=None,
        monitor_statistics=segmentation_model.calculate_energy,
    )
    print('Converged.')
    plotter.save()
Example #2
0
    def _reform_by_multilevel_sw(self):
        """Rebuild the topic tree through successive SW-Cuts passes.

        Exactly two levels are generated. For each level an SW
        configuration is derived from the previous level's vertex
        distributions and clustering, ``sw.sample`` produces a new
        clustering, and that clustering is appended to the tree as a new
        level. A plot is saved per level as ``multilevel_sw_<level>.png``.
        """
        last_level = 2

        # Names of the documents, used when printing the hierarchy.
        labels = [doc.name for doc in self.corpus.documents]

        # Before the first pass there are no vertex distributions yet, and
        # every vertex sits in one single cluster.
        vertex_distributions = []
        clustering = [set(range(len(self.corpus)))]

        for level in range(1, last_level + 1):
            config = self._generate_next_sw_config(
                vertex_distributions, clustering, level)
            plotter = _Plotter(config)

            # Attach a ground truth to the plot when one exists for this level.
            if level <= len(GROUND_TRUTHS):
                plotter.set_ground_truth(GROUND_TRUTHS[level - 1])

            # The first pass also creates the bottom level of the tree.
            if level == 1:
                self._initialize_tree(config.vertex_distributions)
                self.topic_tree.print_hiearchy(labels=labels)

            # FIXME: while testing level 2, level 1 was temporarily taken
            # straight from the ground truth instead of being sampled.
            # Only the SW sampling path below must remain before launch.
            clustering = sw.sample(
                config.graph_size,
                config.edges,
                config.edge_prob_func,
                config.target_eval_func,
                intermediate_callback=plotter.plot_callback,
                initial_clustering=None,
                monitor_statistics=config.monitor_statistics,
            )
            vertex_distributions = config.vertex_distributions

            # Store the sampled clustering as a new level of the tree.
            merged_distributions = _combine_vertex_distributions_given_clustering(
                vertex_distributions, clustering)
            self._add_level_to_tree(clustering, merged_distributions)
            self.topic_tree.print_hiearchy(
                labels=labels,
                synthesize_title=True,
                vocabularies=self.corpus.vocabularies,
            )
            plotter.save('multilevel_sw_{0}.png'.format(level))

            # Idea kept from the original: train a Classifier from the
            # first-level clustering (currently disabled).
Example #3
0
def sw_Process():
    """Segment testing file '2008080814' with SW sampling and save the plot.

    The sentences form a chain graph (each sentence connected to the
    next one). ``sw.sample`` is driven by the segmentation model's edge
    probability and target evaluation functions, with the model's energy
    as the monitored statistic.
    """
    parameter_file = 'preprocessing/model_segmenter.txt'

    # Inputs: the sentences with their reference segmentation, then the
    # learned parameters and the classifier read from the same model file.
    (all_sentences, true_segment) = readingfiles.read_testing_file('2008080814')
    (transition_prob,
     length_prior,
     seg_num_prior) = readingfiles.load_model_parameters(parameter_file)
    classifier = Classifier(parameter_file)

    segmentation_model = SegmentationModel(
        all_sentences, transition_prob, length_prior, seg_num_prior,
        classifier)
    plotter = Plotter(segmentation_model, true_segment)

    node_number = len(all_sentences)
    # One edge between every pair of consecutive sentences.
    edges = [[left, left + 1] for left in range(node_number - 1)]

    print('Start Sampling')
    sw.sample(
        node_number,
        edges,
        segmentation_model.calculate_Qe,
        segmentation_model.target_evaluation_func,
        plotter.plot_callback,
        initial_clustering=None,
        monitor_statistics=segmentation_model.calculate_energy)
    print('Converged.')
    plotter.save()
Example #4
0
def SW_Process():
    """Run two-label SW sampling on a graph of ``N * N`` nodes.

    ``N`` and ``edges`` are read from the enclosing scope. Sampling
    starts from a labeling that alternates 0, 1, 0, 1, ... by node index.
    """
    node_number = N * N

    model = Model()
    plotter = Plotter(model)

    # Alternate the two labels by node index.
    # (Alternative kept from the original: initial_labeling = None.)
    initial_labeling = [index % 2 for index in range(node_number)]

    print('Start Sampling')
    sw.sample(
        node_number,
        edges,
        model.edge_prob,
        model.target_prob,
        plotter.plot_callback,
        initial_labeling=initial_labeling,
        max_labels=2,
    )
Example #5
0
    def _reform_by_multilevel_sw(self):
        """Reform the whole tree by running SW-Cuts once per level.

        Two levels are built. Every pass generates a configuration from
        the previous pass's vertex distributions and clustering, samples
        a new clustering with ``sw.sample``, adds it to the tree as a new
        level, prints the hierarchy and saves the level's plot.
        """
        final_level = 2
        level = 0

        # Document names used as labels when the hierarchy is printed.
        doc_names = [document.name for document in self.corpus.documents]

        # Starting state: no vertex distributions, and a single cluster
        # holding every vertex index.
        distributions = []
        clusters = [set(range(len(self.corpus)))]

        while level < final_level:
            level += 1

            config = self._generate_next_sw_config(
                distributions, clusters, level)
            plotter = _Plotter(config)

            # Only some levels have a ground truth to show on the plot.
            has_ground_truth = len(GROUND_TRUTHS) >= level
            if has_ground_truth:
                plotter.set_ground_truth(GROUND_TRUTHS[level - 1])

            # The very bottom level of the tree is added on the first pass.
            if level == 1:
                self._initialize_tree(config.vertex_distributions)
                self.topic_tree.print_hiearchy(labels=doc_names)

            # FIXME: a temporary shortcut used the ground truth as the
            # level-1 clustering while testing level 2; before launch only
            # the SW clustering below should be kept.
            clusters = sw.sample(
                config.graph_size,
                config.edges,
                config.edge_prob_func,
                config.target_eval_func,
                intermediate_callback=plotter.plot_callback,
                initial_clustering=None,
                monitor_statistics=config.monitor_statistics)
            distributions = config.vertex_distributions

            # Record the new clustering as the next level of the tree.
            combined = _combine_vertex_distributions_given_clustering(
                distributions, clusters)
            self._add_level_to_tree(clusters, combined)
            self.topic_tree.print_hiearchy(
                labels=doc_names,
                synthesize_title=True,
                vocabularies=self.corpus.vocabularies)
            plotter.save('multilevel_sw_{0}.png'.format(level))

            # Disabled idea from the original: build a Classifier trained
            # on the corpus with the first-level clustering.