def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for 0, 1, 2 and 3 head chunks cut.

    For every cut size a ``FrequencyChunkFilterWrapper`` configured with
    10 chunks is created and handed to ``classify.classify_texts`` along
    with the train and test sets; the value it returns is printed as the
    classification quality (formatted with ``%f``).

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    report_template = '%d head chunks cut: classification quality = %f'
    total_chunks = 10
    for head_cut in xrange(4):
        chunk_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            in_chunks_number=total_chunks, in_cut_head=head_cut)
        score = classify.classify_texts(
            in_train_set, in_test_set, chunk_filter)
        # A single parenthesised expression prints the same text as the
        # bare print statement.
        print(report_template % (head_cut, score))
def middle_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for several middle-cut widths.

    For each value in (35, 40, 45) a
    ``PrecisePercentInvertedFilterWrapper`` is created with that value as
    both ``in_cut_head`` and ``in_cut_tail`` and passed to
    ``classify.classify_texts``.  The report line shows
    ``100 - 2 * percent`` (30, 20, 10) as the middle percent cut, next to
    the returned quality.

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    for edge_percent in (35, 40, 45):
        inverted_filter = \
            frequency_chunking.PrecisePercentInvertedFilterWrapper(
                in_cut_head=edge_percent, in_cut_tail=edge_percent)
        score = classify.classify_texts(
            in_train_set, in_test_set, inverted_filter)
        # Whatever is not covered by the two equal edge values.
        middle_percent = 100 - 2 * edge_percent
        print('%d middle percent cut: classification quality = %f'
              % (middle_percent, score))
def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for 0, 10, 20 and 30 head percent cut.

    Each percentage is used as ``in_cut_head`` of a
    ``PrecisePercentFilterWrapper``; the filter is passed to
    ``classify.classify_texts`` with the train and test sets and the
    returned quality is printed (formatted with ``%f``).

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    report_template = '%d head percent cut: classification quality = %f'
    for head_percent in (0, 10, 20, 30):
        percent_filter = frequency_chunking.PrecisePercentFilterWrapper(
            in_cut_head=head_percent)
        score = classify.classify_texts(
            in_train_set, in_test_set, percent_filter)
        print(report_template % (head_percent, score))
def chunk_window_experiment(in_train_set, in_test_set):
    """Print classification quality for each 5-chunk window position.

    The window start runs from 0 up to
    ``frequency_chunking.DEFAULT_CHUNKS_NUMBER - 5`` inclusive.  For each
    start a ``FrequencyChunkFilterWrapper`` is built whose ``in_cut_head``
    is the start index and whose ``in_cut_tail`` is the number of chunks
    left after the window, so head cut + window + tail cut equals
    ``DEFAULT_CHUNKS_NUMBER``.  The quality returned by
    ``classify.classify_texts`` is printed with the first and last chunk
    index of the window.

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    window_size = 5
    total_chunks = frequency_chunking.DEFAULT_CHUNKS_NUMBER
    positions = total_chunks - window_size + 1
    for window_start in xrange(positions):
        tail_cut = total_chunks - window_start - window_size
        chunk_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            in_cut_head=window_start, in_cut_tail=tail_cut)
        score = classify.classify_texts(
            in_train_set, in_test_set, chunk_filter)
        window_end = window_start + window_size - 1
        print('%d -- %d chunks used: classification quality = %f'
              % (window_start, window_end, score))
def chunk_window_experiment(in_train_set, in_test_set):
    """Slide a 5-chunk window over the chunks and print quality per step.

    With ``N = frequency_chunking.DEFAULT_CHUNKS_NUMBER`` the window start
    takes every value in ``0 .. N - 5``.  At each step a
    ``FrequencyChunkFilterWrapper`` is created with ``in_cut_head`` equal
    to the start index and ``in_cut_tail`` equal to ``N - start - 5``,
    then passed to ``classify.classify_texts`` with the train and test
    sets.  The report line shows the first and last chunk index of the
    window and the returned quality.

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    report_template = '%d -- %d chunks used: classification quality = %f'
    window_size = 5
    chunk_count = frequency_chunking.DEFAULT_CHUNKS_NUMBER
    for first_chunk in xrange(chunk_count - window_size + 1):
        last_chunk = first_chunk + window_size - 1
        # Chunks remaining after the window are cut from the tail.
        chunks_after_window = chunk_count - first_chunk - window_size
        window_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            in_cut_head=first_chunk,
            in_cut_tail=chunks_after_window)
        score = classify.classify_texts(
            in_train_set, in_test_set, window_filter)
        print(report_template % (first_chunk, last_chunk, score))
def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality with 0 to 3 head chunks cut.

    Uses a ``FrequencyChunkFilterWrapper`` with ``in_chunks_number=10``
    and ``in_cut_head`` set to the current cut size; the filter is passed
    to ``classify.classify_texts`` together with the train and test sets,
    and the returned quality is printed (formatted with ``%f``).

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    for cut_size in xrange(4):
        filter_options = {'in_chunks_number': 10, 'in_cut_head': cut_size}
        chunk_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            **filter_options)
        score = classify.classify_texts(
            in_train_set, in_test_set, chunk_filter)
        print('%d head chunks cut: classification quality = %f'
              % (cut_size, score))
def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality with 0 to 3 head groups cut.

    For each cut size a ``FrequencyGroupFilterWrapper`` is created with
    that size as ``in_cut_head`` and passed to ``classify.classify_texts``
    with the train and test sets; the returned quality is printed
    (formatted with ``%f``).

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    report_template = '%d head groups cut: classification quality = %f'
    for head_groups in xrange(4):
        group_filter = frequency_chunking.FrequencyGroupFilterWrapper(
            in_cut_head=head_groups)
        score = classify.classify_texts(
            in_train_set, in_test_set, group_filter)
        print(report_template % (head_groups, score))
def middle_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for middle cuts of 30, 20 and 10 percent.

    The loop values 35, 40 and 45 are passed as both ``in_cut_head`` and
    ``in_cut_tail`` of a ``PrecisePercentInvertedFilterWrapper``; the
    reported middle percent is ``100 - 2 * percent``.  Quality comes from
    ``classify.classify_texts`` called with the train set, the test set
    and the filter.

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    report_template = '%d middle percent cut: classification quality = %f'
    for side_percent in (35, 40, 45):
        middle_filter = \
            frequency_chunking.PrecisePercentInvertedFilterWrapper(
                in_cut_head=side_percent, in_cut_tail=side_percent)
        score = classify.classify_texts(
            in_train_set, in_test_set, middle_filter)
        print(report_template % (100 - 2 * side_percent, score))
def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality with 0, 10, 20 and 30 percent head cut.

    A ``PrecisePercentFilterWrapper`` is created per percentage (passed as
    ``in_cut_head``) and given to ``classify.classify_texts`` with the
    train and test sets; the returned quality is printed (formatted with
    ``%f``).

    NOTE(review): this file contains several definitions with this name;
    the last one executed is the one the name stays bound to.
    """
    for cut_percent in (0, 10, 20, 30):
        head_filter = frequency_chunking.PrecisePercentFilterWrapper(
            in_cut_head=cut_percent)
        score = classify.classify_texts(
            in_train_set, in_test_set, head_filter)
        line = '%d head percent cut: classification quality = %f' % (
            cut_percent, score)
        print(line)