def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for 0 to 3 head chunks cut out of 10.

    For each head-cut count, a ``FrequencyChunkFilterWrapper`` configured
    with ``in_chunks_number=10`` is built and handed to
    ``classify.classify_texts`` together with the train and test sets.
    The returned quality is printed on its own line.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    report_format = '%d head chunks cut: classification quality = %f'
    for head_chunks in xrange(4):
        chunk_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            in_chunks_number=10,
            in_cut_head=head_chunks)
        score = classify.classify_texts(in_train_set,
                                        in_test_set,
                                        chunk_filter)
        print(report_format % (head_chunks, score))
def middle_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for symmetric inverted percent cuts.

    For each percent in 35, 40 and 45 a
    ``PrecisePercentInvertedFilterWrapper`` is created with that value as
    both ``in_cut_head`` and ``in_cut_tail``, and the texts are classified
    with it. The reported figure is ``100 - 2 * percent``.
    NOTE(review): presumably this is the share of the middle of the
    frequency distribution that the inverted filter removes; confirm
    against ``frequency_chunking``.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    report_format = '%d middle percent cut: classification quality = %f'
    for edge_percent in (35, 40, 45):
        inverted_filter = frequency_chunking.PrecisePercentInvertedFilterWrapper(
            in_cut_head=edge_percent,
            in_cut_tail=edge_percent)
        score = classify.classify_texts(in_train_set,
                                        in_test_set,
                                        inverted_filter)
        middle_percent = 100 - 2 * edge_percent
        print(report_format % (middle_percent, score))
def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for head cuts of 0, 10, 20 and 30 percent.

    Each percent value is used as ``in_cut_head`` of a
    ``PrecisePercentFilterWrapper``. The filter is passed to
    ``classify.classify_texts`` and the returned quality is printed next
    to the percent value.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    report_format = '%d head percent cut: classification quality = %f'
    for head_percent in (0, 10, 20, 30):
        percent_filter = frequency_chunking.PrecisePercentFilterWrapper(
            in_cut_head=head_percent)
        score = classify.classify_texts(in_train_set,
                                        in_test_set,
                                        percent_filter)
        print(report_format % (head_percent, score))
def chunk_window_experiment(in_train_set, in_test_set):
    """Print classification quality for every 5-chunk window position.

    The window start runs from 0 up to
    ``DEFAULT_CHUNKS_NUMBER - window_size``. For each position the chunks
    before the window are passed as ``in_cut_head`` and the chunks after
    it as ``in_cut_tail`` of a ``FrequencyChunkFilterWrapper``. The
    printed line shows the first and last chunk index of the window.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    window_size = 5
    total_chunks = frequency_chunking.DEFAULT_CHUNKS_NUMBER
    report_format = '%d -- %d chunks used: classification quality = %f'
    for first_chunk in xrange(total_chunks - window_size + 1):
        last_chunk = first_chunk + window_size - 1
        # Chunks left over after the window are cut from the tail.
        chunks_after_window = total_chunks - first_chunk - window_size
        # Disabled debug aid kept from the original:
        # (cut_left, cut_right) = get_filtered_distribution_test(cut_head, cut_tail)
        # print "%d,%d --> [%s:%s]" % (cut_head, cut_tail, cut_left, cut_right)
        window_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            in_cut_head=first_chunk,
            in_cut_tail=chunks_after_window)
        score = classify.classify_texts(in_train_set,
                                        in_test_set,
                                        window_filter)
        print(report_format % (first_chunk, last_chunk, score))
def chunk_window_experiment(in_train_set, in_test_set):
    """Slide a window of 5 chunks and print the quality at each position.

    For each window start, a ``FrequencyChunkFilterWrapper`` is built
    whose ``in_cut_head`` is the number of chunks before the window and
    whose ``in_cut_tail`` is the number of chunks after it, out of
    ``frequency_chunking.DEFAULT_CHUNKS_NUMBER``. The classification
    quality is printed with the window's first and last chunk index.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    window_size = 5
    positions = frequency_chunking.DEFAULT_CHUNKS_NUMBER - window_size + 1
    for window_start in xrange(positions):
        window_end = window_start + window_size - 1
        head_cut = window_start
        tail_cut = (frequency_chunking.DEFAULT_CHUNKS_NUMBER
                    - window_start
                    - window_size)
        # Disabled debug aid kept from the original:
        # (cut_left, cut_right) = get_filtered_distribution_test(cut_head, cut_tail)
        # print "%d,%d --> [%s:%s]" % (cut_head, cut_tail, cut_left, cut_right)
        chunk_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            in_cut_head=head_cut, in_cut_tail=tail_cut)
        score = classify.classify_texts(in_train_set, in_test_set, chunk_filter)
        message = '%d -- %d chunks used: classification quality = %f' % (
            window_start, window_end, score)
        print(message)
def head_cutting_experiment(in_train_set, in_test_set):
    """Classify with 0, 1, 2 and 3 head chunks cut and print each quality.

    A ``FrequencyChunkFilterWrapper`` with ``in_chunks_number=10`` is
    created for every head-cut count. The filter is passed to
    ``classify.classify_texts`` along with the train and test sets, and
    the returned quality is printed.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    for cut_count in xrange(4):
        filter_options = {'in_chunks_number': 10, 'in_cut_head': cut_count}
        chunk_filter = frequency_chunking.FrequencyChunkFilterWrapper(
            **filter_options)
        score = classify.classify_texts(in_train_set, in_test_set, chunk_filter)
        message = '%d head chunks cut: classification quality = %f' % (
            cut_count, score)
        print(message)
def head_cutting_experiment(in_train_set, in_test_set):
    """Print classification quality for 0 to 3 head groups cut.

    Each count is used as ``in_cut_head`` of a
    ``FrequencyGroupFilterWrapper``. The filter is passed to
    ``classify.classify_texts`` and the returned quality is printed next
    to the count.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    report_format = '%d head groups cut: classification quality = %f'
    for head_groups in xrange(4):
        group_filter = frequency_chunking.FrequencyGroupFilterWrapper(
            in_cut_head=head_groups)
        score = classify.classify_texts(in_train_set,
                                        in_test_set,
                                        group_filter)
        print(report_format % (head_groups, score))
def middle_cutting_experiment(in_train_set, in_test_set):
    """Classify with symmetric inverted percent filters and print the quality.

    The percents 35, 40 and 45 are each used as both ``in_cut_head`` and
    ``in_cut_tail`` of a ``PrecisePercentInvertedFilterWrapper``. The
    printed line reports ``100 - 2 * percent`` together with the quality
    returned by ``classify.classify_texts``.
    NOTE(review): the reported number is presumably the width of the
    middle band the inverted filter cuts; confirm in ``frequency_chunking``.

    Args:
        in_train_set: training data, passed through to ``classify_texts``.
        in_test_set: test data, passed through to ``classify_texts``.
    """
    for side_percent in (35, 40, 45):
        filter_options = {'in_cut_head': side_percent,
                          'in_cut_tail': side_percent}
        inverted_filter = frequency_chunking.PrecisePercentInvertedFilterWrapper(
            **filter_options)
        score = classify.classify_texts(in_train_set,
                                        in_test_set,
                                        inverted_filter)
        message = '%d middle percent cut: classification quality = %f' % (
            100 - 2 * side_percent, score)
        print(message)