def feature_consolidation(resume_text, top_unigram_list, top_bigram_list):
    """
    Build the combined unigram + bigram featureset for a single resume.

    Args:
        resume_text -- text of the resume to extract features from
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset

    Returns:
        consolidated_features -- one-element list; its only entry is the
            result of unigram_features(...) + bigram_features(...)
            (presumably two feature lists being concatenated -- confirm
            against the helper implementations)
    """
    unigram_part = unigram_features(resume_text, top_unigram_list)
    bigram_part = bigram_features(resume_text, top_bigram_list)
    # The combined features are wrapped in an outer list before returning.
    return [unigram_part + bigram_part]
def feature_consolidation(resumes, top_unigram_list, top_bigram_list, add_true_score=False):
    """
    Consolidate the unigram and bigram featuresets for a collection of resumes.

    Args:
        resumes -- iterable of 3-tuples
            [(resume_text, tag, filename), (resume_text, tag, filename)...];
            only resume_text is used here
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset
        add_true_score -- boolean (default: False); accepted for interface
            compatibility but not read by this function

    Returns:
        consolidated_features -- list with one entry per resume, in input
            order; each entry is unigram_features(...) + bigram_features(...)
            for that resume (presumably two feature lists being concatenated
            -- confirm against the helper implementations)
    """
    # A single pass over `resumes` keeps one-shot iterables (e.g. generators)
    # working: iterating twice would leave the second pass empty.
    return [
        unigram_features(resume_text, top_unigram_list)
        + bigram_features(resume_text, top_bigram_list)
        for (resume_text, _label, _fname) in resumes
    ]
def test_should_return_zero_if_bigram_is_not_present_in_top_bigrams():
    # Every feature computed for sample_failing_text must be 0.
    # NOTE: an empty feature sequence would make this loop pass vacuously.
    for feature_value in bigram_features(sample_failing_text, top_bigrams):
        assert_equals(0, feature_value)
def test_should_return_one_if_bigram_is_present_in_top_bigrams():
    # Every feature computed for sample_passing_text must be 1.
    # NOTE: an empty feature sequence would make this loop pass vacuously.
    for feature_value in bigram_features(sample_passing_text, top_bigrams):
        assert_equals(1, feature_value)
def test_lengths_of_bigram_features_and_top_bigrams_should_be_equal():
    # bigram_features must produce exactly one entry per top bigram.
    feature_count = len(bigram_features(sample_passing_text, top_bigrams))
    assert_equals(feature_count, len(top_bigrams))
def test_bigram_features_for_a_given_text_should_not_be_empty():
    # A non-zero length is truthy, so this fails only for an empty result.
    features = bigram_features(sample_passing_text, top_bigrams)
    feature_count = len(features)
    assert_true(feature_count)