def test_should_append_zero_if_unigram_is_not_present_in_text():
    # "solution", "java", "c++" and "strong" (positions 3-6 of the vocabulary)
    # never occur in the sample text, so each of those feature slots must be 0.
    sample = (
        "Responsible for developing and implementing high level security "
        "features successfully. Led a team of 10 people and pushed a new "
        "enhancement in 4 weeks without any bugs"
    )
    vocabulary = [
        "responsible",
        "developed",
        "monitored",
        "solution",
        "java",
        "c++",
        "strong",
    ]

    features = unigram_features(sample, vocabulary)

    for position in range(3, 7):
        eq_(0, features[position])
def test_should_stem_words_and_create_feature_set():
    # Every vocabulary entry is expected to be matched against the sample text
    # ("developing" vs "develop", "successfully" vs "success", ...), so all
    # four feature slots must be 1.
    sample = (
        "Responsible for developing and implementing high level security "
        "features successfully"
    )
    vocabulary = ["responsible", "develop", "implement", "success"]

    features = unigram_features(sample, vocabulary)

    present = 1
    for position in range(4):
        eq_(present, features[position])
def test_should_stem_words_and_create_feature_set():
    # All four vocabulary entries should be found in the sample text once
    # word endings are ignored, so every feature slot is expected to be 1.
    resume_snippet = (
        "Responsible for developing and implementing "
        "high level security features successfully"
    )
    stems = ['responsible', 'develop', 'implement', 'success']

    feature_vector = unigram_features(resume_snippet, stems)

    for slot in (0, 1, 2, 3):
        eq_(1, feature_vector[slot])
def test_should_append_zero_if_unigram_is_not_present_in_text():
    # The last four vocabulary entries do not appear anywhere in the sample
    # text; their feature slots (indices 3, 4, 5 and 6) must therefore be 0.
    resume_snippet = (
        "Responsible for developing and implementing high level security "
        "features successfully. "
        "Led a team of 10 people and pushed a new enhancement in 4 weeks "
        "without any bugs"
    )
    candidates = [
        'responsible', 'developed', 'monitored',
        'solution', 'java', 'c++', 'strong',
    ]

    feature_vector = unigram_features(resume_snippet, candidates)

    for absent_slot in (3, 4, 5, 6):
        eq_(0, feature_vector[absent_slot])
def feature_consolidation(resume_text, top_unigram_list, top_bigram_list):
    """
    Consolidate the unigram and bigram featuresets for one resume text

    Args:
        resume_text -- text passed to both feature extractors
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset

    Returns:
        consolidated_features -- single-element list whose only entry is the
            unigram features combined with the bigram features via `+`
    """
    # Unigram extraction runs first, then bigram extraction; the combined
    # result is wrapped in a list so the return shape is a list of featuresets.
    unigram_part = unigram_features(resume_text, top_unigram_list)
    bigram_part = bigram_features(resume_text, top_bigram_list)
    return [unigram_part + bigram_part]
def feature_consolidation(resumes, top_unigram_list, top_bigram_list, add_true_score=False):
    """
    Function to consolidate all the featuresets for the training data

    Args:
        resumes -- iterable of tuples
            [(resume_text, tag, filename), (resume_text, tag, filename)...]
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset
        add_true_score -- boolean (default: False); accepted for interface
            compatibility but not read anywhere in this function

    Returns:
        consolidated_features -- list with one entry per resume, each entry
            being that resume's unigram features combined with its bigram
            features via `+`
    """
    # Pull the texts out in a single pass: `resumes` is consumed exactly once,
    # so one-shot iterables (generators, iterators) work as well as lists.
    resume_texts = [resume_text for (resume_text, _label, _fname) in resumes]

    # All unigram featuresets are computed before any bigram featureset.
    uni_feats = [unigram_features(text, top_unigram_list) for text in resume_texts]
    bi_feats = [bigram_features(text, top_bigram_list) for text in resume_texts]

    # Both lists are built from the same texts, so they pair up one-to-one.
    return [uni + bi for uni, bi in zip(uni_feats, bi_feats)]
def test_should_generate_unigrams_from_text_and_create_feature_set():
    # The feature set must contain exactly one slot per vocabulary entry,
    # i.e. four slots for the four unigrams supplied here.
    sample = "Responsible for implementing high level security features successfully"
    vocabulary = ["responsible", "developed", "monitored", "solution"]

    features = unigram_features(sample, vocabulary)

    eq_(4, len(features))
def test_should_generate_unigrams_from_text_and_create_feature_set():
    # Four unigrams go in, so the resulting feature set is expected to have
    # a length of four.
    resume_snippet = (
        "Responsible for implementing high level "
        "security features successfully"
    )
    candidates = ['responsible', 'developed', 'monitored', 'solution']

    slot_count = len(unigram_features(resume_snippet, candidates))

    eq_(4, slot_count)