def test_should_append_zero_if_unigram_is_not_present_in_text():
    """Unigrams that never occur in the text must produce a 0 feature.

    The candidates at positions 3-6 ("solution", "java", "c++", "strong")
    do not appear anywhere in the sample text, so each of those feature
    slots is expected to equal 0. Positions 0-2 are not asserted here.
    """
    sample = "Responsible for developing and implementing high level security features successfully. Led a team of 10 people and pushed a new enhancement in 4 weeks without any bugs"
    candidates = ["responsible", "developed", "monitored", "solution", "java", "c++", "strong"]
    features = unigram_features(sample, candidates)
    # Check the absent unigrams one by one, in list order.
    for absent_position in (3, 4, 5, 6):
        eq_(0, features[absent_position])
def test_should_stem_words_and_create_feature_set():
    """Every base-form unigram is expected to yield a feature value of 1.

    The text only contains inflected forms ("developing", "implementing",
    "successfully") while the unigram list holds "develop", "implement"
    and "success"; the test expects each of them to be reported as 1,
    presumably because unigram_features stems words before matching.
    """
    sentence = "Responsible for developing and implementing high level security features successfully"
    base_forms = ["responsible", "develop", "implement", "success"]
    features = unigram_features(sentence, base_forms)
    # One assertion per unigram, in list order.
    for position in (0, 1, 2, 3):
        eq_(1, features[position])
Esempio n. 3
0
def test_should_stem_words_and_create_feature_set():
    """Check that all four listed unigrams are reported with value 1.

    The sample text carries "developing", "implementing" and
    "successfully" rather than the listed "develop", "implement" and
    "success", so the expectation only holds if unigram_features matches
    on a normalised (presumably stemmed) form of each word.
    """
    text = "Responsible for developing and implementing high level security features successfully"
    wanted = ['responsible', 'develop', 'implement', 'success']
    result = unigram_features(text, wanted)
    present = 1
    index = 0
    # Walk the first four feature slots in order and compare each to 1.
    while index < 4:
        eq_(present, result[index])
        index += 1
Esempio n. 4
0
def test_should_append_zero_if_unigram_is_not_present_in_text():
    """Feature slots for unigrams missing from the text must be 0.

    Only the last four candidates ('solution', 'java', 'c++', 'strong'),
    none of which occur in the sample text, are asserted; the first three
    slots are left unchecked by this test.
    """
    text = "Responsible for developing and implementing high level security features successfully. Led a team of 10 people and pushed a new enhancement in 4 weeks without any bugs"
    vocabulary = ['responsible', 'developed', 'monitored', 'solution', 'java', 'c++', 'strong']
    result = unigram_features(text, vocabulary)
    missing = 0
    for slot in range(3, 7):
        eq_(missing, result[slot])
def feature_consolidation(resume_text, top_unigram_list, top_bigram_list):
    """
    Function to consolidate all the featuresets for a single resume text

    Args:
        resume_text -- text passed on to unigram_features and bigram_features
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset

    Returns:
        consolidated_features -- one-element list; its single entry is the
            unigram features concatenated (with +) with the bigram features
    """
    uni_feats = unigram_features(resume_text, top_unigram_list)
    bi_feats = bigram_features(resume_text, top_bigram_list)
    # Exactly one text is processed, so no index loop is needed. The result
    # stays wrapped in a list so callers still receive a list of rows.
    return [uni_feats + bi_feats]
Esempio n. 6
0
def feature_consolidation(resume_text, top_unigram_list, top_bigram_list):
    """
    Function to consolidate the unigram and bigram featuresets of one text

    Args:
        resume_text -- text handed to unigram_features and bigram_features
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset

    Returns:
        consolidated_features -- list with a single row: the unigram
            features followed by the bigram features (joined with +)
    """
    # Unigram features are computed before bigram features, as before.
    unigram_row = unigram_features(resume_text, top_unigram_list)
    bigram_row = bigram_features(resume_text, top_bigram_list)
    # A single input text always produces a single consolidated row, so the
    # previous index-driven while loop over one-element lists is unnecessary.
    consolidated_features = [unigram_row + bigram_row]
    return consolidated_features
def feature_consolidation(resumes, top_unigram_list, top_bigram_list, add_true_score=False):
    """
    Function to consolidate all the featuresets for the training data

    Args:
        resumes -- iterable of tuples [(resume_text, tag, filename), (resume_text, tag, filename)...]
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset
        add_true_score -- boolean (default: False); accepted for interface
            compatibility but not read anywhere in this function

    Returns:
        consolidated_features -- list with one entry per resume, each entry
            being that resume's unigram features concatenated (with +) with
            its bigram features
    """
    # Materialise once: resumes is traversed twice below, which would leave
    # the second pass empty if a one-shot iterator were passed in.
    resumes = list(resumes)
    # Only the text of each tuple is used; tag and filename are ignored.
    uni_feats = [unigram_features(resume_text, top_unigram_list) for (resume_text, _label, _fname) in resumes]
    bi_feats = [bigram_features(resume_text, top_bigram_list) for (resume_text, _label, _fname) in resumes]
    # Both lists have one entry per resume, so pairing them positionally
    # gives the same rows as the former index-based while loop.
    return [uni + bi for uni, bi in zip(uni_feats, bi_feats)]
def test_should_generate_unigrams_from_text_and_create_feature_set():
    """The feature set must contain one entry per unigram in the list.

    Four unigrams are supplied, so the returned feature set is expected
    to have length 4. Individual feature values are not checked here.
    """
    sentence = "Responsible for implementing high level security features successfully"
    candidates = ["responsible", "developed", "monitored", "solution"]
    features = unigram_features(sentence, candidates)
    eq_(4, len(features))
Esempio n. 9
0
def test_should_generate_unigrams_from_text_and_create_feature_set():
    """Check the size of the feature set built from a four-unigram list.

    Only the length of the result (expected to be 4) is asserted; the
    value stored for each unigram is not inspected by this test.
    """
    text = "Responsible for implementing high level security features successfully"
    vocabulary = ['responsible', 'developed', 'monitored', 'solution']
    feature_count = len(unigram_features(text, vocabulary))
    wanted_count = 4
    eq_(wanted_count, feature_count)