Example #1
0
def feature_consolidation(resume_text, top_unigram_list, top_bigram_list):
    """
    Build the combined unigram + bigram feature row for a single resume

    Args:
        resume_text -- resume text, handed unchanged to both feature extractors
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset

    Returns:
        consolidated_features -- one-element list; its only entry is the
            unigram features followed by the bigram features (joined with `+`)
    """
    # Unigram features are computed first, then bigram features.
    unigram_row = unigram_features(resume_text, top_unigram_list)
    bigram_row = bigram_features(resume_text, top_bigram_list)
    return [unigram_row + bigram_row]
def feature_consolidation(resume_text, top_unigram_list, top_bigram_list):
    """
    Combine the unigram and bigram featuresets of one resume into one row

    Args:
        resume_text -- resume text passed as-is to unigram_features and
            bigram_features
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset

    Returns:
        consolidated_features -- list holding exactly one entry: the result of
            unigram_features(...) + bigram_features(...)
    """
    from_unigrams = unigram_features(resume_text, top_unigram_list)
    from_bigrams = bigram_features(resume_text, top_bigram_list)
    # A single resume yields a single consolidated row.
    combined_row = from_unigrams + from_bigrams
    return [combined_row]
def feature_consolidation(resumes, top_unigram_list, top_bigram_list, add_true_score=False):
    """
    Function to consolidate all the featuresets for the training data

    Args:
        resumes -- iterable of 3-tuples
            [(resume_text, tag, filename), (resume_text, tag, filename)...];
            only resume_text is used here
        top_unigram_list -- list of top unigrams from the training dataset
        top_bigram_list -- list of top bigrams from the training dataset
        add_true_score -- boolean (default: False); accepted for interface
            compatibility but not read by this function

    Returns:
        consolidated_features -- list with one entry per resume, in input
            order; each entry is that resume's unigram features followed by
            its bigram features (joined with `+`)
    """
    consolidated_features = []
    # Walk `resumes` once so one-shot iterables (e.g. generators) work too;
    # iterating it twice would leave the second pass empty.
    for resume_text, _label, _fname in resumes:
        unigram_row = unigram_features(resume_text, top_unigram_list)
        bigram_row = bigram_features(resume_text, top_bigram_list)
        consolidated_features.append(unigram_row + bigram_row)
    return consolidated_features
Example #4
0
def test_should_return_zero_if_bigram_is_not_present_in_top_bigrams():
    # Every feature computed for the failing sample must be 0.
    # Note: an empty feature sequence makes this pass without any assertion.
    for feature_value in bigram_features(sample_failing_text, top_bigrams):
        assert_equals(0, feature_value)
Example #5
0
def test_should_return_one_if_bigram_is_present_in_top_bigrams():
    # Every feature computed for the passing sample must be 1.
    # Note: an empty feature sequence makes this pass without any assertion.
    for feature_value in bigram_features(sample_passing_text, top_bigrams):
        assert_equals(1, feature_value)
Example #6
0
def test_lengths_of_bigram_features_and_top_bigrams_should_be_equal():
    # The feature sequence must have as many entries as top_bigrams.
    features = bigram_features(sample_passing_text, top_bigrams)
    feature_count = len(features)
    expected_count = len(top_bigrams)
    assert_equals(feature_count, expected_count)
Example #7
0
def test_bigram_features_for_a_given_text_should_not_be_empty():
    # A non-zero length is truthy, so this fails only for an empty result.
    feature_count = len(bigram_features(sample_passing_text, top_bigrams))
    assert_true(feature_count)
def test_should_return_zero_if_bigram_is_not_present_in_top_bigrams():
    # All bigram features of the failing sample are expected to be 0;
    # with no features at all the loop body never runs and the test passes.
    computed = bigram_features(sample_failing_text, top_bigrams)
    for value in computed:
        assert_equals(0, value)
def test_should_return_one_if_bigram_is_present_in_top_bigrams():
    # All bigram features of the passing sample are expected to be 1;
    # with no features at all the loop body never runs and the test passes.
    computed = bigram_features(sample_passing_text, top_bigrams)
    for value in computed:
        assert_equals(1, value)
def test_lengths_of_bigram_features_and_top_bigrams_should_be_equal():
    # One feature is expected per entry of top_bigrams.
    actual_length = len(bigram_features(sample_passing_text, top_bigrams))
    assert_equals(actual_length, len(top_bigrams))
def test_bigram_features_for_a_given_text_should_not_be_empty():
    # The passing sample must yield at least one feature (length is truthy).
    computed = bigram_features(sample_passing_text, top_bigrams)
    how_many = len(computed)
    assert_true(how_many)