Beispiel #1
0
def get_content_from_text(lines_of_text, use_wiki=True, vocab=None):
    """Collect skill phrases found in the given lines of text.

    The lines are turned into grams by ``txt_list_to_grams(lines_of_text, 0)``.
    Each gram is put into one of two buckets depending on
    ``has_more_than_x_russian_symbols(gram, 2)``, and both buckets are passed
    to ``get_soft_skills2``.  If ``use_wiki`` is true and the second bucket is
    not empty, ``get_hard_skills`` is called on that bucket and its answers
    are appended, skipping any answer that already appears among the soft
    skills (compared case-insensitively).

    Args:
        lines_of_text: Input forwarded unchanged to ``txt_list_to_grams``.
        use_wiki: When false, ``get_hard_skills`` is never called.
            NOTE(review): presumably a Wikipedia-backed lookup, judging by
            the flag name -- confirm against the helper.
        vocab: Optional third argument for ``get_soft_skills2``.  When it is
            ``None`` the helper is called with two arguments so that its own
            default applies.

    Returns:
        The list returned by ``get_soft_skills2``, extended with the
        non-duplicate hard skills when the lookup described above ran.
    """
    grams = txt_list_to_grams(lines_of_text, 0)

    # Grams for which the helper reports more than 2 Russian symbols go to the
    # first bucket; everything else goes to the second one.
    russian_grams = []
    other_grams = []
    for gram in grams:
        if has_more_than_x_russian_symbols(gram, 2):
            russian_grams.append(gram)
        else:
            other_grams.append(gram)

    # Identity test: ``vocab == None`` would invoke a custom ``__eq__`` and can
    # misbehave for container-like vocabularies.
    if vocab is None:
        soft_skills = get_soft_skills2(russian_grams, other_grams)
    else:
        soft_skills = get_soft_skills2(russian_grams, other_grams, vocab)

    if not (other_grams and use_wiki):
        return soft_skills

    hard_skills = get_hard_skills(other_grams)

    # A set gives O(1) membership checks while filtering the hard skills.
    known_lower = {skill.lower() for skill in soft_skills}
    new_hard_skills = [
        answer for answer in hard_skills
        if answer.lower() not in known_lower
    ]

    return soft_skills + new_hard_skills
Beispiel #2
0
def get_answer(sample,
               first_vocab=get_prop_set(
                   TruePath('model_data/objections_used.txt')),
               second_vocab=get_prop_set(
                   TruePath('./model_data/objections_back_used.txt'))):
    """Locate a ``first_vocab`` hit and a following ``second_vocab`` hit.

    ``txt_list_to_grams(sample, 0)`` yields ``ngrams`` and ``lines``; entries
    of both are unpacked as ``(item, number)`` pairs.  The function looks for
    the first index whose ngram has number 2 and satisfies
    ``has_feature(ngram, first_vocab)``.  From the next index onwards it then
    looks for the first ngram with number 1 that satisfies
    ``has_feature(ngram, second_vocab)``.

    Note that both default vocabularies are built once, when this ``def``
    statement is executed, not on every call.

    Args:
        sample: Input forwarded unchanged to ``txt_list_to_grams``.
        first_vocab: Vocabulary passed to ``has_feature`` for the first hit.
        second_vocab: Vocabulary passed to ``has_feature`` for the second hit.

    Returns:
        A 4-tuple ``(first_flag, first_position, second_flag,
        second_position)`` where the positions are the results of
        ``get_position_line(lines, index)`` (the index is ``None`` when there
        was no hit).  ``first_flag`` is a bool.  ``second_flag`` is ``None``
        when there was no first hit, ``True`` when the second hit was found,
        ``'50/50'`` when it was not found but more than 8 entries of
        ``lines`` from the first hit onwards carry number 1, and ``False``
        otherwise.
    """
    ngrams, lines = txt_list_to_grams(sample, 0)

    first_flag, first_ind, second_flag, second_ind = False, None, None, None

    for i in range(len(lines)):
        g, number = ngrams[i]
        if number != 2 or not has_feature(g, first_vocab):
            continue

        # First hit found: from here on a missing second hit is reported as
        # False rather than None.
        first_flag, first_ind, second_flag = True, i, False

        for j in range(i + 1, len(lines)):
            h, nb = ngrams[j]
            if nb == 1 and has_feature(h, second_vocab):
                second_flag, second_ind = True, j
                break

        # Only the first matching ngram is ever considered.
        break

    # ``is False`` keeps the "no first hit" state (None) clearly apart from
    # "first hit but no second hit"; first_ind is always set in this branch.
    if second_flag is False and sum(
            num == 1 for _line, num in lines[first_ind:]) > 8:
        second_flag = '50/50'

    return first_flag, get_position_line(
        lines, first_ind), second_flag, get_position_line(lines, second_ind)
def get_content_from_text(lines_of_text, vocab=None):
    """Return the soft skills detected in the given lines of text.

    The lines are converted to grams with
    ``txt_list_to_grams(lines_of_text, 0)``; the grams are split into two
    lists according to ``has_more_than_x_russian_symbols(gram, 2)`` and both
    lists are handed to ``get_soft_skills2``.

    Args:
        lines_of_text: Input forwarded unchanged to ``txt_list_to_grams``.
        vocab: Optional third argument for ``get_soft_skills2``.  When it is
            ``None`` the helper is called with two arguments so that its own
            default applies.

    Returns:
        Whatever ``get_soft_skills2`` returns for the two gram lists.
    """
    matching = []      # grams the helper flags as having > 2 Russian symbols
    remaining = []     # all other grams

    for gram in txt_list_to_grams(lines_of_text, 0):
        if has_more_than_x_russian_symbols(gram, 2):
            matching.append(gram)
        else:
            remaining.append(gram)

    # Compare against the None singleton by identity, not with ``==``, so a
    # vocabulary object with a custom ``__eq__`` cannot confuse the check.
    if vocab is None:
        return get_soft_skills2(matching, remaining)
    return get_soft_skills2(matching, remaining, vocab)
Beispiel #4
0
from wikipedia_api import get_hard_skills

from prepare_functions import *

# Directory containing this script; the sample paths below are built from it.
my_dir = os.path.dirname(__file__)

# Paths train_samples/0.txt ... train_samples/24.txt next to this script.
files = [
    os.path.join(my_dir, 'train_samples', f'{sample_no}.txt')
    for sample_no in range(25)
]

if __name__ == '__main__':

    # Manual run on one training sample (index 21 of ``files``).
    with io.open(files[21], mode='r', encoding='utf-8') as f:
        doclines = list(f)

    grams = txt_list_to_grams(doclines, 0)
    print_list(grams)

    # Split the grams: ``s`` receives those for which the helper reports more
    # than 2 Russian symbols, ``h`` receives the rest.
    h, s = [], []
    for g in grams:
        (s if has_more_than_x_russian_symbols(g, 2) else h).append(g)

    # Disabled follow-up step:
    #soft_skills = get_soft_skills(s+h)