# Example no. 1
# 0
from keras.models import model_from_json
from preprocessor import Preprocessor
from inputExtractor import InputExtractor
from skillExtractNN import SkillsExtractorNN
from textFormater import TextFormater
import pandas as pd
import numpy as np
from itertools import islice

# Instantiate the project-local helpers (constructed left to right, i.e. in
# the same order as before): input extractor, preprocessor, text formatter.
in_extractor, pp, tf = InputExtractor(), Preprocessor(), TextFormater()

# The classifier is built from the two dimensions reported by the
# preprocessor.
feature_dims = pp.getDim()
word_features_dim, dense_features_dim = feature_dims
clf = SkillsExtractorNN(word_features_dim, dense_features_dim)

# Read the first sheet of the training workbook and replace every NaN cell
# with an empty string.
df = pd.read_excel(
    "dataset/training/resumes.xlsx", sheet_name=0
).replace(np.nan, '', regex=True)

# Four independent, initially empty lists; the row loop below extends
# every_phrase_vec with per-row results (the other three are presumably
# filled the same way -- confirm against the rest of the loop body).
every_phrase_vec, every_context_vec, every_phr_cox_vec, every_y = (
    [], [], [], []
)

for index, row in df.iterrows():
    cv = tf.format(row[0])
    phrases, context, np_tags, context_tags = in_extractor.extract(cv)
    phr_vec, cox_vec, phr_cox_vec, y = pp.preprocess(phrases, context, np_tags,
                                                     context_tags,
                                                     row[1].strip().split("|"))
    every_phrase_vec += phr_vec