Beispiel #1
0
def process_resume(email_data):
    """Extract keywords and work-experience entity names from a resume.

    The attachment referenced by ``email_data["attachment_url"]`` is
    downloaded, converted to text with ``convert_pdf_to_txt``, stripped of
    trailing whitespace and written to ``temp.txt`` in the current working
    directory. The path-based helpers ``getresumecontent`` and
    ``workexfinder`` are then run on that file.

    Args:
        email_data: Mapping that provides an ``"attachment_url"`` entry.

    Returns:
        A dict with two entries:
        ``"keywords"`` -- the result of ``extract_keywords`` applied to the
        ``';'``-joined output of ``getresumecontent``;
        ``"workex"`` -- a list built by extending it with
        ``extract_entity_names(entity)`` for every element of the NLTK
        named-entity chunking of the ``workexfinder`` output.
    """
    path = download_attachment(email_data["attachment_url"])
    text = convert_pdf_to_txt(path).rstrip()

    # The downstream helpers take a file path, so the text is persisted
    # first. The context manager guarantees the handle is closed even if
    # the write fails.
    with open("temp.txt", "wb") as fp:
        fp.write(text)

    resume_text = ';'.join(getresumecontent("temp.txt"))

    # NOTE(review): the stop-word-filtered text was never used by the
    # original code; keywords are extracted from the unfiltered text below.
    # The call is kept so that any error it raises (e.g. while decoding the
    # text as UTF-8) still surfaces. Confirm whether extract_keywords was
    # meant to receive the filtered text instead.
    Stopwords.removeStopWords(unicode(resume_text, encoding="utf-8"))

    keywords = extract_keywords(resume_text)

    # Drop the ";;" separators before tokenizing the workexfinder output.
    workex = workexfinder("temp.txt")
    workex_text = "".join(workex.split(";;"))

    tokens = nltk.word_tokenize(workex_text)
    tagged = nltk.pos_tag(tokens)
    entities = nltk.chunk.ne_chunk(tagged)

    entity_names = []
    for entity in entities:
        entity_names.extend(extract_entity_names(entity))

    return {"keywords": keywords, "workex": entity_names}