def process_resume(email_data):
    """Extract keywords and work-experience entity names from a resume.

    The attachment referenced by ``email_data["attachment_url"]`` is
    downloaded, converted to text, and written to ``temp.txt`` in the
    current working directory so the path-based helpers can read it.

    Args:
        email_data: Mapping that provides an ``"attachment_url"`` entry.

    Returns:
        A dict with two entries:
            ``"keywords"``: the result of ``extract_keywords`` on the
                ``';'``-joined resume content.
            ``"workex"``: a list of names collected by
                ``extract_entity_names`` from the NLTK named-entity chunks
                of the work-experience text.
    """
    # NOTE(review): a fixed file name in the working directory means
    # overlapping calls would overwrite each other's data -- confirm that
    # callers never run this concurrently.
    temp_path = "temp.txt"

    pdf_path = download_attachment(email_data["attachment_url"])
    text = convert_pdf_to_txt(pdf_path).rstrip()

    # The context manager closes the handle even if the write fails.
    with open(temp_path, "wb") as fp:
        fp.write(text)

    resume_text = ';'.join(getresumecontent(temp_path))

    # NOTE(review): the stop-word-filtered text is never used below. The
    # call is kept so any decode error or helper side effect still occurs;
    # confirm whether keywords were meant to be extracted from its result.
    # `unicode` is a Python 2 builtin.
    Stopwords.removeStopWords(unicode(resume_text, encoding="utf-8"))

    keywords = extract_keywords(resume_text)

    # Drop the ";;" separators before tokenizing the work-experience text.
    workex_text = workexfinder(temp_path).replace(";;", "")
    tokens = nltk.word_tokenize(workex_text)
    tagged = nltk.pos_tag(tokens)
    chunks = nltk.chunk.ne_chunk(tagged)

    entity_names = []
    for chunk in chunks:
        entity_names.extend(extract_entity_names(chunk))

    return {"keywords": keywords, "workex": entity_names}