class FieldLocator(object):
    """Find section headings in a document and record the line span of each section.

    A line is treated as a heading when, after punctuation and English
    stopwords are removed, it has between one and ``_MAX_HEADING_TOKENS``
    tokens and at least one of them is a known topic-header word. Each heading
    line is then classified (experience, skills, education, language,
    location) by the keywords it contains, and the span from that heading to
    the next one is registered on ``self.fieldNode``.

    NOTE(review): all keyword comparisons are case-sensitive and every keyword
    is lowercase; this presumably relies on the content being lowercased
    upstream -- confirm against the caller.
    NOTE(review): ``contentHeadingIndex`` and ``fieldNode`` are never reset, so
    calling ``identifyFields`` more than once on the same instance accumulates
    results -- confirm instances are used for a single document.
    """

    # Upper bound on the number of significant tokens a heading line may have.
    _MAX_HEADING_TOKENS = 4

    def __init__(self):
        # Receives the (start, end) line spans of every recognised section.
        self.fieldNode = FieldsIndexNode()
        # Line numbers of the lines recognised as section headings.
        self.contentHeadingIndex = []

    def identifyFields(self, node):
        """Detect the headings in ``node`` and register the section spans.

        Args:
            node: object exposing ``getContent()``. Its content is converted
                with ``InformationNode.convertStringIntoList`` (presumably
                into a list of lines -- TODO confirm).

        Side effects:
            Updates ``self.contentHeadingIndex`` and ``self.fieldNode`` and
            prints the heading line numbers to stdout.
        """
        content = InformationNode.convertStringIntoList(node.getContent())
        self.segmentResume(node, content)
        self.contentHeadingIndex = self._dedupePreservingOrder(
            self.contentHeadingIndex)
        self.getStartEndLocationResume(node, content)
        # Debug output; kept so the observable behaviour of this method does
        # not change for existing callers.
        print(self.contentHeadingIndex)

    def segmentResume(self, node, content):
        """Append to ``contentHeadingIndex`` the number of every heading line.

        Reads the content line by line and checks each short line against the
        topic-header words. A matching line number is recorded once, even when
        several of its words are header words.

        Args:
            node: unused; kept for interface compatibility.
            content: sequence of text lines.
        """
        if not content:
            return
        # Loop-invariant lookups, built once instead of once per token.
        topicHeaders = set(self.getTopicHeaders())
        ignoredWords = set(stopwords.words('english'))
        for lineNo, line in enumerate(content):
            # ``string.punctuation`` is a str, so the first test is a
            # substring test: it drops single punctuation marks (and any
            # token that happens to occur verbatim inside that string).
            significantWords = [
                word for word in word_tokenize(line)
                if word not in string.punctuation and word not in ignoredWords
            ]
            if not 0 < len(significantWords) <= self._MAX_HEADING_TOKENS:
                continue
            if any(word in topicHeaders for word in significantWords):
                self.contentHeadingIndex.append(lineNo)

    def getStartEndLocationResume(self, node, content):
        """Classify each heading and register its ``(start, end)`` line span.

        ``start`` is the heading's line number; ``end`` is the next heading's
        line number, or ``len(content)`` for the last heading. A heading may
        fall into several categories, and a category is registered once per
        matching token on the heading line.

        Args:
            node: unused; kept for interface compatibility.
            content: sequence of text lines indexed by the heading numbers.
        """
        if not self.contentHeadingIndex:
            return
        # (keywords, registrar) pairs, in the order the categories are checked.
        categories = (
            (set(self.getExperienceKeywordList()),
             self.fieldNode.addExperienceIndex),
            (set(self.getSkillKeywordList()),
             self.fieldNode.addSkillsIndex),
            (set(self.getEducationKeywordsList()),
             self.fieldNode.addEducationIndex),
            (set(self.getLanguageKeywordsList()),
             self.fieldNode.addLanguageIndex),
            (set(self.getLocationKeywordList()),
             self.fieldNode.addLocationIndex),
        )
        spans = self._headingSpans(self.contentHeadingIndex, len(content))
        for start, end in spans:
            for word in word_tokenize(content[start]):
                for keywords, register in categories:
                    if word in keywords:
                        register(start, end)

    @staticmethod
    def _dedupePreservingOrder(indexes):
        """Return ``indexes`` without duplicates, keeping first-seen order."""
        seen = set()
        unique = []
        for value in indexes:
            if value not in seen:
                seen.add(value)
                unique.append(value)
        return unique

    @staticmethod
    def _headingSpans(headingIndexes, contentLength):
        """Pair each heading index with the next one (or ``contentLength``)."""
        ends = list(headingIndexes[1:]) + [contentLength]
        return list(zip(headingIndexes, ends))

    def getLanguageKeywordsList(self):
        """Return the words that mark a heading as a language section."""
        return ['languages', "language"]

    def getEducationKeywordsList(self):
        """Return the words that mark a heading as an education section."""
        return ['education', 'university', 'school', 'polytechnic', 'ite',
                'academic', 'degree', 'phd', 'study', 'requirements']

    def getSkillKeywordList(self):
        """Return the words that mark a heading as a skills section."""
        return ['skill', 'skills', 'expertise', 'proficiency', 'technical',
                'qualification', 'qualifications', 'responsibilities']

    def getExperienceKeywordList(self):
        """Return the words that mark a heading as an experience section."""
        return ['work', 'experience', 'employment', 'position', 'requirements']

    def getLocationKeywordList(self):
        """Return the words that mark a heading as a location section."""
        return ['location']

    def getTopicHeaders(self):
        """Return the words whose presence makes a short line a heading.

        NOTE(review): the list contains repeated entries and one two-word
        entry; it is kept verbatim so the returned value is unchanged.
        ``segmentResume`` compares single tokens, so the two-word entry
        presumably never matches -- confirm.
        """
        return ["summary", "interests", "experience", "projects", "languages",
                "skills", "expertise", "education", "publications",
                "achievements", "extracurricular activities", "publications",
                "patent", "referees", "responsibilities", "certifications",
                "certification", "objective", "portfolio", "interest",
                "publication", "qualification", "qualifications", "skill",
                'paper', 'papers', 'experiences', 'activity', 'activities',
                'objective', 'history', 'courses', 'course', 'technical',
                'proficiency', 'proficiencies', 'requirements', 'requirement',
                'location', 'description', "language"]

    def getFieldNode(self):
        """Return the node holding the registered section spans."""
        return self.fieldNode
def __init__(self):
    """Set up an empty heading index and a fresh ``FieldsIndexNode``.

    NOTE(review): this repeats the statements of ``FieldLocator.__init__``
    and appears outside any visible class -- confirm whether it is a stray
    duplicate.
    """
    indexNode = FieldsIndexNode()
    self.fieldNode = indexNode
    # No heading lines are known until the content has been scanned.
    self.contentHeadingIndex = []