def extractFromJob(self, node, fieldNode):
    """Collect experience requirements from a job posting and add them to ``node``.

    First pass: for every ``start -> end`` pair returned by
    ``fieldNode.getExperienceIndex()``, each line of ``self.content[start:end]``
    that contains at least one token from ``self.getExperienceKeywordList()``
    is added to ``self.extractedContent``. Scanning of a range stops at a line
    that is exactly ``'work experience'``.

    Second pass: every line in ``self.extractedContent`` is passed to
    ``annotate``; each returned ``surfaceForm`` that starts with a letter is
    wrapped in an ``ExperienceSubNode`` and handed to ``node.addExperience``.
    When the line contains the token ``'years'`` or ``'year'``, the sub-node
    also receives a duration string made of that token and the token before it;
    otherwise the duration is ``None``.

    Args:
        node: Object exposing ``addExperience(subNode)``.
        fieldNode: Object exposing ``getExperienceIndex()``, which returns a
            mapping of slice start to slice end into ``self.content``.

    Returns:
        None. Results are delivered through ``node.addExperience``.
    """
    # NOTE(review): annotate() is assumed to yield mappings with a
    # 'surfaceForm' key (looks like DBpedia Spotlight output) - confirm.
    experienceKeywords = self.getExperienceKeywordList()

    for start, end in fieldNode.getExperienceIndex().items():
        for line in self.content[start:end]:
            # Don't process "work experience", only process "experience".
            if line == 'work experience':
                break
            # Only keep requirement lines: a section can mix key requirements
            # with skills, so a line must mention an experience keyword.
            if any(token in experienceKeywords for token in word_tokenize(line)):
                self.extractedContent.add(line)

    for line in self.extractedContent:
        tokens = word_tokenize(line)
        try:
            surfaceForms = [entity.get('surfaceForm') for entity in annotate(line)]

            # Look each marker up only if it is present. The previous
            # `index('years') or index('year')` raised ValueError whenever
            # only the singular form occurred, dropping the whole line.
            duration = None
            for marker in ('years', 'year'):
                if marker in tokens:
                    yearIndex = tokens.index(marker)
                    # Clamp so a marker at position 0 does not produce a
                    # negative slice start.
                    duration = " ".join(tokens[max(yearIndex - 1, 0):yearIndex + 1])
                    break

            for surfaceForm in surfaceForms:
                # Skip missing/empty surface forms instead of letting them
                # abort the rest of the line.
                if surfaceForm and surfaceForm[0].isalpha():
                    node.addExperience(ExperienceSubNode(surfaceForm, duration))
        except Exception:
            # Best effort: a line that cannot be annotated or recorded is
            # skipped so the remaining lines are still processed.
            continue
def parse(self, node, fieldNode):
    """Extract skills from the indexed skills ranges and add them to ``node``.

    Does nothing when ``fieldNode.getSkillsIndex()`` is empty. Otherwise every
    non-empty line of the indexed ranges that does not contain ``'page'`` is
    added to ``self.extractedContent``; the collected lines are joined with
    newlines and passed to ``annotate``. ``self.extractedContent`` is then
    replaced by the list of returned ``surfaceForm`` values, and each value not
    found in ``self.getSkillKeywordList()`` is passed to ``node.addSkill``.

    Args:
        node: Object exposing ``addSkill(value)``.
        fieldNode: Object exposing ``getSkillsIndex()``, a mapping of slice
            start to slice end into ``self.content``.
    """
    if not fieldNode.getSkillsIndex():
        return

    for sectionStart, sectionEnd in fieldNode.getSkillsIndex().items():
        for text in self.content[sectionStart:sectionEnd]:
            # Drop blank lines and lines mentioning 'page'.
            if not text or 'page' in text:
                continue
            self.extractedContent.add(text)

    joinedText = "\n".join(self.extractedContent)

    surfaceForms = []
    for entity in annotate(joinedText):
        surfaceForms.append(entity.get('surfaceForm'))
    self.extractedContent = surfaceForms

    for surfaceForm in self.extractedContent:
        if surfaceForm in self.getSkillKeywordList():
            # Section-heading keywords are not skills themselves.
            continue
        node.addSkill(surfaceForm)
def parse(self, node, fieldNode):
    """Extract languages from the indexed language ranges and add them to ``node``.

    Does nothing when ``fieldNode.getLanguageIndex()`` is empty. Otherwise all
    lines of the indexed ranges are added to ``self.extractedContent``. If
    anything was collected, the lines are joined with newlines and
    ``self.extractedContent`` is replaced by the result of ``annotate``. Each
    annotation whose ``surfaceForm`` is not in
    ``self.getLanguageKeywordsList()`` and whose lower-cased ``types`` value
    contains ``'language'`` has its ``surfaceForm`` passed to
    ``node.addLanguage``.

    Args:
        node: Object exposing ``addLanguage(value)``.
        fieldNode: Object exposing ``getLanguageIndex()``, a mapping of slice
            start to slice end into ``self.content``.
    """
    if not fieldNode.getLanguageIndex():
        return

    for sectionStart, sectionEnd in fieldNode.getLanguageIndex().items():
        for text in self.content[sectionStart:sectionEnd]:
            self.extractedContent.add(text)

    joinedText = "\n".join(self.extractedContent)
    if len(self.extractedContent) > 0:
        self.extractedContent = annotate(joinedText)

    for entity in self.extractedContent:
        surfaceForm = entity.get('surfaceForm')
        if surfaceForm in self.getLanguageKeywordsList():
            # Heading keywords are not languages themselves.
            continue
        if 'language' not in entity.get('types').lower():
            # Only keep entities typed as a language.
            continue
        node.addLanguage(surfaceForm)
def parse(self, node, fieldNode):
    """Register the skills found in the indexed skills ranges on ``node``.

    When ``fieldNode.getSkillsIndex()`` is non-empty, the truthy lines of each
    indexed range that do not contain ``'page'`` are added to
    ``self.extractedContent``. The collected lines are newline-joined and sent
    to ``annotate``; ``self.extractedContent`` becomes the list of
    ``surfaceForm`` values from the result. Every value that is not in
    ``self.getSkillKeywordList()`` is forwarded to ``node.addSkill``.

    Args:
        node: Object exposing ``addSkill(value)``.
        fieldNode: Object exposing ``getSkillsIndex()``, a mapping of slice
            start to slice end into ``self.content``.
    """
    hasSkillsSection = bool(fieldNode.getSkillsIndex())
    if hasSkillsSection:
        for begin, stop in fieldNode.getSkillsIndex().items():
            candidates = self.content[begin:stop]
            for candidate in candidates:
                keep = candidate and 'page' not in candidate
                if keep:
                    self.extractedContent.add(candidate)

        annotations = annotate("\n".join(self.extractedContent))
        self.extractedContent = list(
            map(lambda annotation: annotation.get('surfaceForm'), annotations)
        )

        for skill in self.extractedContent:
            isHeadingKeyword = skill in self.getSkillKeywordList()
            if not isHeadingKeyword:
                node.addSkill(skill)
def parse(self, node, fieldNode):
    """Register the languages found in the indexed language ranges on ``node``.

    When ``fieldNode.getLanguageIndex()`` is non-empty, every line of each
    indexed range is added to ``self.extractedContent``. If the collection is
    non-empty, its newline-joined text is passed to ``annotate`` and the result
    replaces ``self.extractedContent``. For each annotation, the
    ``surfaceForm`` is forwarded to ``node.addLanguage`` when it is not in
    ``self.getLanguageKeywordsList()`` and the lower-cased ``types`` value
    contains ``'language'``.

    Args:
        node: Object exposing ``addLanguage(value)``.
        fieldNode: Object exposing ``getLanguageIndex()``, a mapping of slice
            start to slice end into ``self.content``.
    """
    hasLanguageSection = bool(fieldNode.getLanguageIndex())
    if hasLanguageSection:
        for begin, stop in fieldNode.getLanguageIndex().items():
            for text in self.content[begin:stop]:
                self.extractedContent.add(text)

        joinedText = "\n".join(self.extractedContent)
        if len(self.extractedContent) > 0:
            self.extractedContent = annotate(joinedText)

        for annotation in self.extractedContent:
            name = annotation.get('surfaceForm')
            isHeadingKeyword = name in self.getLanguageKeywordsList()
            # The type check is only evaluated for non-keyword entities.
            if not isHeadingKeyword and 'language' in annotation.get('types').lower():
                node.addLanguage(name)
def extractEducationFromJob(self, node, fieldNode):
    """Collect education requirements from a job posting and add them to ``node``.

    First pass: for every ``start -> end`` pair returned by
    ``fieldNode.getEducationIndex()``, each line of ``self.content[start:end]``
    containing at least one token from ``self.getEducationLevel()`` is added to
    ``self.extractedContent``.

    Second pass: the collected lines are newline-joined and passed to
    ``annotate``; ``self.extractedContent`` is replaced by the list of returned
    ``surfaceForm`` values. Walking that list in order, a value found in
    ``self.getEducationLevel()`` becomes the current education level, and any
    other value (not in ``self.getIgnoredKeywords()``) is reported as
    ``"<level> in <value>"`` through ``node.addEducation(text, None)``.

    Args:
        node: Object exposing ``addEducation(text, second)``; ``None`` is
            always passed as the second argument here.
        fieldNode: Object exposing ``getEducationIndex()``, a mapping of slice
            start to slice end into ``self.content``.

    Returns:
        None. Results are delivered through ``node.addEducation``.
    """
    # NOTE(review): annotate() is assumed to yield mappings with a
    # 'surfaceForm' key (looks like DBpedia Spotlight output) - confirm.
    educationLevels = self.getEducationLevel()

    for start, end in fieldNode.getEducationIndex().items():
        for line in self.content[start:end]:
            if any(token in educationLevels for token in word_tokenize(line)):
                self.extractedContent.add(line)

    listString = "\n".join(self.extractedContent)
    try:
        self.extractedContent = [
            entity.get('surfaceForm') for entity in annotate(listString)
        ]
        ignoredKeywords = self.getIgnoredKeywords()

        # Start with no level. Previously an entity seen before any level
        # raised UnboundLocalError, which the bare except swallowed, dropping
        # every remaining entity.
        educationType = None
        for surfaceForm in self.extractedContent:
            if not surfaceForm or surfaceForm in ignoredKeywords:
                continue
            if surfaceForm in educationLevels:
                educationType = surfaceForm
            elif educationType is not None:
                node.addEducation(educationType + " in " + surfaceForm, None)
            # An entity with no preceding level has nothing to attach to and
            # is skipped.
    except Exception:
        # Best effort: annotation failures leave ``node`` without education
        # entries rather than aborting the caller.
        pass