def processTitle(title):
    """Label a job title and match it against the module-level ``matchers``.

    The title is lower-cased and split on whitespace, wrapped in a
    ``JobSentence``, labelled in place by the module-level ``labeler``, and
    the resulting labelled array is handed to ``matchSent``.

    :param title: raw job title string.
    :return: whatever ``matchSent`` returns for the labelled title.
    """
    tokens = title.lower().split()
    sentence = JobSentence(tokens)
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    return matchSent(matchers, labeled)
def extractInfo(sent):
    """Label a sentence and collect degree, major and preference matches.

    The sentence is split on whitespace, labelled by the module-level
    ``labeler``, and then tried against three matcher groups in a fixed
    order: degree, major, prefer. For every group that yields a matcher
    (``matchSent`` result is not ``None``) the matcher's ``output()`` is
    appended to the result list.

    :param sent: raw sentence string.
    :return: tuple ``(sentence, model)`` where ``sentence`` is the labelled
        ``JobSentence`` and ``model`` is the list of collected outputs.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)

    # Order matters: outputs are concatenated degree -> major -> prefer.
    matcher_groups = (
        degree_patterns.degree_matchers,
        major_patterns.major_matchers,
        prefer_patterns.prefer_matchers,
    )

    model = []
    for group in matcher_groups:
        hit = matchSent(group, labeled)
        if hit is None:
            continue
        model.extend(hit.output())
    return sentence, model
def extractInfo(sent):
    """Run the degree, major and prefer matchers over one sentence.

    Steps:
      1. Split ``sent`` on whitespace and wrap it in a ``JobSentence``.
      2. Label it in place with the module-level ``labeler``.
      3. Match the labelled array against each matcher group in turn and
         gather the ``output()`` of every successful match.

    :param sent: raw sentence string.
    :return: ``(labelled JobSentence, list of matcher outputs)``; the list
        is empty when none of the three groups matched.
    """
    labeled_sentence = JobSentence(sent.split())
    labeler.labelSentence(labeled_sentence)
    tokens = labeled_sentence.getLabeledArray(labeler.ontoDict)

    results = []
    # Fixed evaluation order: degree first, then major, then prefer.
    for candidates in (degree_patterns.degree_matchers,
                       major_patterns.major_matchers,
                       prefer_patterns.prefer_matchers):
        found = matchSent(candidates, tokens)
        if found is not None:
            results.extend(found.output())

    return labeled_sentence, results
def processTitle(title):
    """Return the ``matchSent`` result for a job title.

    The title is normalised to lower case, tokenised on whitespace and
    labelled by the module-level ``labeler`` before being matched against
    the module-level ``matchers``.

    :param title: raw job title string.
    :return: the value produced by ``matchSent`` for the labelled tokens.
    """
    title_sentence = JobSentence(title.lower().split())
    labeler.labelSentence(title_sentence)
    return matchSent(
        matchers,
        title_sentence.getLabeledArray(labeler.ontoDict),
    )
def getLabeledSentence(data_set_name, outfileName):
    """Label every sentence of a data set and report the pattern match rate.

    For each item loaded from ``data_set_name`` the text in ``item[2]`` is
    split on whitespace, labelled with the grammar returned by
    ``createDegreeGrammar()``, and the first element of every labelled entry
    is matched against a fixed ``TokenRegex`` pattern built from
    ``DE_LEVEL``, ``,``, ``OR`` and ``DEGREE`` tokens. ``item[0]`` is written
    to ``outfileName`` followed by a blank line, and progress plus the final
    match rate are printed to stdout.

    Changes relative to the previous version:
      * the output file is closed deterministically via ``with``;
      * the match rate is a true ratio (plain ``/`` on two ints truncates
        to 0 under Python 2);
      * an empty data set prints a rate of 0.0 instead of raising
        ``ZeroDivisionError``.

    :param data_set_name: name/path passed to ``datautils.loadJson``.
    :param outfileName: path of the text file to (over)write.
    :return: None.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)
    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]),
        "DEGREE",
    ]
    fst = TokenRegex(pattern1)
    matchSum = 0
    total = len(data)

    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)
            array = [x[0] for x in degreeSent.getLabeledArray()]

            # Single-argument print(...) yields the same text as the old
            # print statements and parses under both Python 2 and 3.
            print("%s :  %s" % (item[0], array))
            match = fst.match(array)
            print("match= %s" % (match,))
            if match:
                matchSum += 1

            # NOTE(review): written for every item, matched or not; confirm
            # this was not meant to live inside the ``if match`` branch.
            f.write(item[0] + "\n\n")
            # Result is unused; the call is kept in case it has side
            # effects - TODO confirm and remove if it is a pure getter.
            degreeSent.printLabeledArray()

    rate = float(matchSum) / total if total else 0.0
    print("match rate = %d/%d= %s" % (matchSum, total, rate))
def getLabeledSentence(data_set_name, outfileName):
    """Label each data-set sentence, log the ids, and print the match rate.

    Items come from ``datautils.loadJson(data_set_name)``. For every item,
    ``item[2]`` is tokenised on whitespace and labelled by the grammar from
    ``createDegreeGrammar()``; the first element of each labelled entry forms
    the token array that is tested against a fixed ``TokenRegex`` pattern
    (``DE_LEVEL`` / ``,`` / ``OR`` / ``DEGREE`` tokens). ``item[0]`` plus a
    blank line is written to ``outfileName``; per-item results and the
    overall match rate go to stdout.

    Fixes over the previous version:
      * the output file handle is no longer leaked (``with`` block);
      * the rate is computed with float division, so it is not truncated
        to 0 by Python 2 integer division;
      * an empty data set reports 0.0 rather than dividing by zero.

    :param data_set_name: name/path passed to ``datautils.loadJson``.
    :param outfileName: path of the text file to (over)write.
    :return: None.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)
    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]),
        "DEGREE",
    ]
    fst = TokenRegex(pattern1)
    matchSum = 0

    with open(outfileName, "w") as f:
        for item in data:
            words = item[2].split()
            degreeSent = JobSentence(words)
            labelGrammer.labelSentence(degreeSent)
            labeledArray = degreeSent.getLabeledArray()
            array = [x[0] for x in labeledArray]

            # One pre-formatted string per print keeps the output text
            # unchanged while remaining valid on Python 2 and Python 3.
            print("%s :  %s" % (item[0], array))
            match = fst.match(array)
            print("match= %s" % (match,))
            if match:
                matchSum += 1

            # NOTE(review): the id is written for every item, not only
            # matches - verify against the intended report format.
            f.write(item[0] + "\n\n")
            # Return value unused; call retained in case it is not a pure
            # getter - TODO confirm.
            degreeSent.printLabeledArray()

    itemCount = len(data)
    if itemCount:
        matchRate = float(matchSum) / itemCount
    else:
        # Nothing to match against: report zero instead of raising.
        matchRate = 0.0
    print("match rate = %d/%d= %s" % (matchSum, itemCount, matchRate))
def labelSent(labeler, matcher, sent):
    """Label a sentence and look it up with a single matcher.

    :param labeler: object providing ``labelSentence`` and ``ontoDict``.
    :param matcher: object providing ``findMatching(labeledArray)``.
    :param sent: raw sentence string; split on whitespace.
    :return: tuple ``(result, sentence)`` where ``result`` is the value
        returned by ``matcher.findMatching`` and ``sentence`` is the
        labelled ``JobSentence``.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    return matcher.findMatching(labeled), sentence
def labelSentByMatchers(matchers, sent):
    """Label a sentence and match it against a collection of matchers.

    Labelling uses the module-level ``labeler``; matching is delegated to
    ``matchSent``.

    :param matchers: matcher collection forwarded to ``matchSent``.
    :param sent: raw sentence string; split on whitespace.
    :return: tuple ``(sentence, matcher)`` - the labelled ``JobSentence``
        and whatever ``matchSent`` returned for it.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    matched = matchSent(matchers, labeled)
    return sentence, matched