Esempio n. 1
0
def extractInfo(sent):
    """Label a sentence and gather the output of every pattern family that matches.

    The whitespace-split tokens of ``sent`` are wrapped in a ``JobSentence``
    and labeled with the ``labeler`` defined outside this function. The
    resulting labeled array is then run through the degree, major and prefer
    matcher lists, in that order.

    Returns a ``(sentence, model)`` tuple: the labeled ``JobSentence`` and a
    list holding the concatenated ``output()`` of each matcher that
    ``matchSent`` returned (empty when every call returned ``None``).
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)

    model = []
    # Order fixes the layout of the result: degree, then major, then prefer.
    for matcherList in (degree_patterns.degree_matchers,
                        major_patterns.major_matchers,
                        prefer_patterns.prefer_matchers):
        found = matchSent(matcherList, labeled)
        if found is None:
            continue
        model.extend(found.output())

    return sentence, model
Esempio n. 2
0
def getLabeledSentence(data_set_name, outfileName):
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]), "DEGREE"
    ]
    fst = TokenRegex(pattern1)

    matchSum = 0
    f = open(outfileName, "w")
    for item in data:
        #    print item
        words = item[2].split()
        degreeSent = JobSentence(words)
        labelGrammer.labelSentence(degreeSent)
        labeledArray = degreeSent.getLabeledArray()
        array = [x[0] for x in labeledArray]
        print item[0], ":  ", array
        match = fst.match(array)
        print "match=", match
        if match:
            matchSum += 1
    #  printTrack(track)
        f.write(item[0] + "\n\n")
        table = degreeSent.printLabeledArray()
    #    f.write( table.get_string()  + "\n\n" )
    print "match rate =", str(matchSum) + "/" + str(
        len(data)) + "=", matchSum / len(data)
Esempio n. 3
0
def processTitle(title):
    """Lower-case and label a job title, then match it against ``matchers``.

    The title is lower-cased, split on whitespace, wrapped in a
    ``JobSentence`` and labeled with the ``labeler`` defined outside this
    function.

    Returns whatever ``matchSent`` returns for the labeled title.
    """
    tokens = title.lower().split()
    sentence = JobSentence(tokens)
    labeler.labelSentence(sentence)
    return matchSent(matchers, sentence.getLabeledArray(labeler.ontoDict))
Esempio n. 4
0
def labelSent(sent):
    """Tag a sentence, wrap it in a ``JobSentence`` and label it.

    ``tagSentence`` must return exactly two values; both are passed to
    ``JobSentence`` (presumably the tokens and their part-of-speech tags --
    confirm against ``tagSentence``). The sentence is then labeled with the
    ``labeler`` defined outside this function.

    Returns the labeled ``JobSentence``.
    """
    words, tags = tagSentence(sent)
    sentence = JobSentence(words, tags)
    labeler.labelSentence(sentence)
    return sentence
Esempio n. 5
0
def labelSent(labeler, matcher, sent):
    """Label a sentence with ``labeler`` and look it up with ``matcher``.

    ``sent`` is split on whitespace and wrapped in a ``JobSentence``, which
    ``labeler`` labels. The labeled array built from ``labeler.ontoDict`` is
    then passed to ``matcher.findMatching``.

    Returns a ``(result, sentence)`` tuple: the value returned by
    ``findMatching`` and the labeled ``JobSentence``.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    return matcher.findMatching(labeled), sentence
Esempio n. 6
0
def labelSentByMatchers(matchers, sent):
    """Label a sentence and match it against the given ``matchers``.

    ``sent`` is split on whitespace, wrapped in a ``JobSentence`` and labeled
    with the ``labeler`` defined outside this function.

    Returns a ``(sentence, matcher)`` tuple: the labeled ``JobSentence`` and
    whatever ``matchSent`` returns for its labeled array.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    return sentence, matchSent(matchers, labeled)
Esempio n. 7
0
def labelDegreeSet(data_set_name, outfileName):
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    f = open(outfileName, "w")
    for item in data:
        #    print item
        words = item[2].split()
        degreeSent = JobSentence(words)
        labelGrammer.labelSentence(degreeSent)

        print item[0]
        f.write(item[0] + "\n\n")

        table = degreeSent.printSentenct()
        #     print table.get_string() + "\n\n"
        f.write(table.get_string() + "\n\n")
Esempio n. 8
0
def labelDegree():
    """Label one hard-coded sample sentence with the degree grammar.

    A fixed set of sample degree-requirement sentences is kept here; only the
    sixth one is actually labeled. The others are alternatives to switch to
    by changing the index.
    """
    samples = (
        "bachelors degree",
        "bachelors Degree preferred",
        "Bachelors Degree or Equivalent",
        "bachelors degree in Computer Science",
        "bachelors degree in Computer Science or equivalent",
        "B.S. degree in Computer Science required",
        "Requires a Bachelors degree in Information Systems or related field",
        "Bachelors degree in computer science or an equivalent combination of education and/or experience",
        "bachelors degree in related field , OR four ( 4 ) years of experience in a directly related field",
        "Bachelors or master degree in computer science",
        "Bachelor , Master or Doctorate of Science degree from an accredited course of study , in engineering , computer science , mathematics , physics or chemistry",
    )
    # Index 5 is the "B.S. degree ..." sample.
    chosen = samples[5]

    grammar = createDegreeGrammar()
    sentence = JobSentence(chosen.split())
    grammar.labelSentence(sentence)
    # NOTE(review): the value returned by printSentenct() is discarded here;
    # confirm whether the call itself prints anything.
    sentence.printSentenct()