Esempio n. 1
0
def extractInfo(sent):
    """Label a sentence and gather the output of each matching pattern group.

    ``sent`` is split with ``str.split()``, wrapped in a ``JobSentence`` and
    labelled by the module-level ``labeler``.  The resulting labeled array is
    then run through the degree, major and preference matcher lists, in that
    order.

    Returns a ``(sentence, outputs)`` pair: the labelled ``JobSentence`` and a
    list extended with ``output()`` of every group whose match is not ``None``.
    """
    job_sentence = JobSentence(sent.split())
    labeler.labelSentence(job_sentence)
    labels = job_sentence.getLabeledArray(labeler.ontoDict)

    collected = []
    for group in (
        degree_patterns.degree_matchers,
        major_patterns.major_matchers,
        prefer_patterns.prefer_matchers,
    ):
        hit = matchSent(group, labels)
        if hit is None:
            continue
        collected.extend(hit.output())

    return job_sentence, collected
Esempio n. 2
0
def processTitle(title):
    """Lower-case and label a job title, then match it against ``matchers``.

    Returns whatever ``matchSent`` yields for the title's labeled array.
    """
    tokens = title.lower().split()
    sentence = JobSentence(tokens)
    labeler.labelSentence(sentence)
    labels = sentence.getLabeledArray(labeler.ontoDict)
    return matchSent(matchers, labels)
Esempio n. 3
0
def processTitle(title):
    """Match a job title against the ``matchers`` defined outside this function.

    The title is lower-cased, split with ``str.split()``, labelled by the
    module-level ``labeler`` and its labeled array handed to ``matchSent``,
    whose result is returned unchanged.
    """
    sentence = JobSentence(title.lower().split())
    labeler.labelSentence(sentence)
    labels = sentence.getLabeledArray(labeler.ontoDict)
    result = matchSent(matchers, labels)
    return result
Esempio n. 4
0
def extractInfo(sent):
    """Run a labelled sentence through the degree, major and prefer matchers.

    The sentence text is split with ``str.split()``, labelled by the
    module-level ``labeler``, and its labeled array is matched against each
    matcher list in turn.

    Returns a tuple ``(sentence, outputs)`` where ``sentence`` is the labelled
    ``JobSentence`` and ``outputs`` is the concatenation of ``output()`` from
    every matcher list that produced a match (``matchSent`` not ``None``).
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labels = sentence.getLabeledArray(labeler.ontoDict)

    outputs = []
    matcher_groups = [
        degree_patterns.degree_matchers,
        major_patterns.major_matchers,
        prefer_patterns.prefer_matchers,
    ]
    for group in matcher_groups:
        found = matchSent(group, labels)
        if found is not None:
            outputs.extend(found.output())

    return sentence, outputs
Esempio n. 5
0
def getLabeledSentence(data_set_name, outfileName):
    """Label every sentence of a data set and report the pattern match rate.

    Each item loaded from ``data_set_name`` is indexed positionally:
    ``item[0]`` is printed and written to the output file as a header, and
    ``item[2]`` is the sentence text that gets split, labelled and matched.

    Args:
        data_set_name: Passed to ``datautils.loadJson`` to load the items.
        outfileName: Path of the file that receives one header per item.

    Prints the label array and match result for every item, followed by the
    overall match rate as a true fraction (0.0 when the data set is empty).
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # Presumably reads as: DE_LEVEL (, DE_LEVEL)* (OR DE_LEVEL)? DEGREE
    # -- inferred from the helper names; confirm against TokenRegex.
    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]),
        "DEGREE",
    ]
    fst = TokenRegex(pattern1)

    matchSum = 0
    # The context manager guarantees the file is flushed and closed, even if
    # labelling or matching raises part-way through the data set.
    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)
            labeledArray = degreeSent.getLabeledArray()
            array = [x[0] for x in labeledArray]
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("%s :   %s" % (item[0], array))
            match = fst.match(array)
            print("match= %s" % (match,))
            if match:
                matchSum += 1
            f.write(item[0] + "\n\n")
            # Result is unused; the call is kept in case it has side effects
            # that are not visible from here.
            degreeSent.printLabeledArray()

    total = len(data)
    # float() avoids Python 2 integer division, which truncated the rate to
    # 0 or 1; the guard avoids ZeroDivisionError on an empty data set.
    rate = float(matchSum) / total if total else 0.0
    print("match rate = %d/%d= %s" % (matchSum, total, rate))
Esempio n. 6
0
def getLabeledSentence(data_set_name, outfileName):
    """Label each data-set sentence, match it, and print the match rate.

    Items come from ``datautils.loadJson(data_set_name)`` and are indexed
    positionally: ``item[0]`` is used as a header (printed and written to the
    output file) and ``item[2]`` is the sentence text to label.

    Args:
        data_set_name: Identifier handed to ``datautils.loadJson``.
        outfileName: Path of the output file; one header is written per item.

    For every item the label array and the match result are printed; at the
    end the match rate is printed as a true fraction (0.0 for empty data).
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # Presumably: DE_LEVEL (, DE_LEVEL)* (OR DE_LEVEL)? DEGREE -- inferred
    # from the helper names; confirm against TokenRegex.
    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]),
        "DEGREE",
    ]
    fst = TokenRegex(pattern1)

    matchSum = 0
    # ``with`` closes the file on every exit path; the original never closed it.
    with open(outfileName, "w") as f:
        for item in data:
            words = item[2].split()
            degreeSent = JobSentence(words)
            labelGrammer.labelSentence(degreeSent)
            labeledArray = degreeSent.getLabeledArray()
            array = [x[0] for x in labeledArray]
            # One formatted argument keeps the output identical on Python 2
            # while also being valid Python 3.
            print("%s :   %s" % (item[0], array))
            match = fst.match(array)
            print("match= %s" % (match,))
            if match:
                matchSum += 1
            f.write(item[0] + "\n\n")
            # Return value is not used; call retained in case it has side
            # effects that cannot be seen from this function.
            degreeSent.printLabeledArray()

    total = len(data)
    # True division (integer division truncated the rate under Python 2) and
    # a guard against dividing by zero when there are no items.
    rate = float(matchSum) / total if total else 0.0
    print("match rate = %d/%d= %s" % (matchSum, total, rate))
Esempio n. 7
0
def labelSent(labeler, matcher, sent):
    """Label ``sent`` with ``labeler`` and look for a match with ``matcher``.

    Returns a pair ``(found, sentence)``: the value returned by
    ``matcher.findMatching`` for the labeled array, and the labelled
    ``JobSentence`` itself.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labels = sentence.getLabeledArray(labeler.ontoDict)
    found = matcher.findMatching(labels)
    return found, sentence
Esempio n. 8
0
def labelSentByMatchers(matchers, sent):
    """Label ``sent`` and match its labeled array against ``matchers``.

    Labelling uses the module-level ``labeler``.  Returns a pair
    ``(sentence, matcher)``: the labelled ``JobSentence`` and the result of
    ``matchSent`` for the given matcher list.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labels = sentence.getLabeledArray(labeler.ontoDict)
    found = matchSent(matchers, labels)
    return sentence, found
Esempio n. 9
0
def labelSent(sent):
    """Tag ``sent``, build a ``JobSentence`` from the result and label it.

    ``tagSentence`` supplies the two values passed to ``JobSentence``; the
    module-level ``labeler`` then labels the sentence, which is returned.
    """
    words, tags = tagSentence(sent)
    sentence = JobSentence(words, tags)
    labeler.labelSentence(sentence)
    return sentence
Esempio n. 10
0
def labelDegreeSet(data_set_name, outfileName):
    """Label every sentence of a data set and dump the results to a file.

    Items come from ``datautils.loadJson(data_set_name)`` and are indexed
    positionally: ``item[0]`` is printed and written as a header, and
    ``item[2]`` is the sentence text that is split and labelled.

    Args:
        data_set_name: Identifier handed to ``datautils.loadJson``.
        outfileName: Path of the output file.  For each item it receives the
            header followed by ``printSentenct().get_string()``, each
            terminated by a blank line.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # ``with`` closes the file on every exit path; the original left the
    # handle open, so buffered output could be lost on an error.
    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(item[0])
            f.write(item[0] + "\n\n")

            table = degreeSent.printSentenct()
            f.write(table.get_string() + "\n\n")
Esempio n. 11
0
def labelDegreeSet(data_set_name, outfileName):
    """Write the labelled form of every data-set sentence to ``outfileName``.

    Each item loaded via ``datautils.loadJson(data_set_name)`` is indexed
    positionally: ``item[0]`` is the header (printed and written to the file)
    and ``item[2]`` is the sentence text to split and label.

    Args:
        data_set_name: Identifier handed to ``datautils.loadJson``.
        outfileName: Path of the output file; per item it gets the header and
            then ``printSentenct().get_string()``, each followed by a blank
            line.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # The context manager closes the file even when labelling raises; the
    # original never closed the handle.
    with open(outfileName, "w") as f:
        for item in data:
            words = item[2].split()
            degreeSent = JobSentence(words)
            labelGrammer.labelSentence(degreeSent)

            # Parenthesised single-argument print works on Python 2 and 3.
            print(item[0])
            f.write(item[0] + "\n\n")

            table = degreeSent.printSentenct()
            f.write(table.get_string() + "\n\n")
Esempio n. 12
0
def labelDegree():
    """Label one hard-coded sample sentence with the degree grammar.

    A set of sample degree requirements is kept here for manual
    experimentation; only the sixth one is actually labelled.  The result of
    ``printSentenct()`` is discarded and nothing is returned.
    """
    samples = (
        "bachelors degree",
        "bachelors Degree preferred",
        "Bachelors Degree or Equivalent",
        "bachelors degree in Computer Science",
        "bachelors degree in Computer Science or equivalent",
        "B.S. degree in Computer Science required",
        "Requires a Bachelors degree in Information Systems or related field",
        "Bachelors degree in computer science or an equivalent combination of education and/or experience",
        "bachelors degree in related field , OR four ( 4 ) years of experience in a directly related field",
        "Bachelors or master degree in computer science",
        "Bachelor , Master or Doctorate of Science degree from an accredited course of study , in engineering , computer science , mathematics , physics or chemistry",
    )
    chosen = samples[5]  # sample no. 6: the "B.S. degree ..." sentence

    grammar = createDegreeGrammar()
    sentence = JobSentence(chosen.split())
    grammar.labelSentence(sentence)
    sentence.printSentenct()
Esempio n. 13
0
def labelDegree():
    """Run the degree grammar over a single built-in example sentence.

    The function carries a list of example degree requirements for manual
    testing and labels the one at position six.  ``printSentenct()`` is
    called on the labelled sentence; its result is not used and the function
    returns nothing.
    """
    examples = [
        "bachelors degree",
        "bachelors Degree preferred",
        "Bachelors Degree or Equivalent",
        "bachelors degree in Computer Science",
        "bachelors degree in Computer Science or equivalent",
        "B.S. degree in Computer Science required",
        "Requires a Bachelors degree in Information Systems or related field",
        "Bachelors degree in computer science or an equivalent combination of education and/or experience",
        "bachelors degree in related field , OR four ( 4 ) years of experience in a directly related field",
        "Bachelors or master degree in computer science",
        "Bachelor , Master or Doctorate of Science degree from an accredited course of study , in engineering , computer science , mathematics , physics or chemistry",
    ]
    selected_index = 5  # zero-based index of the sixth example

    grammar = createDegreeGrammar()
    sentence = JobSentence(examples[selected_index].split())
    grammar.labelSentence(sentence)
    sentence.printSentenct()