Exemple #1
0
def main():
  """Build the text vector and the non-text vectors for both CSV files.

  A stopword pattern is built once from ``../data/SmartStoplist.txt`` and
  handed to ``build_text_vector`` for each file. ``build_nontext_vector`` is
  then called once per (name, index) pair listed for that file, always with
  ``True`` as the final argument.

  Returns nothing; all work is delegated to the two ``build_*`` helpers.
  """
  stopword_pattern = rk.buildStopwordRegExPattern("../data/SmartStoplist.txt")

  # Each entry: (csv path, ((name, index), ...)). The integer is presumably the
  # column position inside the CSV -- confirm against build_nontext_vector.
  # NOTE(review): SourceName uses index 11 for Train.csv but 9 for Valid.csv;
  # presumably the two files have different column layouts -- confirm.
  plan = (
    # training file
    ("../data/Train.csv", (
      ("LocationNorm", 4),
      ("ContractType", 5),
      ("ContractTime", 6),
      ("Company", 7),
      ("Category", 8),
      ("SourceName", 11),
    )),
    # test file
    ("../data/Valid.csv", (
      ("LocationNorm", 4),
      ("ContractType", 5),
      ("ContractTime", 6),
      ("Company", 7),
      ("Category", 8),
      ("SourceName", 9),
    )),
  )

  for csv_path, columns in plan:
    # The text vector is built before any of the file's non-text vectors.
    build_text_vector(csv_path, stopword_pattern)
    for column_name, column_index in columns:
      build_nontext_vector(csv_path, column_name, column_index, True)
Exemple #2
0
def main():
    """Run the vector-building steps for the training and the test CSV.

    For each file, ``build_text_vector`` is called with a stopword pattern
    built from ``../data/SmartStoplist.txt``, followed by one
    ``build_nontext_vector`` call per listed field (final argument ``True``
    every time). Nothing is returned.
    """
    stopword_pattern = rk.buildStopwordRegExPattern(
        "../data/SmartStoplist.txt")

    # Fields whose index is the same in both files. The index is presumably
    # the CSV column position -- confirm against build_nontext_vector.
    shared_fields = [
        ("LocationNorm", 4),
        ("ContractType", 5),
        ("ContractTime", 6),
        ("Company", 7),
        ("Category", 8),
    ]

    # (file, SourceName index): training file first, then the test file.
    # NOTE(review): the SourceName index differs per file (11 vs 9);
    # presumably the files have different column layouts -- confirm.
    inputs = [
        ("../data/Train.csv", 11),
        ("../data/Valid.csv", 9),
    ]

    for data_file, source_name_index in inputs:
        build_text_vector(data_file, stopword_pattern)
        fields = shared_fields + [("SourceName", source_name_index)]
        for field_name, field_index in fields:
            build_nontext_vector(data_file, field_name, field_index, True)
Exemple #3
0
        log(web.data())
        web.header('Content-Type', 'application/json')
        return json.dumps(crackr(web.data(), skillfilter='post'))
    
class NaivePost:
    """Request handler that runs ``naive`` on the POSTed data."""

    def POST(self):
        """Log the request data and return the ``naive`` result as JSON.

        The response ``Content-Type`` header is set to ``application/json``
        and ``naive`` is invoked with ``skillfilter='post'``.
        """
        # Log the incoming data before doing any work on it.
        log(web.data())
        web.header('Content-Type', 'application/json')
        result = naive(web.data(), skillfilter='post')
        return json.dumps(result)
    
# Algorithms

# Module-level setup: these statements run once, when the module is imported.
from candygen import skills, buildskilldict
# Built from candygen's `skills`; the structure of the result is not visible here.
skilldict = buildskilldict(skills)
from rake import buildStopwordRegExPattern
# Stopword pattern built from "stoplist.txt" (a relative path -- presumably
# resolved against the working directory; confirm). rake() below passes it to
# generateCandidateKeywords.
stopwordpattern = buildStopwordRegExPattern("stoplist.txt")

# Rapid Automatic Keyword Extraction (RAKE)
from rake import splitSentences, generateCandidateKeywords
# NOTE(review): `operator` is imported through the rake module rather than
# directly -- confirm this is intentional.
# NOTE(review): rake() below calls generateCandidateKeywordScores, which is not
# imported in the lines visible here -- confirm it is imported or defined
# elsewhere in this file.
from rake import calculateWordScores, operator
def rake(text, skillfilter=None):
    # preprocess text
    text = textprocess.preprocess(text)    
    
    # tokenize
    sentenceList = splitSentences(text)
    phraseList = generateCandidateKeywords(sentenceList, stopwordpattern)
    
    # generate candidates and calculate scores
    wordscores = calculateWordScores(phraseList)
    keywordcandidates = generateCandidateKeywordScores(phraseList, wordscores)