def main():
    """Build feature vectors for the training and validation CSV files.

    Builds one text-feature vector per file, then one non-text (categorical)
    vector per field.  The call sequence is identical to the original code;
    only the duplication has been folded into data-driven loops.
    """
    stopword_pattern = rk.buildStopwordRegExPattern("../data/SmartStoplist.txt")

    # (field name, CSV column index) pairs shared by both files.
    common_fields = (
        ("LocationNorm", 4),
        ("ContractType", 5),
        ("ContractTime", 6),
        ("Company", 7),
        ("Category", 8),
    )

    # NOTE(review): the original code passed column 11 for SourceName in
    # Train.csv but column 9 in Valid.csv.  That asymmetry is preserved here
    # verbatim — confirm against the actual CSV headers whether one of the
    # two indices is a typo.
    for path, source_col in (("../data/Train.csv", 11),
                             ("../data/Valid.csv", 9)):
        build_text_vector(path, stopword_pattern)
        for field, col in common_fields:
            build_nontext_vector(path, field, col, True)
        build_nontext_vector(path, "SourceName", source_col, True)
def main():
    """Generate text and categorical feature vectors for Train.csv and Valid.csv."""
    stopword_pattern = rk.buildStopwordRegExPattern("../data/SmartStoplist.txt")

    # Categorical fields present at the same column in both files.
    shared = [
        ("LocationNorm", 4),
        ("ContractType", 5),
        ("ContractTime", 6),
        ("Company", 7),
        ("Category", 8),
    ]

    # training file (SourceName is passed as column 11 for this file)
    build_text_vector("../data/Train.csv", stopword_pattern)
    for field_name, column in shared + [("SourceName", 11)]:
        build_nontext_vector("../data/Train.csv", field_name, column, True)

    # test file (SourceName is passed as column 9 for this file)
    build_text_vector("../data/Valid.csv", stopword_pattern)
    for field_name, column in shared + [("SourceName", 9)]:
        build_nontext_vector("../data/Valid.csv", field_name, column, True)
# NOTE(review): this chunk begins mid-method — the three statements below are
# the tail of a POST handler defined above this view; they echo the request
# body to the log and return the crackr() result as JSON.
        log(web.data())
        web.header('Content-Type', 'application/json')
        return json.dumps(crackr(web.data(), skillfilter='post'))


class NaivePost:
    """web.py request handler that runs the `naive` extractor on a POST body."""

    def POST(self):
        """Log the raw POST body and return naive() output as a JSON response."""
        log(web.data())
        web.header('Content-Type', 'application/json')
        return json.dumps(naive(web.data(), skillfilter='post'))


# Algorithms
# Module-level setup: skill dictionary and stopword pattern built once at
# import time and shared by the extractor functions below.
from candygen import skills, buildskilldict
skilldict = buildskilldict(skills)

from rake import buildStopwordRegExPattern
stopwordpattern = buildStopwordRegExPattern("stoplist.txt")

# Rapid Automatic Keyword Extraction (RAKE)
# NOTE(review): `operator` is imported from the rake module rather than the
# stdlib — presumably rake.py re-exports it; verify.
from rake import splitSentences, generateCandidateKeywords
from rake import calculateWordScores, operator


def rake(text, skillfilter=None):
    """Extract keyword candidates from *text* using the RAKE algorithm.

    NOTE(review): `generateCandidateKeywordScores` is used below but is not
    among the visible imports — it must be brought into scope elsewhere in
    this file; confirm.
    """
    # preprocess text
    text = textprocess.preprocess(text)
    # tokenize
    sentenceList = splitSentences(text)
    phraseList = generateCandidateKeywords(sentenceList, stopwordpattern)
    # generate candidates and calculate scores
    wordscores = calculateWordScores(phraseList)
    keywordcandidates = generateCandidateKeywordScores(phraseList, wordscores)
    # NOTE(review): this function continues beyond the visible chunk — the
    # remainder of its body (and its return) is not shown here.