def generateKaggleSubmission(weights, outfilename):
    """Write a Kaggle submission CSV of predictions for the test and dev sets.

    Test-set rows feed the private leaderboard; dev-set rows feed the public
    leaderboard, so the dev accuracy computed here should match the public
    leaderboard score for this submission.

    Parameters
    ----------
    weights : dict
        Classifier weights, passed through to predict().
    outfilename : str
        Path of the CSV file to create (columns: Id, Prediction).
    """
    # Hoist label -> index once instead of calling list.index() per row (O(n) each).
    labelIndex = {label: i for i, label in enumerate(ALL_LABELS)}
    with open(outfilename, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=['Id', 'Prediction'])
        writer.writeheader()
        # Test data is used for private leaderboard
        testData = dataIterator(TESTKEY, test_mode=True)
        for i, (counts, _) in enumerate(testData):
            predictedLabel, _ = predict(counts, weights, ALL_LABELS)
            writer.writerow({'Id': 'test-{}'.format(i),
                             'Prediction': labelIndex[predictedLabel]})
        # Dev data is used for public leaderboard
        devData = dataIterator(DEVKEY, test_mode=False)
        devCorrect = 0
        devTotal = 0
        for i, (counts, label) in enumerate(devData):
            devTotal += 1
            predictedLabel, _ = predict(counts, weights, ALL_LABELS)
            devCorrect += (predictedLabel == label)
            writer.writerow({'Id': 'dev-{}'.format(i),
                             'Prediction': labelIndex[predictedLabel]})
    # Guard against an empty dev set so we report 0.0 instead of crashing.
    devAccuracy = float(devCorrect) / devTotal if devTotal else 0.0
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print('Dev accuracy is {} ({} correct of {})'.format(
        devAccuracy, devCorrect, devTotal))
    print('Kaggle submission saved to {}. Sanity check: public leaderboard '
          'accuracy should be {} on submission.'.format(outfilename, devAccuracy))
def generateKaggleSubmission(weights,outfilename): with open(outfilename, 'w') as f: writer = csv.DictWriter(f, fieldnames=['Id', 'Prediction']) writer.writeheader() # Test data is used for private leaderboard testData = dataIterator(TESTKEY,test_mode=True) for i,(counts,_) in enumerate(testData): predictedLabel,_ = predict(counts,weights,ALL_LABELS) predictedIndex = ALL_LABELS.index(predictedLabel) writer.writerow({ 'Id': 'test-{}'.format(i), 'Prediction': predictedIndex}) # Dev data is used for public leaderboard devData = dataIterator(DEVKEY,test_mode=False) devCorrect = 0 devTotal = 0 for i,(counts,label) in enumerate(devData): devTotal += 1 predictedLabel,_ = predict(counts,weights,ALL_LABELS) devCorrect += (predictedLabel == label) predictedIndex = ALL_LABELS.index(predictedLabel) writer.writerow({ 'Id': 'dev-{}'.format(i), 'Prediction': predictedIndex}) devAccuracy = float(devCorrect) / devTotal print 'Dev accuracy is ', devAccuracy, '({} correct of {})'.format(devCorrect, devTotal) print 'Kaggle submission saved to', outfilename, ('. Sanity check: ' 'public leaderboard accuracy should be '), devAccuracy, 'on submission.'
def setup_module():
    """Regenerate the BOW files and load module-level word counts.

    dataIterator() reads the bag-of-words files, so docsToBOWs() must run
    for each key before the counts can be collected.
    """
    global ac_train, ac_dev
    # Rebuild both BOW files first; the iterators below depend on them.
    for key in (TRAINKEY, DEVKEY):
        docsToBOWs(key)
    ac_train = getAllCounts(dataIterator(TRAINKEY))
    ac_dev = getAllCounts(dataIterator(DEVKEY))
def setup_module():
    """Prepare the module-level count tables used by the tests.

    The BOW files must exist before dataIterator() can be used, hence the
    docsToBOWs() calls come first.
    """
    global ac_train
    global ac_dev
    docsToBOWs(TRAINKEY)
    docsToBOWs(DEVKEY)
    # Tuple assignment evaluates left to right: train counts, then dev counts.
    ac_train, ac_dev = (getAllCounts(dataIterator(TRAINKEY)),
                        getAllCounts(dataIterator(DEVKEY)))
def evalClassifier(weights, outfilename, testfile, test_mode=False):
    """Write one predicted label per line for *testfile*, then score it.

    Parameters
    ----------
    weights : dict
        Classifier weights, passed through to predict().
    outfilename : str
        Path of the prediction file to create.
    testfile : str
        Key of the dataset to evaluate.
    test_mode : bool
        When True the data is unlabeled; no scoring is done and None is
        returned.

    Returns
    -------
    The result of gtnlplib.scorer.getConfusion on the prediction file, or
    None in test mode.
    """
    with open(outfilename, 'w') as outfile:
        # Iterate through the eval set, printing one prediction per line.
        # write() instead of Python-2 ``print >>`` keeps this portable.
        for counts, label in dataIterator(testfile, test_mode):
            outfile.write('{}\n'.format(predict(counts, weights, ALL_LABELS)[0]))
    if test_mode:
        return None  # no gold labels available, nothing to score
    # Score outside the ``with`` so the file is closed and flushed before
    # the scorer reads it back.
    return gtnlplib.scorer.getConfusion(testfile, outfilename)
def evalClassifier(weights, outfilename, testfile, test_mode=False):
    """Predict a label for every instance in testfile and score the output.

    Writes one prediction per line to outfilename. In test mode there are no
    gold labels, so None is returned; otherwise the scorer's confusion
    counts for the prediction file are returned.
    """
    with open(outfilename, 'w') as outfile:
        # Iterate through the eval set and print each prediction to the file.
        for counts, _gold in dataIterator(testfile, test_mode):
            print >>outfile, predict(counts, weights, ALL_LABELS)[0]
        if test_mode:
            # Unlabeled data: nothing to score.
            return
        # Run the scorer on the prediction file.
        return gtnlplib.scorer.getConfusion(testfile, outfilename)