def writeRhymes():
    """Write the rhymes of every distinct corpus word to ``rhymes.txt``.

    The "lines" entries of the "country", "hiphoprap" and "pop" example sets
    (loaded with ``util.readExamples``) are split on single spaces, each token
    is lower-cased and duplicates are dropped.  Tokens are additionally split
    on "," so that comma-joined words are looked up individually.

    For every resulting word one output line ``word:rhyme1,rhyme2,...`` is
    written, where the rhymes are ``util.findRhymes(turnNumIntoString(word))``.
    A numbered progress message is printed for each lookup.

    The output file is managed by a ``with`` block so it is closed even when a
    lookup raises part-way through.
    """
    # Build a fresh list instead of extending the first genre's list in place,
    # so the data returned by util.readExamples is left untouched.
    lines = []
    for genre in ("country", "hiphoprap", "pop"):
        lines += util.readExamples(genre)[genre]["lines"]

    # word -> word mapping used purely for de-duplication.
    wordMap = {}
    for line in lines:
        for word in line.split(" "):
            word = word.lower()
            if word not in wordMap:
                wordMap[word] = word

    ct = 0
    with open("rhymes.txt", "w") as fo:
        for word in wordMap.values():
            # split(",") yields [word] when there is no comma, so one loop
            # covers both the comma-separated and the plain case.
            for part in word.split(","):
                print("%i. finding rhyme for: %s" % (ct, part))
                fo.write("%s:" % part.encode('utf8'))
                # The key written above is the raw token; the lookup uses the
                # form returned by turnNumIntoString.
                rhymes = util.findRhymes(turnNumIntoString(part))
                joined = ",".join(["%s" % rhyme for rhyme in rhymes])
                fo.write(joined.encode('utf8'))
                fo.write("\n")
                ct += 1
def createGrammarTree(genre):
    """Return a nested-``defaultdict`` tree of tag sequences for ``genre``.

    Every line of ``util.readExamples(genre)[genre]["lines"]`` is passed to
    ``tagSentence``; the second element of each returned tuple (used here as
    the part-of-speech tag) forms a path that is inserted into the tree, one
    nesting level per tag.  The index of each sentence is printed as it is
    processed.
    """
    def make_node():
        # Each missing key lazily creates another node of the same kind.
        return defaultdict(make_node)

    def insert_path(node, tags):
        # Walking the path is enough: defaultdict creates nodes on access.
        for tag in tags:
            node = node[tag]

    sentences = util.readExamples(genre)[genre]["lines"]
    grammarTree = make_node()
    for index, sentence in enumerate(sentences):
        print(index)
        tagged = tagSentence(sentence)
        insert_path(grammarTree, [pair[1] for pair in tagged])
    return grammarTree
def __init__(self, genre, startLyrics=None):
    """Initialise the per-genre state of the instance.

    genre       -- name passed to ``util.readExamples``; the result is kept
                   in ``self.genre_db``.
    startLyrics -- stored unchanged in ``self.startLyrics`` (default None).
    """
    # Inputs and the marker character.
    self.genre = genre
    self.startLyrics = startLyrics
    self.blank_marker = '_'

    # External resources: example data first, then the tagger.
    self.genre_db = util.readExamples(genre)
    self.tagger = tagger.Tagger()

    # Numeric limits; each n-gram ceiling is a tenth of the previous one.
    self.ceiling = 1000000
    self.bigramCeiling = 100000
    self.trigramCeiling = 10000
    self.fourgramCeiling = 1000
    self.sentenceEndingFloor = 50

    # Four independent, initially empty caches.
    self.cache, self.rhymeCache = {}, {}
    self.syllableCache, self.endingCache = {}, {}

    # n-gram counters all start at zero.
    self.numBigrams = self.numTrigrams = self.numFourgrams = 0
def createPOSMaps():
    """Build word<->tag lookup tables for all genres and save them as JSON.

    Every line of the "country", "hiphoprap" and "pop" example sets is tagged
    with ``tagSentence``.  From the resulting ``(word, tag, ...)`` tuples two
    dictionaries are built:

    * ``wordsToPOS``: word -> list of distinct tags seen for it
    * ``PosToWords``: tag  -> list of distinct words seen with it

    Both lists keep first-seen order.  The dictionaries are written to
    ``wordsToPOS.json`` and ``POSToWords.json`` in the current directory.
    A progress message is printed for every line.  Returns None.
    """
    wordsToPOS = {}
    PosToWords = {}
    # A (word, tag) pair is appended to both tables the first time it is seen,
    # so one set of seen pairs replaces the linear "in list" checks on lists
    # that can grow very large (e.g. all words for a common tag).
    seenPairs = set()

    for genre in ["country", "hiphoprap", "pop"]:
        sentences = util.readExamples(genre)[genre]["lines"]
        for i, sentence in enumerate(sentences):
            print("genre:{}, line:{}".format(genre, i))
            for tup in tagSentence(sentence):
                word, pos = tup[0], tup[1]
                if (word, pos) in seenPairs:
                    continue
                seenPairs.add((word, pos))
                wordsToPOS.setdefault(word, []).append(pos)
                PosToWords.setdefault(pos, []).append(word)

    # json.dump streams to the file.  The previous json.dumps(obj, outfile)
    # passed the file object as the positional ``skipkeys`` argument.
    with open("wordsToPOS.json", 'w+') as outfile1:
        json.dump(wordsToPOS, outfile1)
    with open("POSToWords.json", 'w+') as outfile2:
        json.dump(PosToWords, outfile2)
def createPartsOfSpeechFile(fileName, genre, maxLines=5):
    """Sample lines of a genre and write their structures to ``fileName``.

    For each of the first ``maxLines`` lines of
    ``util.readExamples(genre)[genre]["lines"]`` the string returned by
    ``getStructure(line)`` is split on "," and paired position by position
    with the comma-separated pieces of the line itself.  For every distinct
    structure piece, the matching line pieces and an occurrence count are
    collected.

    Output: one ``structure|[pieces...]|count`` row per distinct structure.
    The number of available lines and a ``structure | line`` message for each
    processed line are printed.

    fileName -- path of the file to (over)write.
    genre    -- example set name understood by ``util.readExamples``.
    maxLines -- number of lines to process (default 5, the previously
                hard-coded limit).
    """
    lines = util.readExamples(genre)[genre]["lines"]
    print(len(lines))

    # structure piece -> (list of line pieces, occurrence count)
    structureMap = {}
    for lineNo, line in enumerate(lines):
        # The line counter is kept separate from the occurrence counts; the
        # original reused one variable for both, which corrupted this limit.
        if lineNo >= maxLines:
            break
        st = getStructure(line)
        print("%s | %s" % (st, line))
        # zip stops at the shorter side, so a line with fewer comma pieces
        # than its structure no longer raises IndexError.
        for s, l in zip(st.split(","), line.split(",")):
            phrases, count = structureMap.get(s, ([], 0))
            phrases.append(l)
            structureMap[s] = (phrases, count + 1)

    # Open only once the data is ready; ``with`` closes the file on errors.
    with open(fileName, "w") as fo:
        for s, (phrases, count) in structureMap.items():
            fo.write("%s|%s|%s" % (s, phrases, count))
            fo.write("\n")
# NOTE(review): the statements down to the first `print` are the tail of a
# definition whose header is not visible in this chunk.  Their original
# nesting was lost when the source was flattened; the indentation below is a
# reconstruction.  `num_punchlines += 1` presumably sat inside a loop or
# conditional that is not shown -- TODO confirm against the original file.
    num_punchlines += 1
    # "Precision" here is one minus the share of punchlines in `examples`.
    precision = 1.0 - float(num_punchlines) / len(examples)
    # Recall and F1 are fixed at zero rather than computed.
    recall = 0.0
    fscore = 0.0
    print "\tPrecision:%f\n\tRecall:%f\n\tF1:%f" % (precision, recall, fscore)


def realtimePredict(vocabulary, freq_col_idx, regr):
    '''
    Interactively predict on punchlines typed by the user.

    Prompts for a line of text, echoes it, and keeps looping until an empty
    line is entered.  Each non-empty input is wrapped as a one-element
    example list, turned into features with fitModel (reusing `vocabulary`
    and `freq_col_idx`), and passed to `regr.predict`; the first prediction
    is printed.

    vocabulary   -- forwarded to fitModel as `vocab`.
    freq_col_idx -- forwarded to fitModel as `frequent_ngram_col_idx`.
    regr         -- object with a `predict(features)` method (presumably the
                    fitted model returned by learnPredictor -- verify).
    '''
    x = raw_input('Give me a punchline: ')
    print x
    # An empty string is falsy, so pressing enter on an empty line ends the loop.
    while (x):
        examples = []
        # The second tuple element is 0 for every input (presumably a
        # placeholder label -- verify against fitModel).
        examples.append((x, 0))
        # Only the first of fitModel's three return values is needed here.
        feature, _, _ = fitModel(examples, vocab=vocabulary, frequent_ngram_col_idx=freq_col_idx)
        predict = regr.predict(feature)
        print 'Your punchline was funny: ', predict[0]
        x = raw_input('Give me a punchline: ')


# Script section: load the three data splits, train, report the baseline,
# then start the interactive prompt.
trainExamples = util.readExamples('switchboardsampleL.train')
valExamples = util.readExamples('switchboardsampleL.val')
testExamples = util.readExamples('switchboardsampleL.test')
vocabulary, freq_col_idx, regr = learnPredictor(trainExamples, valExamples, testExamples)
allPosNegBaseline(trainExamples, valExamples, testExamples)
realtimePredict(vocabulary, freq_col_idx, regr)
# NOTE(review): everything down to the `return` is the tail of a function
# whose `def` line is outside this chunk (the call at the bottom suggests it
# is trainPredictor -- TODO confirm).  Indentation is reconstructed.
    # Report the fit on the training data.
    trainPredict = regr.predict(trainX)
    print("coefficient of acoustic", regr.coef_)
    print("TRAIN Mean squared error", mean_squared_error(trainY, trainPredict))
    print("TRAIN Variance score", r2_score(trainY, trainPredict))
    # Featurise the dev set with the vocabulary and n-gram columns already in
    # scope; only the first of extractFeatures' three return values is kept.
    devX, _, _ = extractFeatures(devExamples, vocab=vocabulary, frequent_ngram_col_idx=frequent_ngram_col_idx)
    # Each example is an (x, y) pair; collect the y values.
    devY = [y for x, y in devExamples]
    devX = np.array(devX)
    # Reshape the values into a column of shape (len(devY), 1).
    devY = np.reshape(np.array(devY), (len(devY), 1))
    print('DEV X shape', devX.shape)
    print('DEV Y shape', devY.shape)
    # Persist the dev arrays.
    # NOTE(review): the file objects passed to pickle.dump are never closed
    # explicitly.
    pickle.dump(devX, open('devX_b.pkl', 'wb'))
    pickle.dump(devY, open('devY_b.pkl', 'wb'))
    # Report the fit on the dev data.
    devPredict = regr.predict(devX)
    print("DEV Mean squared error", mean_squared_error(devY, devPredict))
    print("DEV Variance score", r2_score(devY, devPredict))
    return vocabulary, frequent_ngram_col_idx, regr


# Script section: load the data splits and train.
# testExamples is loaded but not used by the call below.
trainExamples = util.readExamples('moviesS_b.train')
devExamples = util.readExamples('moviesS_b.dev')
testExamples = util.readExamples('moviesS_b.test')
trainPredictor(trainExamples, devExamples)
# main REPL loop response = raw_input("Press 's' to start: ") while response != 'q': print("press enter to stop recording") record_audio() print("audio recorded") transcript = get_transcript_from_file(credential) print("transcript: ", transcript) convert_audio_sample() test_dataset = load_dataset("laughbot_audio.test.pkl") feature_b, label_b, seqlens_b = make_batches( test_dataset, batch_size=len(test_dataset[0])) feature_b = pad_all_batches(feature_b) batch_cost, summary, acc, predicted, acoustic = model.train_on_batch( session, feature_b[0], label_b[0], seqlens_b[0], train=False) text = readExamples('laughbot_text.txt') prediction = predictLaughter(text, acoustic) if prediction[0] == 1: play_laughtrack() else: print('Not funny :(') response = raw_input("Press 'c' to continue, 'q' to quit: ") print('Thanks for talking to me')
import submission, util
from collections import defaultdict

# Load the training and development examples.
trainExamples = util.readExamples('names.train')
devExamples = util.readExamples('names.dev')


def featureExtractor(x):
    """Map a whitespace-separated phrase to a sparse feature vector.

    The first token is the left context, the last token the right context and
    everything in between the entity, e.g. "took Mauritius into" gives
    left "took", entity ["Mauritius"], right "into".

    Returns a ``defaultdict(float)`` with value 1 for: the full entity, the
    left word, the right word, and, for every entity word, the word itself,
    its first four characters and its last four characters.
    """
    words = x.split()
    left = words[0]
    entity = words[1:-1]
    right = words[-1]

    # Collect feature names first, in the order they are to be inserted.
    names = [
        'entity is ' + ' '.join(entity),
        'left is ' + left,
        'right is ' + right,
    ]
    for word in entity:
        names.append('entity contains ' + word)
        names.append('entity contains prefix ' + word[:4])
        names.append('entity contains suffix ' + word[-4:])

    phi = defaultdict(float)
    for name in names:
        phi[name] = 1
    return phi


# Train on the loaded examples, then write the weights and an error analysis.
weights = submission.learnPredictor(
    trainExamples, devExamples, featureExtractor, 30, 0.05)
util.outputWeights(weights, 'weights')
util.outputErrorAnalysis(
    devExamples, featureExtractor, weights, 'error-analysis')
for x, y in trainExamples: phi = featureExtractor(x) # print phi # if y * score < 1 (wrong prediction) then calculate gradient loss then update weight for each feature margin = y*util.dotProduct(weights, phi) if (1-margin) > 0: indicator = 1 else: indicator = 0 scale = stepSize*indicator*y increment(weights, scale, phi) # this uses the defined feature extractor to predict the classification of x def predictor(x): phi = featureExtractor(x) # create thresholds for different scores score = dotProduct(phi, weights) # return 1 if (dotProduct(phi, weights) > 0) else -1 # Print out training and test error for every iteration: # print 'TRAINING ERROR:', util.evaluatePredictor(trainExamples, predictor) # print 'TEST ERROR:', util.evaluatePredictor(testExamples, predictor) return weights # Run trainReviews = util.readExamples('reviews.train') testReviews = util.readExamples('reviews.dev') featureExtractor = extractWordFeatures weights = learnPredictor(trainReviews, testReviews, featureExtractor) print 'output weights:', weights