class Reddit: def __init__(self): self.agent = praw.Reddit(user_agent="cs_haiku_bot") self.haiku = Haiku() self.sc = SyllableCounter() def allComments(self): for comment in [x.body for x in self.agent.get_all_comments(limit=300)]: if self.isHaikuCandidate(comment): self.prepareHaiku(comment) def subredditComments(self,subreddit): sub = self.agent.get_subreddit(subreddit) for comment in [x.body for x in sub.get_comments(limit=300)]: if self.isHaikuCandidate(comment): self.prepareHaiku(comment) def isHaikuCandidate(self,comment): return self.sc.getLineCounts(comment) and sum(self.sc.getLineCounts(comment)) == 17 def prepareHaiku(self,comment): lineCounts = [5,7,5] splits = [line.split(" ") for line in comment.splitlines()] words = [str(word) for word in list(itertools.chain(*splits))] count = 0 lineIndex = 0 haiku = "" for index in xrange(len(words)): if count > lineCounts[lineIndex] or count < 0: return if lineIndex > 2: break if count == lineCounts[lineIndex]: haiku+="\n" haiku+=words[index] + " " count = self.sc.syllableCount(words[index]) lineIndex+=1 else: haiku+=words[index] + " " count += self.sc.syllableCount(words[index]) print "Haiku: \n" print haiku
class DatabaseInit:
    """One-shot builder of data/haiku.db from n-gram, haiku-sample and
    word-association data files.

    Call TablesInit() once to create and populate every table.  Attributes:
      connection/cursor  -- open sqlite3 handle to data/haiku.db
      syllableCounter    -- project-local SyllableCounter
      fileList           -- n-gram source files, 2-grams through 5-grams
      fiveCount/sevenCount -- running primary keys for the syllable tables
      segmentList        -- dedup set (dict used as a set) of seen segments
      posList            -- populated nowhere in this class (appears unused)
      bigramList         -- nested dict: first word -> second word ->
                            [firstPos, secondPos, count], plus a per-first-word
                            "totCount" entry used for normalization
      conversion         -- POS tagset conversion map loaded from CSV
    """

    def __init__(self):
        self.connection = sqlite3.connect('data/haiku.db')
        self.cursor = self.connection.cursor()
        self.syllableCounter = SyllableCounter()
        path = "data/ngrams"
        self.fileList = [path+"/2grams.txt", path+"/3grams.txt", path+"/4grams.txt", path+"/5grams.txt"]
        self.fiveCount = 0
        self.sevenCount = 0
        self.segmentList = {}
        self.posList = {}
        self.bigramList = {}
        self.conversion = {}
        # Two-column CSV mapping one POS tagset to another (e.g. row[0] tag
        # -> row[1] tag); exact tagsets not visible here — TODO confirm.
        for row in csv.reader(open("data/tagset/conversion.csv","rb")):
            self.conversion[row[0]] = row[1]

    def TablesInit(self):
        """Create every table, then populate them from the data files.

        Raises sqlite3.OperationalError if the tables already exist
        (CREATE TABLE without IF NOT EXISTS).
        """
        self.cursor.execute('''CREATE TABLE FiveSyllables (id integer primary key, segment text)''')
        self.cursor.execute('''CREATE TABLE SevenSyllables (id integer primary key, segment text)''')
        self.cursor.execute('''CREATE TABLE Bigrams (id integer primary key, firstWord text, firstPos text, secondWord text, secondPos text,frequency float, firstSyllables integer,secondSyllables integer)''')
        self.cursor.execute('''CREATE TABLE PoeticBigrams (firstWord text, firstPos text, secondWord text, secondPos text, firstSyllables integer,secondSyllables integer)''')
        self.cursor.execute('''CREATE TABLE WordAssociations (cue text,target text,FSG float,BSG float, cuePos text, tarPos text)''')
        self.PoeticBigrams()
        self.WordAssociationInit()
        for i in xrange(len(self.fileList)):
            f = open(self.fileList[i])
            for line in f:
                self.SyllableTablesInit(line,i)
                if i == 3: #only use 5grams data since using everything takes too long
                    self.BuildBigramTable(line)
            # Commit after each n-gram file so a crash loses at most one file.
            self.connection.commit()
            f.close()
        self.BigramTablesInit()

    def SyllableTablesInit(self,line,fileNumber):
        #populates data in FiveSyllables Table and SevenSyllables Table
        # Input rows appear to be tab-separated with a leading count column
        # followed by the n-gram words — TODO confirm file format.
        # [1:(fileNumber+3)] keeps exactly the n words of an n-gram
        # (fileNumber 0 -> 2-grams, ... fileNumber 3 -> 5-grams).
        culledLine = " ".join([word.strip("\r\n") for word in line.split("\t")[1:(fileNumber+3)]])
        # Deduplicate case-insensitively; segmentList is a dict used as a set.
        if culledLine.lower() not in self.segmentList:
            self.segmentList[culledLine.lower()] = 0
        else:
            return
        syllableCounts = self.syllableCounter.getLineCounts(culledLine)
        # Segments totalling exactly 5 syllables feed haiku lines 1 and 3;
        # 7-syllable segments feed line 2.  Ids are hand-maintained counters.
        if syllableCounts and syllableCounts[0] == 5:
            self.fiveCount+=1
            self.cursor.execute("INSERT into FiveSyllables VALUES (?,?)",(self.fiveCount,culledLine))
        if syllableCounts and syllableCounts[0] == 7:
            self.sevenCount+=1
            self.cursor.execute("INSERT into SevenSyllables VALUES (?,?)",(self.sevenCount,culledLine))

    def BuildBigramTable(self,line):
        """Accumulate bigram counts from one 5-gram row into self.bigramList.

        Expects a tab-separated row: count, five words (indices 1-5), then
        five POS tags (indices 6-10) — presumably; TODO confirm file layout.
        """
        splitLine = [word.strip("\t\r\n ") for word in line.split("\t")]
        count = int(splitLine[0])
        # Slide over the four adjacent word pairs in the 5-gram; the POS tag
        # for word i sits 5 columns to its right.
        for i in xrange(1,5):
            firstWord = splitLine[i]
            secondWord = splitLine[i+1]
            firstPos = self.convertPos(splitLine[i+5])
            secondPos = self.convertPos(splitLine[i+6])
            if firstWord not in self.bigramList:
                # First sighting of this first word at all.
                self.bigramList[firstWord] = {}
                self.bigramList[firstWord][secondWord] = []
                self.bigramList[firstWord][secondWord].append(firstPos)
                self.bigramList[firstWord][secondWord].append(secondPos)
                self.bigramList[firstWord][secondWord].append(count)
            else:
                if secondWord not in self.bigramList[firstWord]:
                    # Known first word, new second word.
                    self.bigramList[firstWord][secondWord] = []
                    self.bigramList[firstWord][secondWord].append(firstPos)
                    self.bigramList[firstWord][secondWord].append(secondPos)
                    self.bigramList[firstWord][secondWord].append(count)
                else:
                    # Known pair: slot 2 holds the accumulated count.
                    self.bigramList[firstWord][secondWord][2]+=count
            # "totCount" lives alongside real second-word keys and holds the
            # total count for this first word (used to normalize later).
            if "totCount" not in self.bigramList[firstWord]:
                self.bigramList[firstWord]["totCount"] = count
            else:
                self.bigramList[firstWord]["totCount"]+=count

    def BigramTablesInit(self):
        """Flush self.bigramList into the Bigrams table.

        Converts each pair's raw count into a relative frequency
        (count / totCount for the first word, rounded to 2 decimals)
        in place, then inserts one row per pair.
        """
        count = 0
        for key,value in self.bigramList.items():
            for word,information in self.bigramList[key].items():
                if word != "totCount":
                    count+=1
                    keySyllables = self.syllableCounter.syllableCount(key)
                    wordSyllables = self.syllableCounter.syllableCount(word)
                    # +0.0 forces float division under Python 2 int semantics.
                    information[2] = float("{0:.2f}".format(information[2]/(self.bigramList[key]["totCount"]+0.0)))
                    self.cursor.execute("INSERT into Bigrams VALUES (?,?,?,?,?,?,?,?)",
                        (count,key,information[0],word,information[1],information[2],
                         keySyllables,wordSyllables))
        self.connection.commit()

    def PoeticBigrams(self):
        """Load word/POS bigrams observed in real (tagged) haiku samples.

        The tagged_haiku file appears to hold word/TAG tokens separated by
        spaces, with blank lines between haiku — TODO confirm format.  The
        count-based modular tests below pick which lines get a synthetic
        start-of-segment "S==>" bigram versus full bigram extraction.
        """
        f = open("data/haiku_samples/tagged_haiku")
        count = 0
        for line in f:
            splits = line.split(" ")
            if count % 3 == 0 or line == "\n":
                count += 1
                continue
            # NOTE(review): count == 0 is unreachable here (0 % 3 == 0 is
            # caught above) — left as written.
            if count == 0 or count % 4 == 0:
                # Sentence-start marker bigram: "S==>" precedes the first
                # word/tag token of the line.
                self.cursor.execute("INSERT into PoeticBigrams VALUES (?,?,?,?,?,?)",
                    ("S==>","S==>",splits[0].split("/")[0],splits[0].split("/")[1],
                     0,self.syllableCounter.syllableCount(splits[0].split("/")[0]),))
            else:
                # All adjacent word pairs on the line (last token skipped
                # by the -2 bound; presumably a trailing newline token).
                for index in xrange(len(splits)-2):
                    firstWord = splits[index].split("/")[0]
                    secondWord = splits[index+1].split("/")[0]
                    firstPos = splits[index].split("/")[1]
                    secondPos = splits[index+1].split("/")[1]
                    firstSyllables = self.syllableCounter.syllableCount(firstWord)
                    secondSyllables = self.syllableCounter.syllableCount(secondWord)
                    self.cursor.execute("INSERT into PoeticBigrams VALUES (?,?,?,?,?,?)",
                        (firstWord,firstPos,secondWord,secondPos,firstSyllables,secondSyllables,))
            count+=1

    def WordAssociationInit(self):
        """Load the free-association norm CSVs into WordAssociations.

        FSG/BSG are forward/backward association strengths; unparseable
        values default to 0.0.  Column indices (5, 6, 17, 26) match the
        association dataset's fixed layout — TODO confirm against the files.
        """
        fileList = ["AB.csv", "C.csv", "LO.csv", "PR.csv", "TZ.csv", "DF.csv", "GK.csv", "S.csv"]
        path = "data/associations/"
        for files in fileList:
            s = csv.reader(open(path + files))
            for row in s:
                # Best-effort parse: missing/bad strengths become 0.0.
                try:
                    FSG = float(row[5])
                except:
                    FSG = 0.0
                try:
                    BSG = float(row[6])
                except:
                    BSG = 0.0
                cue = row[0].lower().strip(" ")
                target = row[1].lower().strip(" ")
                cuePos = row[17].strip(" ")
                tarPos = row[26].strip(" ")
                self.cursor.execute("INSERT into WordAssociations VALUES (?,?,?,?,?,?)",
                    (cue,target,FSG,BSG,cuePos,tarPos))

    def convertPos(self,pos):
        """Map a POS tag through the conversion table; unknown tags pass
        through uppercased and unchanged."""
        pos = pos.upper()
        if pos in self.conversion:
            return self.conversion[pos]
        else:
            return pos
class Individual:
    """One haiku individual in a genetic-algorithm search.

    grammar -- per-line, per-word POS tags aligned with the haiku's words
               (indexed as grammar[line][word]); source not visible here.
    season  -- one of "summer"/"winter"/"fall"/"spring", selecting the
               seasonal word-association seed used by mutate().
    Opens its own read connection to data/haiku.db.
    """

    def __init__(self,grammar,season):
        self.syllableCounter = SyllableCounter()
        self.grammar = grammar
        self.season = season
        connection = sqlite3.connect("data/haiku.db")
        self.cursor = connection.cursor()

    def naiveMutate(self,haiku):
        """Replace one whole line of the haiku with a random DB segment.

        ~5/10 chance: swap line 1 or 3 for a random FiveSyllables segment;
        otherwise swap line 2 for a random SevenSyllables segment.
        Returns the mutated haiku as a newline-joined string.
        NOTE(review): randint(0, upperBound) can produce 0 and upperBound;
        ids in the tables start at 1, so an id-0 lookup would fetch no row
        and fetchone() would return None — verify against table contents.
        """
        lines = haiku.split("\n")
        randomSelection = randint(1,11)
        if randomSelection < 6:
            self.cursor.execute("Select count(*) from FiveSyllables")
            fiveUpperBound = self.cursor.fetchone()[0]
            randomIndex = int(randint(0,fiveUpperBound))
            self.cursor.execute("Select segment from FiveSyllables where id = ?",(randomIndex,))
            # Lines 1 and 3 are both 5-syllable lines; pick one at random.
            randomLine = choice([0,2])
            lines[randomLine] = self.cursor.fetchone()[0]
        else:
            self.cursor.execute("Select count(*) from SevenSyllables")
            sevenUpperBound = self.cursor.fetchone()[0]
            randomIndex = int(randint(0,sevenUpperBound))
            self.cursor.execute("Select segment from SevenSyllables where id = ?",(randomIndex,))
            lines[1] = self.cursor.fetchone()[0]
        return "\n".join(lines)

    def mutate(self,haiku):
        """Mutate the haiku by swapping nouns for season-themed words.

        Splits the haiku into a word grid, runs replaceNouns with the
        season's association object as the replacement pool, and re-joins.
        """
        splitHaiku = [line.split(" ") for line in [line for line in haiku.splitlines()]]
        seasonList = {"summer": Summer(),"winter": Winter(),"fall": Fall(),"spring": Spring()}
        associations = seasonList[self.season]
        splitHaiku = self.replaceNouns(splitHaiku,associations)
        return "\n".join([" ".join(line) for line in [line for line in splitHaiku]])

    def replaceNouns(self,splitHaiku,seed):
        """Swap each noun (grammar tag containing "NN") for a word drawn
        from seed, and nudge its neighbors toward words the Bigrams table
        says co-occur with the replacement.

        seed is consumed in place (chosen words are removed); when it runs
        out, the current line's replacements stop (break exits the inner
        loop only, so later lines are still attempted).
        Returns the mutated word grid.
        """
        for i in xrange(len(splitHaiku)):
            for j in xrange(len(splitHaiku[i])):
                # NOTE(review): `!= seed` compares a word against the seed
                # collection itself (never equal), presumably intended to
                # skip words already drawn from seed — confirm intent.
                if "NN" in self.grammar[i][j] and splitHaiku[i][j] != seed:
                    if len(seed) == 0:
                        break
                    replacement = seed[randint(0,len(seed)-1)]
                    seed.remove(replacement)
                    if j != 0:
                        # Re-pick the preceding word: same POS as before,
                        # but known to precede the replacement in Bigrams.
                        pre = [row for row in self.cursor.execute('''select firstWord from Bigrams where firstPos = ? and secondWord =?''', (self.grammar[i][j-1],replacement))]
                        if len(pre) > 0:
                            index = randint(0,len(pre)-1)
                            splitHaiku[i][j-1] = str(pre[index][0])
                    if j < len(splitHaiku[i])-1:
                        # Likewise re-pick the following word.
                        pos = [row for row in self.cursor.execute('''select secondWord from Bigrams where secondPos = ?
and firstWord = ?''', (self.grammar[i][j+1],replacement))]
                        if len(pos) > 0:
                            index = randint(0,len(pos)-1)
                            splitHaiku[i][j+1] = str(pos[index][0])
                    splitHaiku[i][j] = replacement
        return splitHaiku

    def fitness(self,haiku):
        """Score a haiku: pairwise noun relatedness plus a 200-point bonus
        for being a true 5-7-5 haiku (via isHaiku).

        Uses WordNet (wn) to detect nouns and the external connectedness()
        helper for the pairwise score; the substring tests exclude pairs
        where one word contains the other.
        """
        splits = [line.split(" ") for line in haiku.split("\n")]
        words = [str(word) for word in list(itertools.chain(*splits))]
        similarity = 0
        for word in words:
            if wn.morphy(word,wn.NOUN):
                nounSyns = wn.synsets(word,wn.NOUN)
                for secondWord in words:
                    if wn.morphy(secondWord,wn.NOUN) and (word not in secondWord and secondWord not in word):
                        for synset in nounSyns:
                            if secondWord in synset.lemma_names:
                                # Shared-synset bonus currently disabled.
                                #similarity+=10
                                pass
                        similarity+=connectedness(word,secondWord)
        similarity+=self.isHaiku(haiku)
        return similarity

    def isHaiku(self,haiku):
        """Return 200 if the haiku's lines count exactly 5/7/5 syllables,
        else 0 (used as a fitness bonus, not a boolean)."""
        haikuCounts = [5,7,5]
        lineCounts = []
        splits = [line.split(" ") for line in haiku.split("\n")]
        for line in splits:
            count = 0
            for word in line:
                count += self.syllableCounter.syllableCount(word)
            lineCounts.append(count)
        if haikuCounts == lineCounts:
            return 200
        else:
            return 0