Ejemplo n.º 1
0
class Reddit:
	"""Scan Reddit comments and print the ones that scan as 5-7-5 haiku.

	Attributes set by __init__:
		agent -- praw.Reddit client created with user agent "cs_haiku_bot"
		haiku -- a Haiku instance (not referenced by the methods shown here)
		sc    -- SyllableCounter used for every syllable count
	"""

	def __init__(self):
		self.agent = praw.Reddit(user_agent="cs_haiku_bot")
		self.haiku = Haiku()
		self.sc = SyllableCounter()

	def allComments(self):
		"""Check up to 300 comments from agent.get_all_comments() for haiku."""
		self._processComments(self.agent.get_all_comments(limit=300))

	def subredditComments(self,subreddit):
		"""Check up to 300 comments of the named subreddit for haiku."""
		sub = self.agent.get_subreddit(subreddit)
		self._processComments(sub.get_comments(limit=300))

	def _processComments(self,comments):
		"""Run prepareHaiku on the body of every comment that is a candidate."""
		# All bodies are collected before any is processed, so the whole
		# listing is fetched up front rather than interleaved with printing.
		bodies = [comment.body for comment in comments]

		for body in bodies:
			if self.isHaikuCandidate(body):
				self.prepareHaiku(body)

	def isHaikuCandidate(self,comment):
		"""Return a truthy value when the comment totals exactly 17 syllables.

		The result is the (falsy) value of sc.getLineCounts(comment) when that
		is empty/None, otherwise a bool telling whether the counts sum to 17.
		"""
		counts = self.sc.getLineCounts(comment)
		return counts and sum(counts) == 17

	def prepareHaiku(self,comment):
		"""Re-flow the words of comment into 5/7/5 lines and print the result.

		Words are taken in order (split on newlines, then on single spaces)
		and appended to the current line until it reaches its syllable
		target; the next word then opens the following line.  Nothing is
		printed when a line overshoots its target, because the words cannot
		be broken on a 5/7/5 boundary.  Always returns None.
		"""
		lineCounts = [5,7,5]

		words = [str(word) for line in comment.splitlines() for word in line.split(" ")]

		count = 0
		lineIndex = 0
		haiku = ""

		for word in words:
			if count > lineCounts[lineIndex] or count < 0:
				return

			if count == lineCounts[lineIndex]:
				# The last line is full: stop here.  Stepping past it used to
				# index lineCounts out of range on the next leftover token
				# (e.g. the empty strings produced by trailing spaces).
				if lineIndex == len(lineCounts) - 1:
					break

				haiku += "\n"
				lineIndex += 1
				count = 0

			haiku += word + " "
			count += self.sc.syllableCount(word)

		print("Haiku: \n")
		print(haiku)
Ejemplo n.º 2
0
class DatabaseInit:
	"""Create and populate the data/haiku.db SQLite database.

	Attributes set by __init__:
		connection, cursor -- sqlite3 connection to data/haiku.db and its cursor
		syllableCounter    -- SyllableCounter used for all syllable counts
		fileList           -- paths of the 2- to 5-gram files, in that order
		fiveCount          -- rows inserted into FiveSyllables so far
		sevenCount         -- rows inserted into SevenSyllables so far
		segmentList        -- lower-cased segments already seen (dict used as a set)
		posList            -- initialised empty; not used by the methods shown here
		bigramList         -- firstWord -> {secondWord: [firstPos, secondPos, count],
		                      "totCount": total count for firstWord}
		conversion         -- tag -> tag mapping read from data/tagset/conversion.csv
	"""

	def __init__(self):
		self.connection = sqlite3.connect('data/haiku.db')
		self.cursor = self.connection.cursor()
		self.syllableCounter = SyllableCounter()

		path = "data/ngrams"

		# Index i holds the (i+2)-gram file; SyllableTablesInit relies on that
		# to know how many word columns each line has.
		self.fileList = [path+"/2grams.txt",
				path+"/3grams.txt",
				path+"/4grams.txt",
				path+"/5grams.txt"]

		self.fiveCount = 0
		self.sevenCount = 0
		self.segmentList = {}
		self.posList = {}
		self.bigramList = {}
		self.conversion = {}

		# "with" guarantees the file is closed even if a row is malformed.
		with open("data/tagset/conversion.csv","rb") as conversionFile:
			for row in csv.reader(conversionFile):
				self.conversion[row[0]] = row[1]

	def TablesInit(self):
		"""Create every table and fill it from the data files.

		Order: create tables, load PoeticBigrams and WordAssociations, then
		stream each n-gram file (committing after each one) and finally write
		the accumulated bigram statistics.
		"""
		self.cursor.execute('''CREATE TABLE FiveSyllables
					(id integer primary key, segment text)''')

		self.cursor.execute('''CREATE TABLE SevenSyllables
					(id integer primary key, segment text)''')

		self.cursor.execute('''CREATE TABLE Bigrams
					(id integer primary key, firstWord text, firstPos text, secondWord text,
					secondPos text,frequency float, firstSyllables integer,secondSyllables integer)''')

		self.cursor.execute('''CREATE TABLE PoeticBigrams
					(firstWord text, firstPos text, secondWord text,
					secondPos text, firstSyllables integer,secondSyllables integer)''')

		self.cursor.execute('''CREATE TABLE WordAssociations
					(cue text,target text,FSG float,BSG float, cuePos text, tarPos text)''')
		self.PoeticBigrams()
		self.WordAssociationInit()

		for fileNumber, fileName in enumerate(self.fileList):
			with open(fileName) as ngramFile:
				for line in ngramFile:
					self.SyllableTablesInit(line,fileNumber)

					if fileNumber == 3:  #only use 5grams data since using everything takes too long
						self.BuildBigramTable(line)

			self.connection.commit()

		self.BigramTablesInit()

	def SyllableTablesInit(self,line,fileNumber):
		"""Insert the segment of one n-gram line into FiveSyllables/SevenSyllables.

		line       -- tab-separated n-gram record; column 0 is skipped and the
		              next fileNumber+2 columns are the words of the segment
		fileNumber -- index of the source file in fileList

		A segment is handled only the first time it is seen (case-insensitive).
		It is stored when the first entry of getLineCounts() is 5 or 7.
		"""
		columns = line.split("\t")[1:(fileNumber+3)]
		culledLine = " ".join([word.strip("\r\n") for word in columns])

		segmentKey = culledLine.lower()
		if segmentKey in self.segmentList:
			return
		self.segmentList[segmentKey] = 0

		syllableCounts = self.syllableCounter.getLineCounts(culledLine)
		if not syllableCounts:
			return

		if syllableCounts[0] == 5:
			self.fiveCount+=1
			self.cursor.execute("INSERT into FiveSyllables VALUES (?,?)",(self.fiveCount,culledLine))

		if syllableCounts[0] == 7:
			self.sevenCount+=1
			self.cursor.execute("INSERT into SevenSyllables VALUES (?,?)",(self.sevenCount,culledLine))

	def BuildBigramTable(self,line):
		"""Accumulate the four adjacent word pairs of a 5-gram line in bigramList.

		line -- tab-separated record: count, five words, then the five tags
		        (word i is column i, its tag is column i+5).

		For every pair the line's count is added to the pair's own count and to
		the first word's "totCount".  Tags are stored only the first time a
		pair is seen.
		"""
		splitLine = [word.strip("\t\r\n ") for word in line.split("\t")]
		count = int(splitLine[0])

		for i in range(1,5):
			firstWord = splitLine[i]
			secondWord = splitLine[i+1]
			firstPos = self.convertPos(splitLine[i+5])
			secondPos = self.convertPos(splitLine[i+6])

			followers = self.bigramList.setdefault(firstWord,{})

			if secondWord in followers:
				followers[secondWord][2]+=count
			else:
				followers[secondWord] = [firstPos,secondPos,count]

			followers["totCount"] = followers.get("totCount",0) + count

	def BigramTablesInit(self):
		"""Write bigramList to the Bigrams table and commit.

		Each pair's raw count is replaced in place by its relative frequency
		(count / totCount of the first word, rounded to two decimals), so this
		method must run only once per accumulated bigramList.
		"""
		rowId = 0
		for key,followers in self.bigramList.items():
			total = followers["totCount"]+0.0  # force float division
			keySyllables = self.syllableCounter.syllableCount(key)

			for word,information in followers.items():
				if word == "totCount":
					continue

				rowId+=1
				wordSyllables = self.syllableCounter.syllableCount(word)

				information[2] = float("{0:.2f}".format(information[2]/total))

				self.cursor.execute("INSERT into Bigrams VALUES (?,?,?,?,?,?,?,?)",
							(rowId,key,information[0],word,information[1],information[2],
							 keySyllables,wordSyllables))

		self.connection.commit()

	def PoeticBigrams(self):
		"""Fill PoeticBigrams from data/haiku_samples/tagged_haiku.

		Every token of the file is expected in word/TAG form.  A running line
		counter selects what happens to each line:
		  * counter divisible by 3, or a blank line: the line is skipped;
		  * counter divisible by 4: one row pairing the start marker "S==>"
		    with the first token of the line is inserted;
		  * otherwise: one row per adjacent token pair, excluding the pair that
		    ends on the last token.
		Rows are not committed here; the caller commits.
		"""
		with open("data/haiku_samples/tagged_haiku") as f:
			count = 0

			for line in f:
				splits = line.split(" ")

				if count % 3 == 0 or line == "\n":
					count += 1
					continue

				# NOTE: "count == 0" can never hold here, because 0 % 3 == 0 was
				# already skipped above; kept so the selection rule is unchanged.
				if count == 0 or count % 4 == 0:
					firstToken = splits[0].split("/")
					self.cursor.execute("INSERT into PoeticBigrams VALUES (?,?,?,?,?,?)",
								("S==>","S==>",firstToken[0],firstToken[1],
								 0,self.syllableCounter.syllableCount(firstToken[0]),))
				else:
					for index in range(len(splits)-2):
						firstWord = splits[index].split("/")[0]
						secondWord = splits[index+1].split("/")[0]
						firstPos = splits[index].split("/")[1]
						secondPos = splits[index+1].split("/")[1]

						firstSyllables = self.syllableCounter.syllableCount(firstWord)
						secondSyllables = self.syllableCounter.syllableCount(secondWord)

						self.cursor.execute("INSERT into PoeticBigrams VALUES (?,?,?,?,?,?)",
									(firstWord,firstPos,secondWord,secondPos,firstSyllables,secondSyllables,))

				count+=1

	def WordAssociationInit(self):
		"""Fill WordAssociations from the CSV files under data/associations/.

		Columns used per row: 0 cue, 1 target, 5 FSG, 6 BSG, 17 cue tag,
		26 target tag.  An FSG/BSG cell that is missing or not a number is
		stored as 0.0.  Rows are not committed here; the caller commits.
		"""
		fileList = ["AB.csv",
			"C.csv",
			"LO.csv",
			"PR.csv",
			"TZ.csv",
			"DF.csv",
			"GK.csv",
			"S.csv"]

		path = "data/associations/"

		for files in fileList:
			with open(path + files) as associationFile:
				for row in csv.reader(associationFile):
					# Only conversion problems are tolerated; a bare except
					# would also hide KeyboardInterrupt and real bugs.
					try:
						FSG = float(row[5])
					except (ValueError, IndexError):
						FSG = 0.0

					try:
						BSG = float(row[6])
					except (ValueError, IndexError):
						BSG = 0.0

					cue = row[0].lower().strip(" ")
					target = row[1].lower().strip(" ")
					cuePos = row[17].strip(" ")
					tarPos = row[26].strip(" ")

					self.cursor.execute("INSERT into WordAssociations VALUES (?,?,?,?,?,?)",
								(cue,target,FSG,BSG,cuePos,tarPos))

	def convertPos(self,pos):
		"""Return the upper-cased tag, mapped through self.conversion when listed."""
		pos = pos.upper()
		return self.conversion.get(pos,pos)
Ejemplo n.º 3
0
class Individual:
	"""One candidate haiku generator: mutation operators plus a fitness score.

	Attributes set by __init__:
		syllableCounter -- SyllableCounter used by isHaiku
		grammar         -- per-line, per-word tag lists; grammar[i][j] is the tag
		                   of word j on line i (a tag containing "NN" marks a noun)
		season          -- one of "summer", "winter", "fall", "spring"
		connection      -- sqlite3 connection to data/haiku.db
		cursor          -- cursor on that connection
	"""

	def __init__(self,grammar,season):
		self.syllableCounter = SyllableCounter()
		self.grammar = grammar
		self.season = season

		# Keep the connection reachable so that it can be closed by the owner.
		self.connection = sqlite3.connect("data/haiku.db")
		self.cursor = self.connection.cursor()

	def naiveMutate(self,haiku):
		"""Return haiku with one line swapped for a random stored segment.

		With probability 5/11 line 0 or line 2 is replaced by a random row of
		FiveSyllables, otherwise line 1 is replaced by a random row of
		SevenSyllables.  When no segment can be fetched the haiku is returned
		unchanged.
		"""
		lines = haiku.split("\n")

		if randint(1,11) < 6:
			segment = self._randomSegment("FiveSyllables")
			targetLine = choice([0,2])
		else:
			segment = self._randomSegment("SevenSyllables")
			targetLine = 1

		if segment is not None:
			lines[targetLine] = segment

		return "\n".join(lines)

	def _randomSegment(self,table):
		"""Return the segment of a random row of table, or None if unavailable.

		table is always one of the two literal table names used by
		naiveMutate, never external input, so concatenating it is safe.
		"""
		self.cursor.execute("Select count(*) from " + table)
		upperBound = self.cursor.fetchone()[0]

		if upperBound < 1:
			return None

		# Row ids are assigned from 1 upwards, so the draw must start at 1:
		# drawing 0 matched no row and crashed on fetchone()[0].
		randomIndex = int(randint(1,upperBound))
		self.cursor.execute("Select segment from " + table + " where id = ?",(randomIndex,))

		row = self.cursor.fetchone()
		if row is None:
			return None
		return row[0]

	def mutate(self,haiku):
		"""Return haiku with its nouns replaced by words tied to self.season.

		Raises KeyError when self.season is not a known season name.
		"""
		splitHaiku = [line.split(" ") for line in haiku.splitlines()]

		# Only the association list that is actually needed gets built.
		seasonList = {"summer": Summer,"winter": Winter,"fall": Fall,"spring": Spring}
		associations = seasonList[self.season]()

		splitHaiku = self.replaceNouns(splitHaiku,associations)

		return "\n".join([" ".join(line) for line in splitHaiku])

	def replaceNouns(self,splitHaiku,seed):
		"""Replace nouns of splitHaiku, in place, by words drawn from seed.

		splitHaiku -- list of lines, each a list of words (modified and returned)
		seed       -- list of replacement words; each word used is removed
		              from it, so the caller's list shrinks

		A word is replaced when its tag in self.grammar contains "NN" and the
		word is not itself one of the seed words.  After choosing a
		replacement, the neighbouring words are swapped, when the Bigrams
		table offers one, for a word with the neighbour's tag that precedes
		(left neighbour) or follows (right neighbour) the replacement.
		"""
		for i,line in enumerate(splitHaiku):
			# Indexed on purpose: position j+1 may be rewritten while at j.
			for j in range(len(line)):
				# The original compared the word with the whole seed list,
				# which is always unequal; membership is what is meant.
				if "NN" not in self.grammar[i][j] or line[j] in seed:
					continue

				if len(seed) == 0:
					# Nothing left to substitute anywhere.
					return splitHaiku

				replacement = seed[randint(0,len(seed)-1)]
				seed.remove(replacement)

				if j != 0:
					pre = self.cursor.execute('''select firstWord from Bigrams
					where firstPos = ? and secondWord =?''',
					(self.grammar[i][j-1],replacement)).fetchall()

					if len(pre) > 0:
						index = randint(0,len(pre)-1)
						line[j-1] = str(pre[index][0])

				if j < len(line)-1:
					pos = self.cursor.execute('''select secondWord from Bigrams
					where secondPos = ? and firstWord = ?''',
					(self.grammar[i][j+1],replacement)).fetchall()

					if len(pos) > 0:
						index = randint(0,len(pos)-1)
						line[j+1] = str(pos[index][0])

				line[j] = replacement

		return splitHaiku

	def fitness(self,haiku):
		"""Return the fitness score of haiku (higher is better).

		The score is the sum of connectedness(word, other) over every word
		for which wn.morphy finds a noun base form, paired with every word of
		the haiku (itself included), plus the isHaiku() bonus.
		"""
		words = [str(word) for line in haiku.split("\n") for word in line.split(" ")]

		similarity = 0

		for word in words:
			if not wn.morphy(word,wn.NOUN):
				continue

			# A former synonym check over the noun synsets lived here; its
			# body was only "pass", so it never influenced the score.
			for secondWord in words:
				similarity+=connectedness(word,secondWord)

		similarity+=self.isHaiku(haiku)

		return similarity

	def isHaiku(self,haiku):
		"""Return 200 when the lines of haiku count exactly 5, 7 and 5
		syllables, otherwise 0."""
		haikuCounts = [5,7,5]

		lineCounts = [sum(self.syllableCounter.syllableCount(word) for word in line.split(" "))
				for line in haiku.split("\n")]

		if haikuCounts == lineCounts:
			return 200
		return 0