def GenerateFFA(self, pattern):
    """Launch the ``PostRegex`` OpenCL kernel over ``pattern`` and wait for it.

    Creates the device buffers handed to the kernel (the encoded pattern, a
    counter initialised to -1, an int32 stack and a results buffer), enqueues
    the kernel with one work-item per character of ``pattern`` and blocks
    until the returned event completes.

    Side effects: sets ``self.matches``, ``self.stack`` and
    ``self.resultsBuffer``.  Nothing is copied back from the device here.

    Args:
        pattern: object supporting ``len()`` and ``.encode()``; the encoded
            bytes are uploaded and its length is passed as the kernel's last
            (scalar) argument.
    """
    mf = cl.mem_flags
    pattern_len = len(pattern)

    # The original carried the value 8388608 in a trailing comment here.
    global_size = (pattern_len,)
    # NOTE(review): the local size is fixed at 2 while the global size is
    # len(pattern); OpenCL before 2.0 requires the global size to be a
    # multiple of the local size -- confirm odd-length patterns are handled.
    local_size = (2,)
    counter = np.int32(-1)

    # ``np.str`` was only an alias of the builtin ``str``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use ``str`` directly.  The array
    # is only used below for its ``nbytes`` when sizing the results buffer.
    self.matches = np.zeros(pattern_len).astype(str)
    self.stack = np.zeros(pattern_len).astype(np.int32)

    # NOTE(review): the buffer holds len(pattern.encode()) bytes but the
    # kernel is told len(pattern); these differ for non-ASCII text -- confirm
    # that only ASCII patterns are expected.
    d_pat = cl.Buffer(
        self.ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=pattern.encode(),
    )
    counterBuffer = cl.Buffer(
        self.ctx,
        mf.READ_WRITE | mf.COPY_HOST_PTR,
        hostbuf=counter,
    )
    stackBuffer = cl.Buffer(self.ctx, mf.READ_WRITE, self.stack.nbytes)
    self.resultsBuffer = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.matches.nbytes)

    regex = self.program.PostRegex
    # Only the fifth argument is a scalar; the first four are buffers.
    regex.set_scalar_arg_dtypes([None, None, None, None, int])
    event = regex(
        self.queue,
        global_size,
        local_size,
        d_pat,
        counterBuffer,
        stackBuffer,
        self.resultsBuffer,
        pattern_len,
    )
    event.wait()
# Timestamp of the earliest and of the latest tweet, in that order.
sql_get_begEnd = [
    "SELECT created_at FROM tweets ORDER BY created_at ASC LIMIT 0,1",
    "SELECT created_at FROM tweets ORDER BY created_at DESC LIMIT 0,1",
]

# Award announcements: retweets of @eonline and of @goldenglobes, oldest first.
sql_awards = [
    "SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @eonline:%' ORDER BY created_at ASC",
    "SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @goldenglobes:%' ORDER BY created_at ASC",
]

# Every tweet, oldest first.
sql_all_tweets = "SELECT created_at, tweet FROM tweets ORDER BY created_at ASC"

# Fashion commentary patterns.  The first matches "<words> looks <positive
# adjective>", the second "<words> looks/is <negative adjective>"; both allow
# an optional intensifier (so / really / absolutely) and capture the rest of
# the text in the last group.
regexes = [
    regex(r'([a-zA-Z]* [a-zA-Z\']*)? looks (so |really |absolutely )?(fantastic|amazing|wonderful|fabulous|great|good|stunning|ravishing|beautiful|sensational|hot|sexy|gorgeous|effortless|awesome|ethereal|effervescent|radiant|fierce|lovely|elegant|flawless|divine|pretty|cute|incredible)+(.*)'),
    regex(r'([a-zA-Z]* [a-zA-Z\']*)? (looks|is) (so |really |absolutely )?(horrible|ugly|gross|terrible|horrendous|attrocious|fat|cheap|slutty|busted|unflattering|ill-fitting|old|heavy|missed the mark|like a distaster|awful|messy|hot mess|a mess[. !,]|dreadful|horrid|appalling){1}(.*)'),
]
rX1, rX2 = regexes

# Example tweet formats:
#   RT @eonline: Best Original Song, Motion Picture: Skyfall by @OfficialAdele! #GoldenGlobes (url) (url)
#   RT @goldenglobes: Best Supporting Actor in a Motion Picture - Christoph Waltz - Django Unchained - #GoldenGlobes

con = lite.connect("gg_tweets.sqlite3")

# Two-slot placeholder, both entries start as empty strings.
timeInterval = [""] * 2
fashion, awards = [], []
# Script fragment: loads a JSON transcript, normalises the text, writes it out,
# then looks for the paragraph that contains the generated summary.
#
# ``paragraph``, ``timestamp``, ``content`` and ``count`` are used below but
# not initialised in this excerpt -- presumably set up earlier; TODO confirm.
# NOTE(review): indentation was reconstructed from a copy whose line breaks
# were collapsed -- confirm the nesting against the original file.

# NOTE(review): summaryFile is opened for writing but is neither written to
# nor closed in the visible code.
summaryFile= open("/home/narain/workspace/questiongeneration/summary.txt","wb")
transcriptFile= open("/home/narain/workspace/questiongeneration/transcript.txt","wb")

with open("/home/narain/workspace/questiongeneration/transcript.json") as json_file:
    json_data = json.load(json_file)
    for item in json_data:
        # iteritems() is the Python 2 dict API.  Each key is recorded as a
        # timestamp and each value as a paragraph, in lockstep, and the value
        # is also appended to the running ``content`` string.
        for attribute, value in item.iteritems():
            paragraph.append(value)
            timestamp.append(attribute)
            content = content + " "+ value

# Pass the accumulated text through contractions() and then regex() (both
# defined outside this excerpt) before writing it to the transcript file.
content = contractions(content)
content = regex(content)
transcriptFile.write(content)
transcriptFile.close()

st = SummaryTool()
sentences_dic = st.get_senteces_ranks(content)
summary = st.get_distractors(content,sentences_dic,1)

# Scan the paragraphs for one that contains ``summary``; ``count`` is advanced
# before the check, so ``count-1`` is the index relative to its starting value.
for p in paragraph:
    temp = p
    count = count + 1
    if temp.find(summary) != -1:
        # NOTE(review): if ``count`` starts at 0 and ``timestamp`` has one
        # entry per paragraph, a match in the last paragraph makes
        # ``timestamp[count]`` index one past the end -- confirm.
        start_time = timestamp[count-1]
        end_time = timestamp[count]
from award import *
from noms import *
from commentary import *
from regex import *
from baby_names import *

# Queries returning the earliest and the latest tweet timestamp, in that order.
sql_get_begEnd = ["SELECT created_at FROM tweets ORDER BY created_at ASC LIMIT 0,1",
                  "SELECT created_at FROM tweets ORDER BY created_at DESC LIMIT 0,1"]

# Awards queries -- retrieve database records in chronological order.
# One query per source account: retweets of @eonline, then of @goldenglobes.
sql_awards = ["SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @eonline:%' ORDER BY created_at ASC",
              "SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @goldenglobes:%' ORDER BY created_at ASC"]

# Every tweet, oldest first.
sql_all_tweets = "SELECT created_at, tweet FROM tweets ORDER BY created_at ASC"

# Regexes for fashion commentary.  rX1 matches "<words> looks <positive
# adjective>", rX2 matches "<words> looks/is <negative adjective>"; both allow
# an optional intensifier (so / really / absolutely) and capture the remaining
# text in the last group.
# ``regex`` is presumably provided by one of the star imports above -- TODO confirm.
rX1 = regex(r'([a-zA-Z]* [a-zA-Z\']*)? looks (so |really |absolutely )?(fantastic|amazing|wonderful|fabulous|great|good|stunning|ravishing|beautiful|sensational|hot|sexy|gorgeous|effortless|awesome|ethereal|effervescent|radiant|fierce|lovely|elegant|flawless|divine|pretty|cute|incredible)+(.*)')
rX2 = regex(r'([a-zA-Z]* [a-zA-Z\']*)? (looks|is) (so |really |absolutely )?(horrible|ugly|gross|terrible|horrendous|attrocious|fat|cheap|slutty|busted|unflattering|ill-fitting|old|heavy|missed the mark|like a distaster|awful|messy|hot mess|a mess[. !,]|dreadful|horrid|appalling){1}(.*)')
regexes = [rX1, rX2]

# Example tweet formats:
#   RT @eonline: Best Original Song, Motion Picture: Skyfall by @OfficialAdele! #GoldenGlobes (url) (url)
#   RT @goldenglobes: Best Supporting Actor in a Motion Picture - Christoph Waltz - Django Unchained - #GoldenGlobes

# ``lite`` is not imported by name in this excerpt -- presumably it comes from
# a star import or an import outside the view; TODO confirm.
con = lite.connect("gg_tweets.sqlite3")
timeInterval = ["",""]
fashion = []
awards = []

# The body of this ``with`` block lies beyond the end of this excerpt.
with con:
def __init__(self, *args):
    """Create a ``regex`` object, keep it on ``self._r`` and pass it to ``self._init``.

    NOTE(review): the call below looks wrong as written.  ``args`` is the
    tuple of positional arguments, yet it is expanded with ``**``, which
    requires a mapping and raises ``TypeError`` for a tuple.  ``*compile``
    unpacks whatever ``compile`` is bound to; unless this module rebinds that
    name to an iterable it is the builtin function, which cannot be unpacked
    either.  Confirm the intended call before relying on this constructor.
    """
    self._r=r=regex(*compile, **args)
    self._init(r)
# Script fragment: looks up ``word`` and prints the matching entry from ``Z3``.
#
# - ``word`` is passed through ``stopword.remove`` and lower-cased.
# - ``sys.argv[2]`` selects the matcher: 1 -> Boyer_Moore(Q, word),
#   2 -> KMP(Q, word).  ``sys.argv[3]`` is read into ``r`` and used as the
#   threshold ``r * 0.01`` against the values in ``percent``.
# - When the matcher result is not -1 and ``word`` equals
#   ``Z3[getindex(id)][0]``, ``Z3[getindex(id)][1]`` is printed.
# - Otherwise ``regex(Z3, word)`` supplies ``(id, percent)``; ``idr`` holds
#   the indices of the three largest ``percent`` values in ascending order,
#   and the non-zero ones are inserted at the front of ``ids``, so ``ids[0]``
#   is the index with the highest percentage.
#
# NOTE(review): ``for m in range(3)`` indexes ``idr[0..2]``, so fewer than
#   three entries in ``percent`` would raise IndexError -- confirm.
# NOTE(review): ``id`` shadows the builtin, and no visible branch assigns it
#   when ``sys.argv[2]`` is neither 1 nor 2 -- confirm it is set elsewhere.
# NOTE(review): line breaks and indentation are missing from this line, so
#   the nesting of ``idx = 0`` and of the trailing ``else:`` / ``if`` cannot
#   be determined here; the final ``if`` header has no body in this excerpt.
word = stopword.remove(word).lower() pil = int(sys.argv[2]) r = int(sys.argv[3]) if (pil == 1): id = Boyer_Moore(Q, word) elif (pil == 2): id = KMP(Q, word) idx = 0 if (id != -1 and word == Z3[getindex(id)][0]): print(Z3[getindex(id)][1]) else: id, percent = regex(Z3, word) idr = sorted(range(len(percent)), key=lambda i: percent[i])[-3:] ids = [] for m in range(3): if (percent[idr[m]] != 0): ids.insert(0, idr[m]) if (len(ids) >= 1): if (percent[ids[0]] >= r * 0.01): print(Z3[ids[0]][1]) else: ok = False if (len(ids) == 1):