Example #1
0
    def GenerateFFA(self, pattern):
        """Run the ``PostRegex`` OpenCL kernel over ``pattern``.

        Allocates the device buffers the kernel is called with (the encoded
        pattern, a counter initialised to -1, an int32 stack and a results
        buffer), enqueues the kernel with one work-item per character of
        ``pattern`` and blocks until the kernel has finished.

        Side effects: sets ``self.matches``, ``self.stack`` and
        ``self.resultsBuffer``. Nothing is returned and the results buffer
        is not copied back to the host here.

        Args:
            pattern: the pattern text; it is uploaded as ``pattern.encode()``.
        """
        mf = cl.mem_flags
        pattern_len = len(pattern)
        # NOTE(review): the work-group size is fixed at 2, so an odd-length
        # pattern gives a global size that is not a multiple of it -- confirm
        # the target OpenCL platform accepts that.
        global_size = (pattern_len, )  #8388608
        local_size = (2, )

        counter = np.int32(-1)
        # ``np.str`` was only an alias of the builtin ``str``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``str``
        # directly (the resulting dtype is the same).
        self.matches = np.zeros(pattern_len).astype(str)
        self.stack = np.zeros(pattern_len).astype(np.int32)

        # NOTE(review): the buffer holds the *encoded* bytes while the kernel
        # is told len(pattern) characters; these differ for non-ASCII input.
        d_pat = cl.Buffer(self.ctx,
                          mf.READ_ONLY | mf.COPY_HOST_PTR,
                          hostbuf=pattern.encode())
        counterBuffer = cl.Buffer(self.ctx,
                                  mf.READ_WRITE | mf.COPY_HOST_PTR,
                                  hostbuf=counter)
        stackBuffer = cl.Buffer(self.ctx, mf.READ_WRITE, self.stack.nbytes)
        self.resultsBuffer = cl.Buffer(self.ctx, mf.WRITE_ONLY,
                                       self.matches.nbytes)

        kernel = self.program.PostRegex
        # The four buffer arguments are passed as-is (None); only the trailing
        # length argument is a scalar.
        kernel.set_scalar_arg_dtypes([None, None, None, None, int])
        event = kernel(self.queue, global_size, local_size, d_pat,
                       counterBuffer, stackBuffer, self.resultsBuffer,
                       pattern_len)
        event.wait()
Example #2
0
# queries to get the beginning and end time of the event: the earliest and
# the latest `created_at` value in the `tweets` table, one row each
sql_get_begEnd = [
    "SELECT created_at FROM tweets ORDER BY created_at ASC LIMIT 0,1",
    "SELECT created_at FROM tweets ORDER BY created_at DESC LIMIT 0,1"
]

# awards queries -- retrieve database records in chronological order,
# restricted to retweets of the @eonline and @goldenglobes accounts
sql_awards = [
    "SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @eonline:%' ORDER BY created_at ASC",
    "SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @goldenglobes:%' ORDER BY created_at ASC"
]

# every tweet with its timestamp, oldest first
sql_all_tweets = "SELECT created_at, tweet FROM tweets ORDER BY created_at ASC"

# regexes for fashion commentary
# rX1: "<up to two words> looks [so|really|absolutely] <positive adjective>..."
# NOTE(review): `regex` is defined outside this section; presumably it
# compiles the pattern -- confirm.
rX1 = regex(
    r'([a-zA-Z]* [a-zA-Z\']*)? looks (so |really |absolutely )?(fantastic|amazing|wonderful|fabulous|great|good|stunning|ravishing|beautiful|sensational|hot|sexy|gorgeous|effortless|awesome|ethereal|effervescent|radiant|fierce|lovely|elegant|flawless|divine|pretty|cute|incredible)+(.*)'
)
# rX2: "<up to two words> looks|is [so|really|absolutely] <negative term>..."
rX2 = regex(
    r'([a-zA-Z]* [a-zA-Z\']*)? (looks|is) (so |really |absolutely )?(horrible|ugly|gross|terrible|horrendous|attrocious|fat|cheap|slutty|busted|unflattering|ill-fitting|old|heavy|missed the mark|like a distaster|awful|messy|hot mess|a mess[. !,]|dreadful|horrid|appalling){1}(.*)'
)
regexes = [rX1, rX2]

# Example tweet formats:
# RT @eonline: Best Original Song, Motion Picture: Skyfall by @OfficialAdele! #GoldenGlobes (url) (url)
# RT @goldenglobes: Best Supporting Actor in a Motion Picture - Christoph Waltz - Django Unchained - #GoldenGlobes

# NOTE(review): `lite` is imported outside this section (presumably sqlite3);
# the database file path is relative to the current working directory.
con = lite.connect("gg_tweets.sqlite3")

# module-level state, empty at this point; nothing in this section fills it
timeInterval = ["", ""]
fashion = []
awards = []
# Build one text blob from a JSON transcript, clean it, write it out, then
# look up the paragraph (and its timestamps) that contains the summary text.
# NOTE(review): `paragraph`, `timestamp`, `content` and `count` are used below
# but not initialised in this section -- they must be set up earlier.

# Both output files are opened (and truncated) up front in binary mode.
# NOTE(review): summaryFile is neither written to nor closed in the code
# visible here -- confirm it is handled further down.
summaryFile= open("/home/narain/workspace/questiongeneration/summary.txt","wb")
transcriptFile= open("/home/narain/workspace/questiongeneration/transcript.txt","wb")
with open("/home/narain/workspace/questiongeneration/transcript.json") as json_file:
    json_data = json.load(json_file)
   
# Each item is iterated as a mapping: the key is stored as a timestamp, the
# value as a paragraph, and the value is also appended to the running text.
# (`iteritems` is the Python 2 dict API.)
for item in json_data:
	for attribute, value in item.iteritems():
		paragraph.append(value)
		timestamp.append(attribute)
		content = content + " "+ value


#print "Before processing: \n",content
# NOTE(review): `contractions` and `regex` are helpers defined elsewhere;
# judging by the debug prints, the first expands contractions and the second
# does further text clean-up -- confirm.
content = contractions(content)
#print "After expanding contractions:\n",content
content = regex(content)
#print "After processing: \n",content
transcriptFile.write(content)
transcriptFile.close()
st = SummaryTool()
sentences_dic = st.get_senteces_ranks(content)
summary = st.get_distractors(content,sentences_dic,1)
#print "Summary:\n",summary
# Scan the paragraphs for the summary text. `count` is incremented before it
# is used, so timestamp[count-1] belongs to the current paragraph and
# timestamp[count] to the following one (assuming count starts at 0 -- TODO
# confirm).
# NOTE(review): if the match is in the last paragraph, timestamp[count] looks
# like it would be out of range -- verify.
# NOTE(review): the search uses the cleaned `summary` against the raw
# paragraphs; confirm the clean-up steps cannot prevent a match.
for p in paragraph:
    temp = p
    count = count + 1
    if temp.find(summary) != -1:
 #       print "Found the sentence in para -",count
  #      print "\n"
        start_time = timestamp[count-1]
        end_time = timestamp[count]
Example #4
0
from award import *
from noms import *
from commentary import *
from regex import *
from baby_names import *

# queries to get the beginning and end time of the event: the earliest and
# the latest `created_at` value in the `tweets` table, one row each
sql_get_begEnd = ["SELECT created_at FROM tweets ORDER BY created_at ASC LIMIT 0,1", "SELECT created_at FROM tweets ORDER BY created_at DESC LIMIT 0,1"]

# awards queries -- retrieve database records in chronological order,
# restricted to retweets of the @eonline and @goldenglobes accounts
sql_awards = ["SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @eonline:%' ORDER BY created_at ASC", "SELECT created_at, tweet FROM tweets WHERE tweet LIKE 'RT @goldenglobes:%' ORDER BY created_at ASC"]

# every tweet with its timestamp, oldest first
sql_all_tweets = "SELECT created_at, tweet FROM tweets ORDER BY created_at ASC"

# regexes for fashion commentary
# rX1: "<up to two words> looks [so|really|absolutely] <positive adjective>..."
# rX2: "<up to two words> looks|is [so|really|absolutely] <negative term>..."
# NOTE(review): `regex` comes from one of the star imports; presumably it
# compiles the pattern -- confirm.
rX1 = regex(r'([a-zA-Z]* [a-zA-Z\']*)? looks (so |really |absolutely )?(fantastic|amazing|wonderful|fabulous|great|good|stunning|ravishing|beautiful|sensational|hot|sexy|gorgeous|effortless|awesome|ethereal|effervescent|radiant|fierce|lovely|elegant|flawless|divine|pretty|cute|incredible)+(.*)')
rX2 = regex(r'([a-zA-Z]* [a-zA-Z\']*)? (looks|is) (so |really |absolutely )?(horrible|ugly|gross|terrible|horrendous|attrocious|fat|cheap|slutty|busted|unflattering|ill-fitting|old|heavy|missed the mark|like a distaster|awful|messy|hot mess|a mess[. !,]|dreadful|horrid|appalling){1}(.*)')
regexes = [rX1, rX2]


# Example tweet formats:
# RT @eonline: Best Original Song, Motion Picture: Skyfall by @OfficialAdele! #GoldenGlobes (url) (url)
# RT @goldenglobes: Best Supporting Actor in a Motion Picture - Christoph Waltz - Django Unchained - #GoldenGlobes

# NOTE(review): `lite` is not imported by name in this section (presumably
# sqlite3 via one of the star imports); the database path is relative to the
# current working directory.
con = lite.connect("gg_tweets.sqlite3")

# module-level state, empty at this point; nothing in this section fills it
timeInterval = ["",""]
fashion = []
awards = []

with con:
Example #5
0
 def __init__(self, *args):
     """Compile a pattern from ``args`` and initialise the wrapper with it.

     The compiled object is kept on ``self._r`` and also handed to
     ``self._init``.

     Args:
         *args: positional arguments forwarded unchanged to the compiler.
     """
     # The previous call, ``regex(*compile, **args)``, could never succeed:
     # ``args`` is always a tuple and ``**`` requires a mapping, so it raised
     # TypeError on every construction. Forward the positional arguments to
     # the compile function instead.
     # NOTE(review): assumes ``regex`` is the module exposing ``compile`` --
     # confirm against this file's imports.
     self._r = r = regex.compile(*args)
     self._init(r)
Example #6
0
        word = stopword.remove(word).lower()

    pil = int(sys.argv[2])
    r = int(sys.argv[3])

    if (pil == 1):
        id = Boyer_Moore(Q, word)
    elif (pil == 2):
        id = KMP(Q, word)

    idx = 0

    if (id != -1 and word == Z3[getindex(id)][0]):
        print(Z3[getindex(id)][1])
    else:
        id, percent = regex(Z3, word)

        idr = sorted(range(len(percent)), key=lambda i: percent[i])[-3:]
        ids = []

        for m in range(3):
            if (percent[idr[m]] != 0):
                ids.insert(0, idr[m])

        if (len(ids) >= 1):
            if (percent[ids[0]] >= r * 0.01):
                print(Z3[ids[0]][1])
            else:
                ok = False

                if (len(ids) == 1):