def setExpiryFromKeyword(keyword):
    """Translate a relative-day keyword into an expiry value.

    Matching is case-insensitive, anchored at the start of ``keyword`` and
    tolerates one leading whitespace character.  Patterns are tried in order:

    * "today" / "2day" / "tonight" / "2night": today's date, formatted as
      ``M-D-YYYY``, is handed to ``ExpirySetter.setExpiryFromDate3`` and its
      result is returned.
    * "tomorrow" and short spellings such as "2morrow" / "tomorow": same,
      using the date one ``ExpirySetter.oneday`` after now.
    * a weekday name (Monday .. Sunday): the number of whole seconds from now
      until 23:59:59 on the nearest day with that name, today included.

    Returns ``None`` when nothing matches, when the weekday cannot be reached
    within a week of ``ExpirySetter.oneday`` steps, or when the computed
    number of seconds is not positive.
    """
    # Ordered like datetime.weekday(): Monday == 0 ... Sunday == 6.
    weekdays = ('monday', 'tuesday', 'wednesday', 'thursday',
                'friday', 'saturday', 'sunday')

    if re.match(r'\s?((to|2)day|(to|2)night)', keyword, re.IGNORECASE):
        now = datetime.now()
        datestring = '%d-%d-%d' % (now.month, now.day, now.year)
        return ExpirySetter.setExpiryFromDate3(datestring)

    if re.match(r'\s?(2|to)mor?ro?w', keyword, re.IGNORECASE):
        end_time = datetime.now() + ExpirySetter.oneday
        datestring = '%d-%d-%d' % (end_time.month, end_time.day, end_time.year)
        return ExpirySetter.setExpiryFromDate3(datestring)

    day_match = re.match(
        r'\s?(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday).?',
        keyword, re.IGNORECASE)
    if day_match:
        # The regex already captured the exact day name, so use it directly.
        # The previous fuzzy lookup only knew Monday-Friday and mapped
        # Saturday/Sunday onto whichever weekday looked most similar.
        target = weekdays.index(day_match.group(1).lower())

        now = datetime.now()
        end_time = datetime(now.year, now.month, now.day, 23, 59, 59)

        # Compare weekday indices rather than strftime("%A") so the result
        # does not depend on the process locale.  Seven checks cover every
        # day of the week; the else branch is the guard against a step that
        # never lands on the target.
        for _ in range(len(weekdays)):
            if end_time.weekday() == target:
                break
            end_time += ExpirySetter.oneday
        else:
            return None

        seconds_left = int((end_time - now).total_seconds())
        return seconds_left if seconds_left > 0 else None

    return None
def on_data(self,data):
    """Process one raw stream message and store the coupon it carries, if any.

    ``data`` is a JSON string.  The message is dropped (``None`` is returned)
    when its cleaned text is more than 70 "similar" to a recently seen tweet,
    when its first URL cannot be resolved, when the URL fails
    ``Utilities.check_url_validity``, or when no coupon code is extracted.

    Side effects on the accepted path: the cleaned tweet is remembered in
    ``self.recent_tweets``, appended to ``x.txt``, printed, and a record is
    inserted through ``DataStore``.  Expiry statistics on ``self`` are
    updated when the tweet carries a date.

    Returns ``True`` after a successful insert.  Any other exception raised
    while processing is swallowed after a one second pause and ``None`` is
    returned; KeyboardInterrupt and SystemExit are allowed to propagate.
    """
    try:
        data = json.loads(data)

        # Get Tweet
        tweet = Utilities.clean_tweet(data['text'])

        # Drop near-duplicates of anything seen recently.
        if any(Utilities.similarity(seen, tweet) > 70
               for seen in self.recent_tweets):
            return

        # Bound the size of the recent-tweet memory by evicting one entry
        # (popitem(last=False): presumably an OrderedDict, oldest first --
        # confirm where recent_tweets is created).
        if len(self.recent_tweets) > 50:
            self.recent_tweets.popitem(last=False)
        self.recent_tweets[tweet] = True

        # Get Redirected url; a tweet without a usable first URL is ignored.
        try:
            url_name = Utilities.get_redirected_url(str(data['entities']['urls'][0]['expanded_url']))
        except Exception:
            return

        # Get shortened url for key --> Upto 5th '/' or entire address (whichever is shorter)
        url_name = Utilities.get_shortened_url(url_name).lower()

        # Get timestamp.  The value is not used further below; the lookup
        # still raises KeyError for payloads that lack 'created_at'.
        timestamp = str(data['created_at'])

        # Verify authenticity of website by checking if it has the word coupon
        # If it does , assume it is not a vendor site. Maybe blog, maybe coupon site
        try:
            Utilities.check_url_validity(url_name)
        except Exception:
            return

        with open("x.txt","a") as f:
            f.write(tweet + '\n')
            f.write("--------------------" + '\n')

        # Code to extract important information from this tweet
        e = Extraction()
        code,date = e.extract_all(tweet)
        if not code:
            return

        if not date:
            # Fallback expiry when the tweet gives none: 183600, i.e. 51 hours
            # if the unit is seconds as the /3600 statistics below suggest.
            date = 183600
        else:
            self.tweets_with_dates += 1
            self.total_expiry_time += date
            self.exp_time.append(date/3600)
            # NOTE(review): the original file lost its line breaks, so the
            # nesting of this print is reconstructed; it is kept with the
            # statistics it reports.  Single-argument form prints the same
            # "count median" text under Python 2 and Python 3.
            print("%s %s" % (self.tweets_with_dates, int(numpy.median(self.exp_time))))

        # NOTE(review): nesting of the next two prints is likewise
        # reconstructed; they are emitted for every accepted tweet.
        print(tweet)
        print('--------------------------------------')

        key = url_name + ':::' + code

        ds = DataStore()
        #get outer url - url uptil 3 '/'s . eg - http://www.etsy.com/
        outer_url = "parent::"+Utilities.get_shortened_url(url_name,3)
        ds.insert(key,url_name,code,tweet,date,outer_url)
        return True
    except Exception:
        # Best effort: a message that cannot be processed (for example one
        # without a 'text' field) is skipped.  The original distinguished
        # "'text'" errors only to drive debug prints that were commented out.
        # NOTE(review): the pause is applied to every failure, matching the
        # most plausible reading of the original layout.
        time.sleep(1)