def get_tasks():
    """Look up stored coupon results for the URL posted in the request form.

    First tries the exact (shortened) URL and each of its modifications
    against the data store; if none of those hit, falls back to returning
    every result stored under the URL's parent (the URL up to the 3rd '/'),
    trying both with and without a trailing slash.

    Returns a JSON payload {"result": ...} on a hit, or the plain string
    'No Response' when nothing is stored for this URL at all.
    """
    u = request.form['url'].lower()
    url = Utilities.get_shortened_url(u)
    url_3 = Utilities.get_shortened_url(u, 3)

    # If the url IS its own parent url, skip the per-url lookups and return
    # results for the parent only — avoids redundantly returning the same
    # results for both the url and its parent.
    return_only_parent = (url == url_3 or url + '/' == url_3)

    ds = DataStore()

    if not return_only_parent:
        all_urls = Utilities.modify_url(url)
        print(all_urls)
        # Try the exact url and each modification individually.
        # BUGFIX: iterate with a separate name — the original reused `url`
        # as the loop variable, so the parent fallback below was computed
        # from the last modification instead of the requested url.
        for candidate in all_urls:
            result = ds.fetch(candidate)
            if result is False:
                print(" Tried for url " + candidate)
            else:
                return jsonify({"result": result})

    # Nothing matched the exact url or its modifications: fall back to all
    # results stored under the parent url.
    outer_url = "parent::" + Utilities.get_shortened_url(url, 3)
    print(outer_url)
    result = ds.fetch_all_from_parent(outer_url)
    if result:
        return jsonify({"result": result})

    # Retry with the trailing-slash variant flipped.
    if outer_url[-1] == '/':
        result = ds.fetch_all_from_parent(outer_url[:-1])
    else:
        result = ds.fetch_all_from_parent(outer_url + '/')
    if result:
        return jsonify({"result": result})

    # Nothing stored for this url at all.
    return 'No Response'
def on_data(self, data):
    """Stream callback: extract a coupon code from a tweet and store it.

    Skips near-duplicate tweets (similarity > 70 against the recent-tweet
    LRU), resolves and validates the tweet's first url, extracts the coupon
    code and optional expiry via Extraction, and inserts the result into the
    DataStore keyed as "<url>:::<code>".

    Returns True on a successful insert, None when the tweet is skipped.
    Any error is swallowed (best-effort stream processing) after a 1 second
    back-off; a KeyError on 'text' just means a non-tweet stream message.
    """
    try:
        data = json.loads(data)

        # Get tweet text. Raises KeyError('text') for non-tweet stream
        # messages, which the outer handler deliberately ignores.
        tweet = Utilities.clean_tweet(data['text'])

        # Skip tweets that are near-duplicates of something recently seen.
        for seen in self.recent_tweets:
            if Utilities.similarity(seen, tweet) > 70:
                return

        # Remember this tweet, keeping only the ~50 most recent entries.
        # recent_tweets is used as an LRU set (popitem(last=False) implies
        # an OrderedDict — evicts the oldest entry first).
        if len(self.recent_tweets) > 50:
            self.recent_tweets.popitem(last=False)
        self.recent_tweets[tweet] = True

        # Resolve the tweet's first shortened url; skip tweets without one.
        # NOTE: the original had an unreachable `raise` after this `return`;
        # it has been removed — the skip is intentional.
        try:
            url_name = Utilities.get_redirected_url(
                str(data['entities']['urls'][0]['expanded_url']))
        except Exception:
            return

        # Shortened url for the key --> up to the 5th '/' or the entire
        # address, whichever is shorter.
        url_name = Utilities.get_shortened_url(url_name).lower()

        # Currently unused, but kept: a missing 'created_at' key feeds the
        # outer best-effort handler exactly as before.
        timestamp = str(data['created_at'])

        # Verify authenticity of the website: if it contains the word
        # "coupon", assume it is not a vendor site (maybe a blog or a
        # coupon aggregator) and skip the tweet.
        try:
            Utilities.check_url_validity(url_name)
        except Exception:
            return

        with open("x.txt", "a") as f:
            f.write(tweet + '\n')
            f.write("--------------------" + '\n')

        # Extract the coupon code and (optional) expiry from the tweet.
        extractor = Extraction()
        code, date = extractor.extract_all(tweet)
        if not code:
            # No coupon code found: nothing to store.
            return
        if not date:
            date = 183600  # default expiry in seconds (51 hours)
        else:
            self.tweets_with_dates += 1
            self.total_expiry_time += date
            self.exp_time.append(date / 3600)
            print("%s %s" % (self.tweets_with_dates,
                             int(numpy.median(self.exp_time))))

        print(tweet)
        print('--------------------------------------')

        key = url_name + ':::' + code

        ds = DataStore()
        # Parent url — the url up to 3 '/'s, e.g. http://www.etsy.com/
        outer_url = "parent::" + Utilities.get_shortened_url(url_name, 3)
        ds.insert(key, url_name, code, tweet, date, outer_url)
        return True
    except BaseException as e:
        # KeyError('text') is the expected "not a tweet" case; everything
        # else is also swallowed (best-effort stream) after a short back-off.
        if str(e) != "'text'":
            pass
        time.sleep(1)