def fb_get_postlinks_from_timeline(fbbrowser, url="https://facebook.com", count=10, logger=astra.baselogger, maxscrolls=50):
    """Scroll a Facebook timeline and collect post permalinks.

    Loads `url` in the selenium browser, scrapes anchors with class
    ``_5pcq`` (the permalink element) out of each ``userContentWrapper``
    post, and keeps scrolling until `count` unique links are found or
    `maxscrolls` scroll attempts have been made.

    Args:
        fbbrowser: a selenium webdriver, already logged in to Facebook.
        url: timeline URL to load.
        count: number of post links to return.
        logger: logger for progress messages.
        maxscrolls: safety cap on scroll attempts so a timeline that
            stops yielding new posts cannot loop forever (new, optional,
            backward-compatible).

    Returns:
        list of at most `count` permalink URLs (order not guaranteed,
        since deduplication goes through a set).
    """
    fbbrowser.get(url)
    karma.wait()
    postlinks = []
    scrolls = 0
    while len(postlinks) < count:
        posts = fbbrowser.find_elements_by_class_name("userContentWrapper")
        # Explicit parser: deterministic across environments and silences
        # the "no parser specified" warning.
        postcontents = [BeautifulSoup(post.get_property("innerHTML"), "html.parser")
                        for post in posts]
        for postcontent in postcontents:
            anchors = postcontent.find_all("a", {"class": "_5pcq"})
            if not anchors:
                continue
            # NOTE: do not rebind the `url` parameter here (the original
            # clobbered it, breaking any later reload of the timeline).
            postlinks.append("https://facebook.com" + anchors[0].get("href"))
        logger.info("Got %s posts " % (len(postlinks)))
        # Re-scraping the page yields duplicates every pass; dedupe them.
        postlinks = list(set(postlinks))
        # "#" hrefs produce this junk entry; drop it.
        if "https://facebook.com#" in postlinks:
            postlinks.remove("https://facebook.com#")
        scrolls += 1
        if scrolls >= maxscrolls:
            logger.info("Reached %s scroll attempts, returning what we have" % maxscrolls)
            break
        karma.scroll_page(fbbrowser)
        karma.wait()
    return postlinks[:count]
def poll(botname, botstop):
    """Run the bot's update loop until the stop file appears.

    Each pass: if `botstop` exists on disk, log and exit the loop;
    otherwise wait a medium interval, fetch the latest updates from the
    bot, and dispatch each one. Any exception while fetching/dispatching
    is logged as a warning and the loop simply retries (best-effort
    polling).

    Args:
        botname: bot object exposing `.name`, `.logger`,
            `.get_latest_updates()` and `.updater.dispatcher`.
        botstop: path of the stop file; touching it ends the loop.
    """
    while True:
        # Guard clause: the presence of the stop file ends polling.
        if os.path.exists(botstop):
            botname.logger.info(
                u"Stopfile found, {} exiting bot update loop".format(botname.name))
            break
        botname.logger.info(
            u"{} waiting for messages. Touch {} to stop".format(botname.name, botstop))
        karma.wait(waittime="medium", logger=botname.logger)
        try:
            updates = botname.get_latest_updates()
            botname.logger.info(
                u"{} got {} messages".format(botname.name, len(updates)))
            for update in updates:
                botname.updater.dispatcher.process_update(update)
        except Exception as e:
            # Deliberately broad: a failed poll should never kill the
            # loop, only log and retry on the next pass.
            botname.logger.warning(
                "Timed out, will try again in a bit {}".format(repr(e)))
def twython_get_ntweets_for_search(tw, search, tcount, geocode=None, maxtries=0, logger=astra.baselogger):
    """Page through Twitter search results until `tcount` unique tweets.

    Repeatedly calls Twython's ``search`` endpoint, paging backwards via
    ``max_id`` and deduplicating on ``full_text``, until `tcount` tweets
    are collected or `maxtries` attempts are exhausted.

    Args:
        tw: an authenticated Twython client.
        search: the search query string.
        tcount: number of unique tweets wanted.
        geocode: optional "lat,long,radius" filter, forwarded to the API
            (the original accepted this parameter but never used it).
        maxtries: cap on paging attempts; 0 means derive it from tcount
            (~3 tries per 20-tweet page).
        logger: logger for progress messages.

    Returns:
        pandas.DataFrame of at most `tcount` deduplicated statuses
        (empty DataFrame if the first search returns nothing).
    """
    # Only forward geocode when given; Twython passes params through
    # verbatim, so an explicit None could leak into the request.
    search_kwargs = {"q": search, "count": tcount, "tweet_mode": "extended"}
    if geocode is not None:
        search_kwargs["geocode"] = geocode

    p = pandas.DataFrame(tw.search(**search_kwargs)['statuses'])
    if len(p) == 0:
        return pandas.DataFrame()
    # drop=True: without it the old index became a stray "index" column
    # that polluted tweetdf and every later concat.
    tweetdf = p.drop_duplicates(subset="full_text",
                                keep="first").reset_index(drop=True)
    tries = 0
    if maxtries == 0:
        maxtries = (tcount / 20) * 3
    while len(tweetdf) < tcount:
        logger.info("Currently have %s tweets" % len(tweetdf))
        if "id" in p.columns:
            # Page backwards from the oldest tweet we have seen.
            p = pandas.DataFrame(
                tw.search(max_id=p['id'].iloc[-1], **search_kwargs)['statuses'])
        else:
            logger.info(p.columns)
        logger.info("Got %s more tweets" % len(p))
        # > 0, not > 1: a page with exactly one tweet is not empty
        # (the original discarded single-tweet pages).
        if len(p) > 0:
            p = p.drop_duplicates(subset="full_text",
                                  keep="first").reset_index(drop=True)
        else:
            logger.info("Empty tweetset")
            break
        logger.info("%s tweets are unique" % len(p))
        tweetdf = pandas.concat([tweetdf, p])
        tweetdf = tweetdf.drop_duplicates(subset="full_text",
                                          keep="first").reset_index(drop=True)
        logger.info("Sleeping...")
        karma.wait(logger=logger)
        logger.info("Got %s tweets" % len(tweetdf))
        tries += 1
        if (tries > maxtries):
            break
    return tweetdf.head(tcount)
def twython_search(tw, searchstring, logger=astra.baselogger, tcount=100, maxtries=10):
    """Search Twitter for `searchstring`, paging until `tcount` results.

    Args:
        tw: an authenticated Twython client.
        searchstring: the search query.
        logger: logger for progress messages.
        tcount: number of statuses wanted.
        maxtries: cap on extra paging requests.

    Returns:
        list of at most `tcount` raw status dicts.
    """
    logger.info("Searching Twitter for " + searchstring)
    results = tw.search(q=searchstring, count=tcount,
                        tweet_mode="extended")['statuses']
    tries = 0
    while len(results) < tcount:
        if tries > maxtries:
            break
        # Empty result set: nothing to page from — the original crashed
        # here with IndexError on results[-1].
        if not results:
            break
        # max_id is inclusive per the Twitter search API, so subtract 1
        # to avoid re-fetching the last tweet on every page.
        maxid = results[-1]['id'] - 1
        # Keep tweet_mode="extended" on paging calls too; the original
        # dropped it and mixed truncated with extended statuses.
        page = tw.search(q=searchstring, count=tcount, max_id=maxid,
                         tweet_mode="extended")['statuses']
        if not page:
            # No further pages; don't spin until maxtries.
            break
        results = results + page
        karma.wait(logger=logger)
        tries += 1
    logger.info("Got " + str(len(results)) + " for search query " + searchstring)
    return results[:tcount]