Beispiel #1
0
def fb_get_postlinks_from_timeline(fbbrowser,
                                   url="https://facebook.com",
                                   count=10,
                                   logger=astra.baselogger,
                                   max_idle_scrolls=5):
    """Collect links to posts shown on a Facebook timeline page.

    Loads ``url`` in ``fbbrowser``, then repeatedly scans the rendered
    ``userContentWrapper`` elements for their first ``a._5pcq`` anchor and
    scrolls the page until ``count`` distinct links have been gathered.

    Args:
        fbbrowser: Browser driver object; must provide ``get``,
            ``find_elements_by_class_name`` and elements with
            ``get_property`` (presumably a Selenium WebDriver -- confirm
            against the caller).
        url: Page to open before collecting links.
        count: Maximum number of links to return.
        logger: Logger used for progress messages.
        max_idle_scrolls: Give up after this many consecutive scans that
            produced no new link, so a page with fewer than ``count`` posts
            cannot loop forever. ``None`` disables the limit.

    Returns:
        A list of at most ``count`` unique post URLs, in the order in which
        they were first seen on the page.
    """
    fbbrowser.get(url)
    karma.wait()
    postlinks = []
    seen = set()  # O(1) membership test; ``postlinks`` keeps discovery order
    idle_scrolls = 0

    while len(postlinks) < count:
        found_before = len(postlinks)
        for post in fbbrowser.find_elements_by_class_name(
                "userContentWrapper"):
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            postcontent = BeautifulSoup(post.get_property("innerHTML"),
                                        "html.parser")
            anchor = postcontent.find("a", {"class": "_5pcq"})
            if anchor is None:
                continue
            href = anchor.get("href")
            # An anchor without href cannot be turned into a link, and a
            # bare "#" is a placeholder rather than a post.
            if href is None or href == "#":
                continue
            postlink = "https://facebook.com" + href
            if postlink not in seen:
                seen.add(postlink)
                postlinks.append(postlink)
        logger.info("Got %s posts " % (len(postlinks)))

        if len(postlinks) == found_before:
            idle_scrolls += 1
            if (max_idle_scrolls is not None
                    and idle_scrolls >= max_idle_scrolls):
                logger.info("No new posts after %s scrolls, giving up" %
                            idle_scrolls)
                break
        else:
            idle_scrolls = 0

        karma.scroll_page(fbbrowser)
        karma.wait()
    return postlinks[:count]
Beispiel #2
0
def poll(botname, botstop):
    """Fetch and dispatch bot updates until the stop file appears.

    Each round logs a status line, calls ``karma.wait`` with
    ``waittime="medium"``, fetches updates via
    ``botname.get_latest_updates()`` and hands every update to
    ``botname.updater.dispatcher.process_update``. Any exception raised
    while fetching or dispatching is logged as a warning and the loop
    carries on with the next round.

    Args:
        botname: Bot object exposing ``name``, ``logger``,
            ``get_latest_updates()`` and ``updater.dispatcher``.
        botstop: Filesystem path; once it exists the loop ends.
    """
    # The stop file is checked once per round, before any work is done.
    while not os.path.exists(botstop):
        waiting_msg = u"{} waiting for messages. Touch {} to stop".format(
            botname.name, botstop)
        botname.logger.info(waiting_msg)
        karma.wait(waittime="medium", logger=botname.logger)
        try:
            updates = botname.get_latest_updates()
            received = len(updates)
            botname.logger.info(u"{} got {} messages".format(
                botname.name, received))
            if received > 0:
                for item in updates:
                    botname.updater.dispatcher.process_update(item)
        except Exception as err:
            # Best effort: report the failure and retry on the next round.
            botname.logger.warning(
                "Timed out, will try again in a bit {}".format(repr(err)))

    botname.logger.info(
        u"Stopfile found, {} exiting bot update loop".format(botname.name))
Beispiel #3
0
def twython_get_ntweets_for_search(tw,
                                   search,
                                   tcount,
                                   geocode=None,
                                   maxtries=0,
                                   logger=astra.baselogger):
    """Collect up to ``tcount`` tweets with distinct text for a search.

    Runs ``tw.search`` repeatedly, paging backwards with ``max_id``, and
    accumulates the returned statuses in a DataFrame that is de-duplicated
    on the ``full_text`` column.

    Args:
        tw: Client object exposing ``search(**params)`` that returns a
            mapping with a ``'statuses'`` list (presumably a Twython
            instance -- confirm against the caller).
        search: Query string, sent as ``q``.
        tcount: Number of unique tweets wanted; also sent as ``count``.
        geocode: Optional value forwarded unchanged as the ``geocode``
            search parameter. Omitted from the request when ``None``.
        maxtries: Upper bound on follow-up requests. ``0`` means "derive it
            from ``tcount``" (``tcount / 20 * 3``).
        logger: Logger used for progress messages.

    Returns:
        A DataFrame with at most ``tcount`` rows, or an empty DataFrame
        when the first request yields no statuses.
    """
    search_args = {"q": search, "count": tcount, "tweet_mode": "extended"}
    if geocode is not None:
        search_args["geocode"] = geocode

    p = pandas.DataFrame(tw.search(**search_args)['statuses'])
    if len(p) == 0:
        return pandas.DataFrame()
    # drop=True keeps the old positional index from leaking into the result
    # as an extra "index" column (matching the later reset_index calls).
    tweetdf = p.drop_duplicates(subset="full_text",
                                keep="first").reset_index(drop=True)

    if maxtries == 0:
        maxtries = (tcount / 20) * 3
    tries = 0
    while len(tweetdf) < tcount:
        logger.info("Currently have %s tweets" % len(tweetdf))
        if "id" not in p.columns:
            # Without ids there is no way to page further back; retrying
            # would only re-process the same page.
            logger.info(p.columns)
            break
        # Hand over a plain int rather than a numpy scalar, which the
        # client may not serialise as a request parameter (NOTE(review):
        # confirm against the client library in use).
        oldest_id = int(p['id'].iloc[-1])
        p = pandas.DataFrame(
            tw.search(max_id=oldest_id, **search_args)['statuses'])
        logger.info("Got %s more tweets" % len(p))
        # max_id is inclusive, so a page holding only the already-known
        # boundary tweet (or nothing) means there is nothing older.
        if len(p) <= 1:
            logger.info("Empty tweetset")
            break
        p = p.drop_duplicates(subset="full_text",
                              keep="first").reset_index(drop=True)
        logger.info("%s tweets are unique" % len(p))
        tweetdf = pandas.concat([tweetdf, p])
        tweetdf = tweetdf.drop_duplicates(subset="full_text",
                                          keep="first").reset_index(drop=True)
        logger.info("Sleeping...")
        karma.wait(logger=logger)
        logger.info("Got %s tweets" % len(tweetdf))
        tries += 1
        if tries > maxtries:
            break
    return tweetdf.head(tcount)
Beispiel #4
0
def twython_search(tw,
                   searchstring,
                   logger=astra.baselogger,
                   tcount=100,
                   maxtries=10):
    """Search for tweets and return up to ``tcount`` status dicts.

    Issues one search and, while fewer than ``tcount`` statuses have been
    collected, pages further back using ``max_id``.

    Args:
        tw: Client object exposing ``search(**params)`` that returns a
            mapping with a ``'statuses'`` list (presumably a Twython
            instance -- confirm against the caller).
        searchstring: Query string, sent as ``q``.
        logger: Logger used for progress messages.
        tcount: Number of statuses wanted; also sent as ``count``.
        maxtries: Follow-up requests stop once the number already made
            exceeds this value.

    Returns:
        A list of at most ``tcount`` statuses; empty when the search
        matches nothing.
    """
    logger.info("Searching Twitter for " + searchstring)
    results = tw.search(q=searchstring, count=tcount,
                        tweet_mode="extended")['statuses']
    tries = 0
    # ``results`` must be non-empty: there is no last tweet to page from
    # when the search matched nothing.
    while results and len(results) < tcount and tries <= maxtries:
        # max_id is inclusive; step one below the oldest tweet held so the
        # next page does not start with a duplicate of it.
        maxid = results[-1]['id'] - 1
        # Same tweet_mode as the first request, so every status in the
        # result has the same shape.
        page = tw.search(q=searchstring,
                         count=tcount,
                         tweet_mode="extended",
                         max_id=maxid)['statuses']
        if not page:
            # Nothing older is available; further requests cannot help.
            break
        results = results + page
        karma.wait(logger=logger)
        tries += 1
    logger.info("Got " + str(len(results)) + " for search query " +
                searchstring)
    return results[:tcount]