# -- Review notes (code unchanged) ------------------------------------------
# Fragment: the 'except' clause below pairs with a 'try' above this view.
# Entry point: scrapes three AFP sites (Cuprum, Provida, SVS), then — if
# `guardado` is truthy — tweets every message in MsjGuardado.
# SECURITY: live-looking OAuth tokens/keys are hard-coded below; they are
# committed to source control and should be revoked and read from the
# environment instead.
except Exception as e: print("ERROR:" + str(e)) if __name__ == "__main__": print("------------------------------------------ Iniciando. Hora: " + str(datetime.now()) + " -----------------------") browser = mechanicalsoup.StatefulBrowser(soup_config={'features': 'lxml'}) scrapeCuprum(browser) scrapeProvida(browser) scrapeSVS(browser) print(guardado) print(MsjGuardado) if guardado: token = "895064451677736960-JCDhYCZ7wIztWljb9Wp36W2HXl9ZW3F" token_secret = "5Fbpzl7g61ZEWFfXRxvU9Jk8en5TI8piCn7p0okOSdvX8" consumer_key = "2b1fcHhGndhj3ocgZW1goGkFQ" consumer_secret = "cbxMnFMQvbTPpd4OGvZyOkiAGQARuJEIf85JcxE6RkcYgUZas3" t = Twitter( auth=OAuth(token, token_secret, consumer_key, consumer_secret)) for msj in MsjGuardado: t.statuses.update(status=msj) print("------------------------------------------ Terminó: " + str(datetime.now()) + " -----------------------")
# -- Review notes (code unchanged) ------------------------------------------
# Fragment (Python 2: bare `print` statements). Contains the tail of an
# update_users() helper (its `def` is above this view), update_links()
# (merges new (source, target, datetime) tuples into a CSV, rewriting the
# whole file each time), and the start of an endless polling loop that
# searches the NetSci2018 hashtags and merges the tweeting users.
# NOTE(review): the loop is cut off at the end of this view.
users = sorted(set(users) | set(new_users)) json.dump(users, fp) return users def update_links(new_links, filename): with open(filename, 'r') as fp: links = [tuple(l.split(",")) for l in fp.read().split("\n")[1:]] with open(filename, 'w') as fp: links = sorted(set(links) | set(new_links)) fp.write("source,target,datetime\n") fp.write("\n".join([",".join(l) for l in links])) while True: t = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET)) # Get users that tweeted with #netsci2018 hashtag print "Searching for tweets with hashtags '#NetSci2018' and '#NetSci18'" collection_tweets = [] for hashtag in hashtags: collection_tweets += search_tweets(hashtag)['statuses'] print "Loaded %d tweets" % len(collection_tweets), print "from", users = sorted( set( update_users([ tweet['user']['screen_name'] for tweet in collection_tweets if tweet['text'][:2] != "RT" ]))) # Everybody who has tweeted
# Post one status update using OAuth credentials taken from the environment.
from twitter import Twitter, OAuth
from os import environ
from pprint import pprint

# All four credentials must be present in the process environment;
# a missing variable raises KeyError immediately.
consumer_key = environ['TWITTER_CONSUMER_KEY']
consumer_secret = environ['TWITTER_CONSUMER_SECRET']
access_token = environ['TWITTER_ACCESS_TOKEN']
access_secret = environ['TWITTER_ACCESS_SECRET']

t = Twitter(auth=OAuth(access_token, access_secret,
                       consumer_key, consumer_secret))

# Publish the tweet and dump the raw API response for inspection.
statusUpdate = t.statuses.update(status='Hallo, word!')
pprint(statusUpdate)
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream try: import json except ImportError: import simplejson as json config = {} execfile("config.py", config) oauth = OAuth(config["ACCESS_TOKEN"], config["ACCESS_SECRET"], config["CONSUMER_KEY"], config["CONSUMER_SECRET"]) twitter_stream = TwitterStream(auth=oauth) iterator = twitter_stream.statuses.filter(track="@twitter", country="United States", countrycode="US") tweet_count = 882 for tweet in iterator: tweet_count -= 1 print json.dumps(tweet) if tweet_count <= 0: break
# -- Review notes (code unchanged) ------------------------------------------
# Fragment: the 'except ImportError' pairs with a 'try: import json' above
# this view. Sets up a TwitterStream sample feed using credentials read from
# the environment, aiming to collect NUM_TWEETS (5,000,000) tweets; the
# consuming for-loop is cut off at the end of this view.
except ImportError: import simplejson as json from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream NUM_TWEETS = 5000000 k = 1 ACCESS_TOKEN = os.environ.get('TWITTER_ACCESS_TOKEN') ACCESS_SECRET = os.environ.get('TWITTER_ACCESS_SECRET') API_KEY = os.environ.get('TWITTER_API_KEY') API_SECRET = os.environ.get('TWITTER_API_SECRET') print("starting") oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, API_KEY, API_SECRET) twitter_stream = TwitterStream(auth=oauth) tweets = twitter_stream.statuses.sample() allTweets = [] print("getting tweets") tweetTotal = NUM_TWEETS ''' Gets tweets from Twitter, creates dictionaries from them, and writes them to file as csv ''' for tweet in tweets:
def connect(self):
    """Build the authenticated Twitter API client and store it on self.client.

    Reads the four credential attributes set elsewhere on this object
    (token, token_key, con_secret, con_secret_key).
    """
    credentials = OAuth(self.token, self.token_key,
                        self.con_secret, self.con_secret_key)
    self.client = Twitter(auth=credentials)
# -- Review notes (code unchanged) ------------------------------------------
# Fragment (Python 2: `print argv[1]`, dict.has_key). Builds a python-twitter
# v1.1 client from a token file in $HOME (the interactive oauth dance is
# commented out), then main() reads a JSON-lines file naming askers/answerers.
# SECURITY: the app consumer key/secret are hard-coded below.
# NOTE(review): main() is cut off mid-loop at the end of this view.
from twitter.api import Twitter, TwitterError from twitter.oauth import OAuth, read_token_file from twitter.oauth_dance import oauth_dance from twitter.auth import NoAuth from twitter.util import Fail, err, expand_line, parse_host_list from twitter.follow import lookup CONSUMER_KEY='XLVBlYhYqJNAPPD5OEQ' CONSUMER_SECRET='EUDfuBcgB37Dn34Vo6tSaKcBKQESQOW1M6PIMQ' oauth_filename = (os.getenv("HOME", "") + os.sep + ".twitter-archiver_oauth") # if not os.path.exists(oauth_filename): # oauth_dance("Twitter-Archiver", CONSUMER_KEY, CONSUMER_SECRET, # oauth_filename) oauth_token, oauth_token_secret = read_token_file(oauth_filename) auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY, CONSUMER_SECRET) t = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com') # print twitter.statuses.home_timeline() print argv[1] # read the file def main(): json_file = open(argv[1], 'r') for line in json_file.readlines(): item = json.loads(line) if item.has_key('asker'): twitter_name = item['asker']['twitter_username'] if item.has_key('answerer'):
# -- Review notes (code unchanged) ------------------------------------------
# Fragment of a __main__ script: connects to the Twitter search/stream APIs
# using credentials from a ConfigProvider, loads a life-events list from
# file, and fetches up to 10 tweets per life event plus 5 per stem word.
# NOTE(review): 'ConsumberSecret' looks like a typo preserved from the
# config provider's own attribute name; the last search call is cut off
# mid-statement at the end of this view.
# also collects user information from these tweets and adds them to TwitterUsers.json # Finally adds 5 tweet per user creating a nice pool of mixed tweets. ####################################################################################################################################### import json from utils import ConfigProvider, FileContentLoader from twitter import OAuth, Twitter, TwitterStream if __name__ == '__main__': AllTweets = None AllUsers = {} AllNonEventTweets = None # Initiate the connection to Twitter Streaming / search API oauth = OAuth(ConfigProvider.AccessToken, ConfigProvider.AccessSecret, ConfigProvider.ConsumerKey, ConfigProvider.ConsumberSecret) twitter_stream = TwitterStream(auth=oauth) twitter = Twitter(auth=oauth) # Load life events detail and collect tweets for each life event LifeEventsList = FileContentLoader.LifeEventsList() for LifeEvent in LifeEventsList["LifeEventList"]: # Collect 10 tweets for current life event # print "Processing tweets for life event:" , LifeEvent["Topic"] EventTweets = twitter.search.tweets(q=LifeEvent["Event"], lang='en', count=10) # Collect 5 tweets per stem word for this life event category for StemWord in LifeEvent["StemWords"]: EventStemWordTweets = twitter.search.tweets(q=StemWord, lang='en',
# -- Review notes (code unchanged) ------------------------------------------
# Spark Streaming + Kafka pipeline. getSparkSessionInstance() caches a
# singleton SparkSession in globals(); savetweets() runs a 600-second-batch
# StreamingContext over the Kafka "test" topic; receiveTweets() is the
# foreachRDD callback. NOTE(review): receiveTweets ignores its `time` and
# `rdd` arguments and instead pulls up to 20,000 tweets from the global
# twitter_stream sample, forwarding those with 'extTweet' to the 'savedata'
# Kafka topic — confirm this is intentional rather than a leftover.
# Relies on module globals: sc, producer, twitter_stream, read_credentials.
def getSparkSessionInstance(sparkConf): if ('sparkSessionSingletonInstance' not in globals()): globals()['sparkSessionSingletonInstance'] = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate() return globals()['sparkSessionSingletonInstance'] def savetweets(): ssc = StreamingContext(sc, 600) kvs = KafkaUtils.createDirectStream(ssc, ["test"], {"metadata.broker.list": "localhost:9092"}) kvs.foreachRDD(receiveTweets) producer.flush() ssc.start() ssc.awaitTermination() def receiveTweets(time, rdd): iterator = twitter_stream.statuses.sample() count = 0 for tweet in iterator: if 'extTweet' in tweet: producer.send('savedata', bytes(json.dumps(tweet, indent=6), "ascii")) count+=1 if(count>=20000): break if __name__ == "__main__": sc = SparkContext(appName="Project 2: Store Tweets") credentials = read_credentials() oauth = OAuth(credentials['ACCESS_TOKEN'], credentials['ACCESS_SECRET'], credentials['CONSUMER_KEY'], credentials['CONSUMER_SECRET']) twitter_stream = TwitterStream(auth=oauth) producer = KafkaProducer(bootstrap_servers='localhost:9092') savetweets()
def __init__(self, access_token, access_token_secret, consumer_key,
             consumer_secret):
    """Create an authenticated Twitter client, retrying failed calls.

    The wrapper retries each API call up to 10 times before giving up.
    """
    auth = OAuth(access_token, access_token_secret,
                 consumer_key, consumer_secret)
    self.twitter = Twitter(auth=auth, retry=10)
# -- Review notes (code unchanged) ------------------------------------------
# Script: searches for '#python' tweets and posts a status update, encoding
# the printed responses as GBK (errors ignored).
# SECURITY: a full set of OAuth credentials is hard-coded below; they are
# committed to source control and should be revoked and loaded from the
# environment or a config file excluded from version control.
from twitter import Twitter from twitter import OAuth t = Twitter(auth=OAuth( '1539088022-I8k9xAUGjAfxGfgHQj69YMaTHZcLUsR2Nz9bvst', 'n44jnA3XPrLV2eXmS8Xqn1SKDj2GDD9WJk432FySfPkt4', 'jjMY07Ck9Zb41CyL8cWxAcwNl', '2ie5wUbzdTR2Y8hpBBeeoQLXc0UO8WFizhdgx9mYFaxyHqIxsG' )) pythonTweets = t.search.tweets(q='#python') # print(str(pythonTweets).encode('GBK', 'ignore')) statusUpdate = t.statuses.update(status='Hello, world kkk!') print(str(statusUpdate).encode('GBK', 'ignore'))
""" Streaming APIs give access to (usually a sample of) all tweets as they published on Twitter. """
# Only the REST API client is actually built below; the streaming wrapper
# import is kept commented for reference.
# import json
import tokens as tokens
from twitter import Twitter, OAuth

# Credentials are held in the local `tokens` module.
oauth = OAuth(tokens.ACCESS_TOKEN, tokens.ACCESS_SECRET,
              tokens.CONSUMER_KEY, tokens.CONSUMER_SECRET)

# Authenticated connection to the Twitter REST API.
twitter = Twitter(auth=oauth)

# Example queries, kept for reference:
# twitter.search.tweets(q='eleição')
# Trend locations (WOEIDs): Brazil 23424768, Brasilia 455819, Sao Paulo 455827
# world_trends = twitter.trends.available(_woeid=23424768)
# world_trends = twitter.trends.available(_woeid=455819)
# world_trends = twitter.trends.available(_woeid=455827)
def authenticate(self):
    """Build and store the OAuth credential object from the app's keys."""
    self.oauth = OAuth(self.ACCESS_TOKEN, self.ACCESS_SECRET,
                       self.CONSUMER_KEY, self.CONSUMER_SECRET)
# -- Review notes (code unchanged) ------------------------------------------
# Fragment: the first `return` belongs to a fetch_unseen_mentions() whose
# `def` is above this view. fetch_latest_id() returns the id of the most
# recent mention of @BOT_NAME. The __main__ block reads the last-seen id
# from '.latest_id' and replies to newer mentions (cut off at the end).
# NOTE(review): the '.latest_id' file is opened/closed manually; a `with`
# block would be safer.
return t.search.tweets(q='@' + BOT_NAME, result_type='recent', since_id=latest_id)['statuses'] # return the id of the latest tweet mentioning @BOT_NAME def fetch_latest_id(): return t.search.tweets(q='@' + BOT_NAME, result_type='recent', count=1)['statuses'][0]['id'] if __name__ == '__main__': # initialize Twitter connection t = Twitter( auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET, CONSUMER_KEY, CONSUMER_SECRET)) # read in the latest id from the last check f = open('.latest_id', 'r') latest_id = f.read().rstrip() f.close() # check for unseen tweets since the latest id results = fetch_unseen_mentions(latest_id) # if we got any tweets, reply to them if results: for tweet in reversed(results): tweeter = tweet['user']['screen_name'] artist = random_artist()
# -- Review notes (code unchanged) ------------------------------------------
# Announces a HEPData record on Twitter, shortening the title word by word
# until the status fits. NOTE(review): the docstring says 280 characters but
# the retry condition checks Twitter error code 186 whose inline comment
# says 140 — confirm which limit the API actually enforces here.
# NOTE(review): in the final `if not tweeted:` branch, `e` is unbound when
# the while loop never raised (e.g. the cleaned title has zero words), which
# would itself raise NameError.
def tweet(title, collaborations, url, version=1): """ Announce addition or revision of a HEPData record on Twitter. :param title: :param collaborations: :param url: :param version: :return: """ if USE_TWITTER: OAUTH_TOKEN = current_app.config['OAUTH_TOKEN'] OAUTH_SECRET = current_app.config['OAUTH_SECRET'] CONSUMER_KEY = current_app.config['CONSUMER_KEY'] CONSUMER_SECRET = current_app.config['CONSUMER_SECRET'] if not OAUTH_TOKEN or not OAUTH_SECRET or not CONSUMER_KEY or not CONSUMER_SECRET: # log this error print("Twitter credentials must be supplied!") else: twitter = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET, CONSUMER_KEY, CONSUMER_SECRET)) cleaned_title = decode_string( encode_string(title)) # in case of binary characters in title cleaned_title = replace( cleaned_title) # use UnicodeIt to replace LaTeX expressions cleaned_title = cleanup_latex( cleaned_title) # remove some remaining LaTeX encodings words = len(cleaned_title.split()) # Try to tweet with complete paper title. # If tweet exceeds 280 characters, keep trying with one less word each time. tweeted = False while words and not tweeted: try: if version == 1: status = "Added{0} data on \"{1}\" to {2}".format( get_collaboration_string(collaborations), truncate_string(cleaned_title, words), url) else: status = "Revised{0} data on \"{1}\" at {2}?version={3}".format( get_collaboration_string(collaborations), truncate_string(cleaned_title, words), url, version) twitter.statuses.update(status=status) tweeted = True print("Tweeted: {}".format(status)) except Exception as e: # It would be nice to get a stack trace here if e.e.code == 403: error = json.loads(e.response_data.decode('utf8')) if error["errors"][0][ "code"] == 186: # Status is over 140 characters. words = words - 1 # Try again with one less word. else: break else: break if not tweeted: print(e.__str__()) print("(P) Failed to post tweet for record {0}".format(url))
# -- Review notes (code unchanged) ------------------------------------------
# Fragment: the leading ')' closes a call started above this view. Sets up
# both a python-twitter client and a tweepy client from the same keys, then
# defines details() to print account information. The bare `print` statements
# near the end mean this file runs under Python 2 despite the print() calls.
# SECURITY: a full set of API credentials is hard-coded below — revoke them
# and load from the environment instead.
) print( colored("\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t WELCOME TO TWITTERBOT ", color='blue', attrs=['bold'])) #----------------------------------------------------------------------------------------------------------------------- # CREDENTIALS API_KEY = 'tWlZPllDzXGvuRaPYBMu82WjE' API_SECRET = 'cBWdZn91zmX7LtQI8UczNdFC66TdkPdDHfLS9lVkpEN4cwDJxL' ACCESS_TOKEN = '410532691-IK0YkbvXH4A0SVrgHiuKHWVpB4Srq9fGhsj1zAmr' ACCESS_TOKEN_SECRET = 'Ag3H99umpVoaaY51Tzf5p6DIKdugDQuKEfMzkDQoud41H' #----------------------------------------------------------------------------------------------------------------------- twitter_oauth = OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET) twitter = Twitter(auth=twitter_oauth) oauth = tweepy.OAuthHandler(API_KEY, API_SECRET) oauth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET) api = tweepy.API(oauth) #----------------------------------------------------------------------------------------------------------------------- #FOR DISPLAYING MY ACCOUNT DETAILS def details(): myaccount = api.me() print(colored("\n Personal Details", color='blue', attrs=['underline'])) print 'Name: ' + myaccount.name print 'Friends: ' + str(myaccount.friends_count)
import pathlib
from twitter import OAuth
import encrypt
import tweepy

# The encrypted variables file and its key sit one directory above cwd.
# NOTE: the backslash separators make this Windows-specific.
env_file = str(pathlib.Path.cwd().parent) + "\\vars.json"
key_path = str(pathlib.Path.cwd().parent) + "\\key.key"

# Decrypt the secrets before building the OAuth object.
key = encrypt.load_key(key_path)
env_vars = encrypt.decrypt_return_data(env_file, key)

auth = OAuth(env_vars["TWITTER_TOKEN_KEY"], env_vars["TWITTER_TOKEN_SECRET"],
             env_vars["TWITTER_CONSUMER_KEY"],
             env_vars["TWITTER_CONSUMER_SECRET"])


class StreamListener(tweepy.StreamListener):
    """Print each incoming tweet together with a permalink to it."""

    def on_status(self, tweet):
        print(f"{tweet.user.name}:{tweet.text}")
        print(f"https://twitter.com/user/status/{tweet.id}")

    def on_error(self, status_code):
        # 420 means we are being rate limited: disconnect the stream.
        if status_code == 420:
            return False
        print("Error Detected")


api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# -- Review notes (code unchanged) ------------------------------------------
# Fragment of the echo_chamber script: collects follower/following lists by
# shelling out to the `twitter-follow` CLI, cut off after the followers call.
# NOTE(review): os.system commands are built by string concatenation; if
# `your_username` ever came from untrusted input this would be shell
# injection — subprocess.run with a list plus stdout redirection is safer.
# The credential placeholders below must be replaced before running.
# https://github.com/find-evil/echo_chamber # https://jackie.lol/posts/presenting-echo-chamber-a-python-tool-for-blacklivesmatter/ import os import json from twitter import OAuth, Twitter # REPLACE THESE PLACEHOLDERS WITH YOUR TWITTER API KEYS t = Twitter( auth=OAuth('token', 'token_secret', 'consumer_key', 'consumer_secret')) # SET YOUR TWITTER USERNAME HERE your_username = "******" # SET YOUR SEARCH TERM HERE HASHTAG = "#BlackLivesMatter" # OUTPUT LISTS did_tweet = list() did_not_tweet = list() could_not_send_dm = list() # GET LIST OF PEOPLE YOU FOLLOW os.system('twitter-follow -o -g ' + your_username + '> tw_following.txt') followingList = list() with open("tw_following.txt", "r") as myfile: for line in myfile: followingList.append(line.strip()) # GET LIST OF PEOPLE WHO FOLLOW YOU os.system('twitter-follow -o -r ' + your_username + '> tw_followers.txt')
# -- Review notes (code unchanged) ------------------------------------------
# Python 2 function (bare `print`, `unicode()`): fetches up to three featured
# tweets and three featured Facebook posts named in the project copy sheet,
# rewrites tweet entity spans (media/urls/hashtags) into tracked <a> links,
# and dumps the combined result to data/featured.json.
# NOTE(review): the entity substitution relies on exact text slices from
# tweet['indices']; too intricate to restyle safely — comments only.
# NOTE(review): the statements below span the chunk boundaries mid-string;
# the lines are preserved exactly as found.
def update_featured_social(): """ Update featured tweets """ COPY = copytext.Copy(app_config.COPY_PATH) secrets = app_config.get_secrets() # Twitter print 'Fetching tweets...' twitter_api = Twitter(auth=OAuth(secrets['TWITTER_API_OAUTH_TOKEN'], secrets['TWITTER_API_OAUTH_SECRET'], secrets['TWITTER_API_CONSUMER_KEY'], secrets['TWITTER_API_CONSUMER_SECRET'])) tweets = [] for i in range(1, 4): tweet_url = COPY['share']['featured_tweet%i' % i] if isinstance(tweet_url, copytext.Error) or unicode(tweet_url).strip() == '': continue tweet_id = unicode(tweet_url).split('/')[-1] tweet = twitter_api.statuses.show(id=tweet_id) creation_date = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y') creation_date = '%s %i' % (creation_date.strftime('%b'), creation_date.day) tweet_url = 'https://twitter.com/%s/status/%s' % ( tweet['user']['screen_name'], tweet['id']) photo = None html = tweet['text'] subs = {} for media in tweet['entities'].get('media', []): original = tweet['text'][media['indices'][0]:media['indices'][1]] replacement = '<a href="%s" target="_blank" onclick="_gaq.push([\'_trackEvent\', \'%s\', \'featured-tweet-action\', \'link\', 0, \'%s\']);">%s</a>' % ( media['url'], app_config.PROJECT_SLUG, tweet_url, media['display_url']) subs[original] = replacement if media['type'] == 'photo' and not photo: photo = {'url': media['media_url']} for url in tweet['entities'].get('urls', []): original = tweet['text'][url['indices'][0]:url['indices'][1]] replacement = '<a href="%s" target="_blank" onclick="_gaq.push([\'_trackEvent\', \'%s\', \'featured-tweet-action\', \'link\', 0, \'%s\']);">%s</a>' % ( url['url'], app_config.PROJECT_SLUG, tweet_url, url['display_url']) subs[original] = replacement for hashtag in tweet['entities'].get('hashtags', []): original = tweet['text'][ hashtag['indices'][0]:hashtag['indices'][1]] replacement = '<a href="https://twitter.com/hashtag/%s" target="_blank" onclick="_gaq.push([\'_trackEvent\', \'%s\', \'featured-tweet-action\', 
\'hashtag\', 0, \'%s\']);">%s</a>' % ( hashtag['text'], app_config.PROJECT_SLUG, tweet_url, '#%s' % hashtag['text']) subs[original] = replacement for original, replacement in subs.items(): html = html.replace(original, replacement) # https://dev.twitter.com/docs/api/1.1/get/statuses/show/%3Aid tweets.append({ 'id': tweet['id'], 'url': tweet_url, 'html': html, 'favorite_count': tweet['favorite_count'], 'retweet_count': tweet['retweet_count'], 'user': { 'id': tweet['user']['id'], 'name': tweet['user']['name'], 'screen_name': tweet['user']['screen_name'], 'profile_image_url': tweet['user']['profile_image_url'], 'url': tweet['user']['url'], }, 'creation_date': creation_date, 'photo': photo }) # Facebook print 'Fetching Facebook posts...' fb_api = GraphAPI(secrets['FACEBOOK_API_APP_TOKEN']) facebook_posts = [] for i in range(1, 4): fb_url = COPY['share']['featured_facebook%i' % i] if isinstance(fb_url, copytext.Error) or unicode(fb_url).strip() == '': continue fb_id = unicode(fb_url).split('/')[-1] post = fb_api.get_object(fb_id) user = fb_api.get_object(post['from']['id']) user_picture = fb_api.get_object('%s/picture' % post['from']['id']) likes = fb_api.get_object('%s/likes' % fb_id, summary='true') comments = fb_api.get_object('%s/comments' % fb_id, summary='true') #shares = fb_api.get_object('%s/sharedposts' % fb_id) creation_date = datetime.strptime(post['created_time'], '%Y-%m-%dT%H:%M:%S+0000') creation_date = '%s %i' % (creation_date.strftime('%b'), creation_date.day) # https://developers.facebook.com/docs/graph-api/reference/v2.0/post facebook_posts.append({ 'id': post['id'], 'message': post['message'], 'link': { 'url': post['link'], 'name': post['name'], 'caption': (post['caption'] if 'caption' in post else None), 'description': post['description'], 'picture': post['picture'] }, 'from': { 'name': user['name'], 'link': user['link'], 'picture': user_picture['url'] }, 'likes': likes['summary']['total_count'], 'comments': comments['summary']['total_count'], 
#'shares': shares['summary']['total_count'], 'creation_date': creation_date }) # Render to JSON output = {'tweets': tweets, 'facebook_posts': facebook_posts} with open('data/featured.json', 'w') as f: json.dump(output, f)
# -- Review notes (code unchanged) ------------------------------------------
# Polls the wanimagazine Twitter timeline and turns announcement tweets into
# WantedGallery/Announce/Artist ORM records. The inner process_wani_tweets()
# classifies each tweet via Japanese markers (release date "M/D", 新刊情報 new
# publication, 本日発売 out today, 明日発売 out tomorrow) and two title/artist
# regex shapes. The outer loop pages the timeline forward from the stored
# max tweet id, or backward via max_id when no tweets are stored yet.
# NOTE(review): logic depends heavily on exact regex groups and ORM
# get_or_create/update_or_create defaults — comments only, no restyle.
# NOTE(review): the statements below span the chunk boundaries; the lines
# are preserved exactly as found.
def wanted_generator(settings: 'Settings', ext_logger: OptionalLogger, attrs: QuerySet): own_settings = settings.providers[constants.provider_name] def process_wani_tweets(current_tweets: List[Dict[str, Any]], local_logger=None): publisher = 'wanimagazine' source = 'twitter' for tweet in current_tweets: cover_url = None if 'media' in tweet['entities']: for media in tweet['entities']['media']: cover_url = media['media_url'] tweet_obj, tweet_created = TweetPost.objects.get_or_create( tweet_id=tweet['id'], defaults={ 'text': tweet['text'], 'user': publisher, 'posted_date': datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y"), 'media_url': cover_url }) if not tweet_created: continue local_logger.info("Created tweet id: {}processing.".format( tweet_obj.tweet_id)) match_tweet_type = re.search('【(.+)】(.*)', tweet['text'], re.DOTALL) if match_tweet_type: local_logger.info( "Matched pattern (date_type: {}, artist: {}),".format( match_tweet_type.group(1), match_tweet_type.group(2))) release_type = None release_date = None date_type = re.search(r'.*?(\d+)/(\d+).*?', match_tweet_type.group(1), re.DOTALL) announce_date = datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y") if date_type: release_type = 'release_date' release_date = announce_date.replace( month=int(date_type.group(1)), day=int(date_type.group(2)), hour=0, minute=0, second=0) new_book_type = re.search('新刊情報', match_tweet_type.group(1), re.DOTALL) if new_book_type: release_type = 'new_publication' release_date = datetime.strptime( tweet['created_at'], "%a %b %d %H:%M:%S %z %Y") out_today_type = re.search('本日発売', match_tweet_type.group(1), re.DOTALL) if out_today_type: release_type = 'out_today' release_date = datetime.strptime( tweet['created_at'], "%a %b %d %H:%M:%S %z %Y") out_tomorrow_type = re.search('明日発売', match_tweet_type.group(1), re.DOTALL) if out_tomorrow_type: release_type = 'out_tomorrow' release_date = datetime.strptime( tweet['created_at'], "%a %b %d %H:%M:%S %z %Y") + 
timedelta(days=1) match_title_artists = re.search('^『(.+?)』は<(.+)>', match_tweet_type.group(2), re.DOTALL) if match_title_artists and release_type: local_logger.info( "Matched pattern (title: {}, artists: {}), release_type: {}." .format(match_title_artists.group(1), match_title_artists.group(2), release_type)) title = match_title_artists.group(1) title = title.replace("X-EROS#", "X-EROS #") artists = set( match_title_artists.group(2).replace('ほか', '').split('/')) if len(artists) > 1: book_type = 'magazine' else: book_type = '' wanted_gallery, created = WantedGallery.objects.get_or_create( title_jpn=title, search_title=format_title_to_wanted_search(title), publisher=publisher, defaults={ 'title': title, 'book_type': book_type, 'add_as_hidden': True, 'category': 'Manga', 'reason': 'wanimagazine', 'public': own_settings.add_as_public }) if created: wanted_gallery.should_search = True wanted_gallery.keep_searching = True wanted_gallery.save() local_logger.info( "Created wanted gallery (magazine): {}, search title: {}" .format(wanted_gallery.get_absolute_url(), title)) announce, announce_created = wanted_gallery.announces.get_or_create( announce_date=announce_date, release_date=release_date, type=release_type, source=source, ) if announce_created and cover_url: announce.save_img(cover_url) # wanted_gallery.calculate_nearest_release_date() wanted_gallery.release_date = release_date wanted_gallery.save() for artist in artists: artist_obj = Artist.objects.filter( name_jpn=artist).first() if not artist_obj: artist_obj = Artist.objects.create(name=artist, name_jpn=artist) wanted_gallery.artists.add(artist_obj) match_artist_title = re.search('^(.+?)『(.+?)』.*', match_tweet_type.group(2), re.DOTALL) if match_artist_title and release_type: local_logger.info( "Matched pattern (artist: {}, title: {}), release type: {}." 
.format(match_artist_title.group(1), match_artist_title.group(2), release_type)) artist = match_artist_title.group(1) title = match_artist_title.group(2) title = title.replace("X-EROS#", "X-EROS #") cover_artist = None book_type = None if '最新刊' in artist: artist = artist.replace('最新刊', '') book_type = 'new_publication' cover_artist = Artist.objects.filter( name_jpn=artist).first() if not cover_artist: cover_artist = Artist.objects.create( name=artist, name_jpn=artist) elif '初単行本' in artist and ('『' not in artist and '』' not in artist): artist = artist.replace('初単行本', '') book_type = 'first_book' cover_artist = Artist.objects.filter( name_jpn=artist).first() if not cover_artist: cover_artist = Artist.objects.create( name=artist, name_jpn=artist) elif '表紙が目印の' in artist: artist = artist.replace('表紙が目印の', '') book_type = "magazine" cover_artist = Artist.objects.filter( name_jpn=artist).first() if not cover_artist: cover_artist = Artist.objects.create( name=artist, name_jpn=artist) if book_type: wanted_gallery, created = WantedGallery.objects.update_or_create( title_jpn=title, search_title=format_title_to_wanted_search(title), publisher=publisher, defaults={ 'cover_artist': cover_artist, 'title': title, 'book_type': book_type, 'add_as_hidden': True, 'category': 'Manga', 'reason': 'wanimagazine', 'public': own_settings.add_as_public }) if created: wanted_gallery.should_search = True wanted_gallery.keep_searching = True wanted_gallery.save() local_logger.info( "Created wanted gallery (anthology): {}, search title: {}" .format(wanted_gallery.get_absolute_url(), title)) announce, announce_created = wanted_gallery.announces.get_or_create( announce_date=announce_date, release_date=release_date, type=release_type, source=source, ) if announce_created and cover_url: announce.save_img(cover_url) # wanted_gallery.calculate_nearest_release_date() wanted_gallery.release_date = release_date wanted_gallery.save() artist_obj = Artist.objects.filter( name_jpn=artist).first() if not 
artist_obj: artist_obj = Artist.objects.create(name=artist, name_jpn=artist) wanted_gallery.artists.add(artist_obj) else: local_logger.info( "Created tweet id: {} did not match the pattern".format( tweet_obj.tweet_id)) if not all([getattr(own_settings, x) for x in CREDENTIALS]): ext_logger.error( 'Cannot work with Twitter unless all credentials are set.') return t = Twitter(auth=OAuth( own_settings.token, own_settings.token_secret, own_settings.consumer_key, own_settings.consumer_secret, )) tweet_posts = TweetPost.objects.all() if tweet_posts: max_id = tweet_posts.aggregate(Max('tweet_id'))['tweet_id__max'] while True: ext_logger.info("Fetching since tweet id: {}".format(max_id)) tweets = t.statuses.user_timeline(screen_name='wanimagazine', include_rts=False, exclude_replies=True, trim_user=True, count=200, since_id=max_id) if not tweets: ext_logger.info("No more tweets to fetch, ending") break new_max_id = max(tweets, key=lambda x: x['id'])['id'] process_wani_tweets(tweets, local_logger=ext_logger) if new_max_id == max_id: ext_logger.info( "No more new tweets fetched, stopping at: {}".format( max_id)) break else: max_id = new_max_id else: min_id = None while True: if min_id: ext_logger.info( "Fetching backwards with max id: {}".format(min_id)) tweets = t.statuses.user_timeline(screen_name='wanimagazine', include_rts=False, exclude_replies=True, trim_user=True, count=200, max_id=min_id) else: ext_logger.info("Starting from newer tweet.") tweets = t.statuses.user_timeline(screen_name='wanimagazine', include_rts=False, exclude_replies=True, trim_user=True, count=200) if not tweets: ext_logger.info("No more tweets to fetch, ending") break new_min_id = min(tweets, key=lambda x: x['id'])['id'] process_wani_tweets(tweets, local_logger=ext_logger) if new_min_id == min_id: ext_logger.info( "No more new tweets fetched, stopping at: {}".format( min_id)) break else: min_id = new_min_id
def __init__(self):
    """Set up an authenticated Twitter search client.

    Credentials come from module-level constants.
    """
    # NOTE(review): 'aouth' looks like a typo for 'oauth'; the attribute
    # name is kept unchanged for compatibility with any external readers.
    self.aouth = OAuth(Twitter_Access_Token, Twitter_Access_TokenSecret,
                       Twitter_Consumer_Key, Twitter_Consumer_Secret)
    self.twitter_search = Twitter(auth=self.aouth)
# encoding: utf-8
from __future__ import unicode_literals

from random import choice
import time
import pickle
import json

from twitter import Twitter, NoAuth, OAuth, read_token_file, TwitterHTTPError
from twitter.api import TwitterDictResponse, TwitterListResponse
from twitter.cmdline import CONSUMER_KEY, CONSUMER_SECRET

# One unauthenticated client and one OAuth-authenticated v1.1 client,
# both pointed at the production API host.
noauth = NoAuth()
oauth = OAuth(*read_token_file('tests/oauth_creds') +
              (CONSUMER_KEY, CONSUMER_SECRET))

twitter11 = Twitter(domain='api.twitter.com', auth=oauth, api_version='1.1')
twitter11_na = Twitter(domain='api.twitter.com', auth=noauth,
                       api_version='1.1')

# Alphanumeric alphabet for throwaway identifiers.
AZaz = "abcdefghijklmnopqrstuvwxyz1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def get_random_str():
    """Return a random 10-character alphanumeric string."""
    return ''.join([choice(AZaz) for _ in range(10)])
# -- Review notes (code unchanged) ------------------------------------------
# Bridges a Twitter hashtag stream into an MQTT topic: every non-"RT" tweet
# text is published via paho to env['TW_STREAM_TOPIC'].
# NOTE(review): `'RT' not in tweet['text']` excludes ANY tweet containing
# the substring "RT" anywhere (e.g. "SHORTS"), not just retweets — confirm
# whether startswith('RT') was intended.
# NOTE(review): the comment says CloudMQTT but the client connects to
# localhost:8833 — verify the host/port (1883 is the usual MQTT default).
logging.basicConfig(filename=env['LOG_FILE'],level=logging.DEBUG) # CloudMQTT config items def on_publish(client, userdata, mid): logging.info("mid: "+str(mid)) client = paho.Client() client.on_publish = on_publish client.connect('localhost', 8833) # Twitter app config items stream = TwitterStream( auth=OAuth( env['TW_ACCESS_TOKEN'], env['TW_ACCESS_SECRET'], env['TW_CONSUMER_KEY'], env['TW_CONSUMER_SECRET'] ) ) tweets = stream.statuses.filter(track=env['HASH_TAGS']) for tweet in tweets: if 'RT' not in tweet['text']: logging.debug(tweet['text']) msg_info = client.publish(env['TW_STREAM_TOPIC'], tweet['text']) if not msg_info.is_published(): logging.error('Message is not yet published.')
# -- Review notes (code unchanged) ------------------------------------------
# Fragment of a Lambda-style module: wires up a Kafka producer, Watson NLU,
# Google Maps, a Twitter stream and an SNS client at import time; handler()
# filters the stream by event['keyword'] and is cut off mid-loop at the end
# of this view. NOTE(review): the bare `except Exception: continue` around
# tweet['text'] silently skips non-tweet stream messages (e.g. keep-alives).
from watson_developer_cloud import NaturalLanguageUnderstandingV1 from watson_developer_cloud.natural_language_understanding_v1 import Features, EntitiesOptions, SentimentOptions from kafka import KafkaProducer from random import randrange, uniform import boto3 producer = KafkaProducer( value_serializer=lambda m: json.dumps(m).encode('ascii'), bootstrap_servers=config.KAFKA_SERVER) natural_language_understanding = NaturalLanguageUnderstandingV1( version=config.WATSON_VERSION, username=config.WATSON_USERNAME, password=config.WATSON_PASSWORD) gmaps = googlemaps.Client(key=config.GOOGLE_API_KEY) oauth = OAuth(config.ACCESS_TOKEN, config.ACCESS_SECRET, config.CONSUMER_KEY, config.CONSUMER_SECRET) twitter_stream = TwitterStream(auth=oauth) sns = boto3.client('sns', region_name='us-east-1') def handler(event, context): iterator = twitter_stream.statuses.filter(track=event['keyword']) count = 10 result = [] for tweet in iterator: try: text = tweet['text'] except Exception as e: continue if tweet['lang'] == 'en':
# -- Review notes (code unchanged) ------------------------------------------
# Fragment (Python 2: execfile). Creates the raw_tweet output directory,
# loads credentials from an absolute, machine-specific config path, and
# streams English tweets into timestamped files; scrape() is cut off
# mid-loop at the end of this view.
# NOTE(review): `api.GetStreamSample()` is not the usual sixohsix-wrapper
# method name (that is `statuses.sample()` on a TwitterStream) — presumably
# a project-local wrapper; confirm against the full file.
RAW_TWEET_DIR = 'raw_tweet' # maybe create raw_tweet dir if not os.path.exists(RAW_TWEET_DIR): os.makedirs(RAW_TWEET_DIR) # retrieve credentials # twitter dev api config = {} execfile( "/home/mike/Documents/repoo/information_diffusion/twitter_api/config/config.py", config) # Create twitter API object api = Twitter(auth=OAuth(config["ACCESS_TOKEN"], config["ACCESS_TOKEN_SECRET"], config["CONSUMER_KEY"], config["CONSUMER_SECRET"])) def datetime_filename(prefix='output_'): outputName = prefix + '{:%Y%m%d%H%M%S}utc.txt'.format( datetime.datetime.utcnow()) return outputName def scrape(tweets_per_file=10000): f = open(datetime_filename(prefix='{0}/en_tweet_'.format(RAW_TWEET_DIR)), 'w') tweet_count = 0 try: for line in api.GetStreamSample(): if 'text' in line and line['lang'] == u'en':
def __init__(self):
    """Prepare OAuth credentials and the Elasticsearch client."""
    # No worker thread registered yet.
    self.__current_thread_ident = None
    self.__oauth = OAuth(**OAUTH_INFO)
    # A configured remote node is used instead of the localhost default.
    self.__es = Elasticsearch([ELASTICSEARCH_IP])
# -- Review notes (code unchanged) ------------------------------------------
# Fragment: the leading 'import json' completes a 'try:' above this view.
# Opens a Twitter streaming sample and prepares to write 1000 tweets to
# twitter_stream_samples3.txt; the consuming loop is below this view.
# SECURITY: a full set of OAuth credentials is hard-coded below — revoke
# them and load from the environment or an ignored config file instead.
import json except ImportError: import simplejson as json import os # Import the necessary methods from "twitter" library from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream # Variables that contains the user credentials to access Twitter API ACCESS_TOKEN = '918639046280253440-UJ5I3x4Ru0MhBLyVxyPefNXYq9c7KJg' ACCESS_SECRET = 'Kj8XelBrq7hS8oR5H0eqfyjJzFNJb4y9pP1pkSOdZnp8R' CONSUMER_KEY = 'bMdrEm9OEHKYeenT6OXVbsoo7' CONSUMER_SECRET = 'P6pbJA9MEa2VpDBHvEJ3BBp1XJla66mTOlgdGpMOzPqa8LSsc1' oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET) # Initiate the connection to Twitter Streaming API twitter_stream = TwitterStream(auth=oauth) # Get a sample of the public data following through Twitter # iterator = twitter_stream.statuses.filter(track="", language="en") iterator = twitter_stream.statuses.sample() # Print each tweet in the stream to the screen # Here we set it to stop after getting 1000 tweets. # You don't have to set it to stop, but can continue running # the Twitter API to collect data for days or even longer. tweet_count = 1000 sample_file = open('twitter_stream_samples3.txt', 'w')
# -- Review notes (code unchanged) ------------------------------------------
# Fragment: pages through up to 10 batches of a user's tweets into a
# DataFrame. SECURITY: OAuth credentials are hard-coded below.
# NOTE(review): within this view `mid` is only updated by commented-out
# lines (which also reference `dftemp1`, presumably a typo for `dftemp`);
# unless the cut-off tail updates it, every iteration after the first would
# re-fetch the same page. Also `!= None` should be `is not None`.
import pandas as pd from twitter import Twitter from twitter import OAuth import re import json from pandas.io.json import json_normalize ck = 'QvGBrXMEZhicazT2XzvK1usgY' #consumer key cs = 'TyYiFfvYszrjtoikhn5dlMYWljuEPGQu3iUlPZmByZAgKwTgP2' #consumer key secret at = '954075809018470400-2TRhibAbIA7OBhPUXuf6xE8NIMTWa17' ats = 'qbg2ibQ4QILAIYidy9kdANZ5cxSO2a9jCPhOIYmsGm1Wv' oauth = OAuth(at,ats,ck,cs) api = Twitter(auth=oauth) df = pd.DataFrame() mid = 0 for i in range(10): if i == 0: search_tw = api.search.tweets(q="from:sreekanth324", count=100, tweet_mode='extended') else: search_tw = api.search.tweets(q="from:sreekanth324", count=100, max_id=mid, tweet_mode='extended') dftemp = json_normalize(search_tw, 'statuses') # mid = dftemp1['id'].min() # mid=mid-1 for j in range(0, len(dftemp.index)): if dftemp['id'][j] != None: if mid == 0:
# -- Review notes (code unchanged) ------------------------------------------
# Fragment: the leading 'if' completes a 'try:' above this view; the
# ImportError fallback supplies a minimal English wordlist.
# NOTE(review): `print('Error in twitter init: ' + e)` raises TypeError
# (str + Exception); it should be `+ str(e)` — which would then mask exit
# code 255 with its own traceback.
# NOTE(review): write_status() never closes its file handle; a `with` block
# would be safer. generate_phrase() is cut off at the end of this view.
if config.LANGUAGE: WORDLIST = wordlist(config.LANGUAGE) except ImportError: WORDLIST = [ ['The space'], ['is'], ['open'], ['closed'], [''], [''] ] try: twitter = Twitter(auth=OAuth( config.OAUTH_TOKEN, config.OAUTH_SECRET, config.CONSUMER_KEY, config.CONSUMER_SECRET)) except Exception as e: print('Error in twitter init: ' + e) exit(255) def write_status(status): status_file = open(config.STATUS_FILE, 'w+') status_file.write(json.dumps({'status': status})) def generate_phrase(open_status=True): phrase = choice(WORDLIST[0]) + " " phrase += choice(WORDLIST[1]) + " "
def bot_setup(self, config_file="config.txt"):
    """Load the bot configuration from *config_file* and connect to Twitter.

    The file holds one ``PARAMETER:value`` pair per line.  User-ID list
    parameters become sets of ints, the follow-backoff parameters become
    ints, and everything else is stored as a string.  Missing required
    parameters raise an Exception listing them all; follower-sync files
    are created when absent, and a staleness warning is printed when they
    are more than a day old.
    """
    with open(config_file, "r") as in_file:
        for line in in_file:
            # FIX: split on the FIRST colon only, so values that contain
            # ':' are not silently truncated (the previous bare
            # line.split(":") kept only the text between the first two
            # colons).
            parts = line.split(":", 1)
            if len(parts) != 2:
                # Tolerate blank or malformed lines instead of raising
                # IndexError on line[1].
                continue
            parameter = parts[0].strip()
            value = parts[1].strip()

            if parameter in [
                    "USERS_KEEP_FOLLOWING", "USERS_KEEP_UNMUTED",
                    "USERS_KEEP_MUTED"
            ]:
                # Comma-separated numeric user IDs -> set of ints.
                if value != "":
                    self.BOT_CONFIG[parameter] = set(
                        [int(x) for x in value.split(",")])
                else:
                    self.BOT_CONFIG[parameter] = set()
            elif parameter in [
                    "FOLLOW_BACKOFF_MIN_SECONDS", "FOLLOW_BACKOFF_MAX_SECONDS"
            ]:
                self.BOT_CONFIG[parameter] = int(value)
            else:
                self.BOT_CONFIG[parameter] = value

    # Fail fast, reporting every missing parameter at once.
    required_parameters = [
        "OAUTH_TOKEN", "OAUTH_SECRET", "CONSUMER_KEY", "CONSUMER_SECRET",
        "TWITTER_HANDLE", "ALREADY_FOLLOWED_FILE", "FOLLOWERS_FILE",
        "FOLLOWS_FILE"
    ]

    missing_parameters = []

    for required_parameter in required_parameters:
        if (required_parameter not in self.BOT_CONFIG
                or self.BOT_CONFIG[required_parameter] == ""):
            missing_parameters.append(required_parameter)

    if len(missing_parameters) > 0:
        self.BOT_CONFIG = {}
        raise Exception(
            "Please edit %s to include the following parameters: %s.\n\n"
            "The bot cannot run unless these parameters are specified."
            % (config_file, ", ".join(missing_parameters)))

    # Make sure the sync files exist so later reads do not fail.
    for sync_file in [
            self.BOT_CONFIG["ALREADY_FOLLOWED_FILE"],
            self.BOT_CONFIG["FOLLOWS_FILE"],
            self.BOT_CONFIG["FOLLOWERS_FILE"]
    ]:
        if not os.path.isfile(sync_file):
            with open(sync_file, "w") as out_file:
                out_file.write("")

    # Warn when the follower snapshots are older than 24 hours.
    if (time.time() - os.path.getmtime(self.BOT_CONFIG["FOLLOWS_FILE"]) >
            86400 or
            time.time() - os.path.getmtime(self.BOT_CONFIG["FOLLOWERS_FILE"])
            > 86400):
        print(
            "Warning: Your Twitter follower sync files are more than a day old. "
            "It is highly recommended that you sync them by calling sync_follows() "
            "before continuing.",
            file=sys.stderr)

    self.TWITTER_CONNECTION = Twitter(auth=OAuth(
        self.BOT_CONFIG["OAUTH_TOKEN"], self.BOT_CONFIG["OAUTH_SECRET"],
        self.BOT_CONFIG["CONSUMER_KEY"], self.BOT_CONFIG["CONSUMER_SECRET"]))