#!/usr/bin/python3 import datetime import config, people, twitter_api token = config.load_token('/vagrant/token/twitter.json') users = people.load('./people.json.pretty') today = datetime.date.today() i = 0 for user_id, user in users.items(): i += 1 print(i) u = twitter_api.user(token, user_id) if u is None: continue if 'since_id' not in user['twitter']: user['twitter']['since_id'] = -1 since_id = user['twitter']['since_id'] ids = twitter_api.user_tweets_id(token, user_id, since_id) if '_activity' not in user['twitter']: user['twitter']['_activity'] = {} activity = { 'statuses_count': u['statuses_count'],
#!/usr/bin/python3
"""Extract a YouTube channel/user id from each person's Twitter profile URLs.

Loads ./people.json, scans each person's Twitter `entities` URL lists
(profile description and website field), and when a YouTube link that is
not a plain watch?v= video URL is found, stores its trailing path segment
under person['youtube'] = {'id': ...}. Saves the result back to ./people.json.
"""
import re

import people

# Precompiled once, outside the loop. Raw strings fix the original's
# invalid escape sequences ('\.' and '\?' in a non-raw string raise
# SyntaxWarning/DeprecationWarning on modern Python).
_WATCH_RE = re.compile(r'^.*youtube\.com/watch\?v=.*$')
_CHANNEL_RE = re.compile(r'^.*youtube\.com/(.*/)*(.*)$')

users = people.load('./people.json')
for user_id, user in users.items():
    if 'entities' not in user['twitter']:
        continue
    entities = user['twitter']['entities']

    # Collect expanded URL entries from both places Twitter may put them.
    urls = []
    if 'description' in entities:
        urls.extend(entities['description']['urls'])
    if 'url' in entities:
        urls.extend(entities['url']['urls'])

    for url in urls:
        if 'expanded_url' not in url or url['expanded_url'] is None:
            continue
        expanded = url['expanded_url']
        # Plain video links carry no channel id — only fall through to the
        # generic youtube.com/<path> pattern when it is NOT a watch URL.
        if _WATCH_RE.match(expanded):
            continue
        match = _CHANNEL_RE.match(expanded)
        if match:
            # group(2) is the last path segment (channel/user id).
            user['youtube'] = {'id': match.group(2)}

people.save(users, './people.json')
#!/usr/bin/python3 import datetime today = str(datetime.date.today()) import people persons = people.load('/vagrant/data/people.json') import json, requests from pyquery import PyQuery for twitter_id, person in persons.items(): if 'fitbit' not in person: continue fitbit = person['fitbit'] if 'url' not in fitbit: print('[WARN][' + twitter_id + '] fitbit url is missing') continue url = fitbit['url'] r = requests.get(url) pq = PyQuery(r.text) height = pq('li.user-stat.height') location = pq('li.user-stat.location') joined = pq('li.user-stat.location') aboutme = pq('div.content.firstContent') lifetime = pq('div.tabsData.lifetime.cached')
#!/usr/bin/python3 import jellyfish, json, os, re import people from url import expand # work dir work_dir = os.environ['WORKDIR'] twitter_dir = os.environ['TWITTERDIR'] # load people people_path = work_dir + '/data/people.json' persons = people.load(people_path) # load tweets tweets_path = twitter_dir + '/3data/url_facebook.json' with open(tweets_path, 'r') as tweets_file: tweets = json.load(tweets_file) # for each tweet i = 0 for tweet in tweets['tweets']: i += 1 user = tweet['user'] tw_name = user['name'] tw_username = user['screen_name'] urls = tweet['entities']['urls'] # for each url
#!/usr/bin/python3
"""Fetch each person's LinkedIn profile page and print its profile picture text.

Loads people from /vagrant/data/people.json.pretty, requests every stored
linkedin url, dumps the raw response to /tmp/index.html for manual
inspection, and prints the text of the `div.profile-picture` element.
"""
import datetime

today = str(datetime.date.today())  # run date (unused in the visible code)

import people

persons = people.load('/vagrant/data/people.json.pretty')

import json, requests
from pyquery import PyQuery

for twitter_id, person in persons.items():
    # Only people that already have a linkedin record are scraped.
    if 'linkedin' not in person:
        continue
    linkedin = person['linkedin']
    if 'url' not in linkedin:
        print('[WARN][' + twitter_id + '] linkedin url is missing')
        continue
    url = linkedin['url']
    # BUG FIX: a leftover hard-coded debug line
    # (url = 'https://pinterest.com/larryconlin') overwrote `url` here, so
    # every iteration fetched the same Pinterest page instead of the
    # person's LinkedIn profile. Removed so the loaded url is used.
    r = requests.get(url)
    # Keep a copy of the raw response body for manual inspection/debugging;
    # each iteration overwrites the previous dump.
    with open('/tmp/index.html', 'w') as html_file:
        html_file.write(r.text)
    pq = PyQuery(r.text)
    div = pq('div.profile-picture')
    print(div.text())