def find_user(token, graph_api_user, with_friends=True, retrieved_time=None, with_picture=False):
    """Retrieve a User resource from the Graph API and serialize it.

    Args:
        token: OAuth token passed through to ``graph_api_get``.
        graph_api_user (str): The ID/path of the user node within the Graph API.
        with_friends (bool): When True, also retrieve the user's friends and
            attach them to the serialized document.
        retrieved_time (datetime): Timestamp stored on the serialized User.
            Defaults to the current UTC time *at call time*.
        with_picture (bool): When True, download the profile picture and store
            it on the document.

    Returns:
        User: The serialized (created or updated) User document.

    Raises:
        Exception: If more than one User document already exists for this
            Facebook ID.
    """
    # BUG FIX: the original signature used `retrieved_time=datetime.utcnow()`,
    # which is evaluated once at import time, so every defaulted call shared
    # the same stale timestamp. Use a None sentinel and evaluate lazily.
    if retrieved_time is None:
        retrieved_time = datetime.utcnow()

    # Start to explore the Graph.
    user = graph_api_get(token, '/' + graph_api_user)
    # Retrieve the admin groups. NOTE(review): the result is currently unused,
    # but the request is kept to preserve the original API traffic — confirm
    # whether it can be dropped.
    accounts = graph_api_get(token, '/' + graph_api_user + '/accounts')

    gender = user.data.get('gender')

    # Calculate the age from the birthday, when a full date is available.
    if 'birthday' in user.data:
        user.data['age'] = user.data['birthday']
        current_date = datetime.utcnow()
        # If only a month and day are specified (MM/DD), do not attempt to
        # calculate the age of the user.
        if re.match(r'\d{2}/\d{2}$', user.data['birthday']):
            age = None
        else:
            birth_date = datetime.strptime(user.data['birthday'], '%m/%d/%Y')
            age = abs((current_date - birth_date).days / 365)
    else:
        age = None

    # Flatten the nested hometown/location nodes into plain display names.
    if 'hometown' in user.data:
        user.data['hometown_name'] = user.data['hometown']['name']
    else:
        user.data['hometown_name'] = ''
    if 'location' in user.data:
        user.data['location_name'] = user.data['location']['name']
    else:
        user.data['location_name'] = ''

    admin = is_admin(user.data['id'])

    if with_picture:
        # Resolve the large-picture URL (redirect=false returns JSON metadata
        # instead of the image bytes).
        picture = graph_api_get(token, '/' + graph_api_user + '/picture?redirect=false&type=large')
        picture_url = picture.data['data']['url']
        # Download the picture to a temporary file.
        h = httplib2.Http()
        (resp_headers, picture_resp) = h.request(picture_url, "GET")
        # Context manager so the handle is closed even if the write fails
        # (the original left the file open on error).
        with open('tmp/user_picture.jpg', 'wb') as picture_file:
            picture_file.write(picture_resp)
    else:
        picture_url = 'http://facebook.com'

    # Serialize the User: create it if absent, otherwise update in place.
    # NOTE(review): filtering on retrieved_time means an existing user saved at
    # a different time will not match and a new document is created — confirm
    # this versioning behavior is intended.
    user_objects = User.objects(facebook_id=user.data['id'], retrieved_time=retrieved_time)
    if len(user_objects) > 1:
        raise Exception("More than one User serialized for %s" % user_objects.facebook_id)
    elif len(user_objects) < 1:
        user_object = User(
            facebook_id=user.data['id'],
            name=user.data['name'],
            age=age,
            gender=gender,
            hometown_name=user.data['hometown_name'],
            location_name=user.data['location_name'],
            friends=[],
            admin=admin,
            picture_url=picture_url,
            retrieved_time=retrieved_time,
        )
        if with_picture:
            user_object.picture.put('tmp/user_picture.jpg')
        user_object.save()
    else:
        user_object = user_objects.first()
        user_object.facebook_id = user.data['id']
        user_object.age = age
        user_object.gender = gender
        user_object.hometown_name = user.data['hometown_name']
        user_object.location_name = user.data['location_name']
        user_object.admin = admin
        user_object.picture_url = picture_url
        if with_picture:
            user_object.picture.replace('tmp/user_picture.jpg')
        user_object.save()

    # The temporary picture file is no longer needed once stored on the doc.
    if with_picture:
        os.remove('tmp/user_picture.jpg')

    # Retrieve the friends and attach them to the serialized user.
    if with_friends:
        friends = find_friends(token, user.data['id'], retrieved_time)
        user_object.friends = friends
        user_object.save()

    return user_object
def download():
    """Download an xlsx export of Graph API data for any given User.

    Admin-only Flask view. Optional request args:
        min_date / max_date: ISO-8601 timestamps with a millisecond ``Z``
            suffix (e.g. ``2016-01-01T00:00:00.000Z``) bounding which posts
            are exported; defaults to all time.
        user: a Graph API user ID restricting the export to one user.

    Returns:
        A redirect to the index for unauthenticated or non-admin callers,
        otherwise ``tmp/socialmemories.xlsx`` sent as an attachment.
    """
    downloaded_resources = []
    users = []
    posts = []
    friendships = []
    publishings = []
    comments = []

    # Guard clause: the original wrapped the whole body in `if token:` with a
    # trailing `else: return redirect(...)`; the early return is equivalent.
    token = get_facebook_oauth_token()
    if not token:
        return redirect(url_for('index'))

    user = facebook.get('/me')
    if not is_admin(user.data['id']):
        return redirect(url_for('index'))

    # Date window filter for posts.
    min_date_str = request.args.get('min_date', None)
    max_date_str = request.args.get('max_date', None)
    if min_date_str is not None and max_date_str is not None:
        # Strip the millisecond + Z suffix the client sends (".123Z") so the
        # strings match the strptime format below.
        min_date_str = re.sub(r'\.\d{3}Z$', '', min_date_str)
        max_date_str = re.sub(r'\.\d{3}Z$', '', max_date_str)
        min_date = datetime.strptime(min_date_str, '%Y-%m-%dT%H:%M:%S')
        max_date = datetime.strptime(max_date_str, '%Y-%m-%dT%H:%M:%S')
    else:
        min_date = datetime(1, 1, 1)
        max_date = datetime.utcnow()

    graph_api_user = request.args.get('user', None)
    if graph_api_user is not None:
        # NOTE(review): this filters on the *logged-in* user's ID rather than
        # the requested `graph_api_user` (the commented-out original called
        # find_user with graph_api_user) — confirm which is intended.
        graph_users = User.objects(facebook_id=user.data['id'])
    else:
        # No filter: export every serialized User resource.
        graph_users = User.objects()

    post_users = {}
    for this_user in graph_users:
        # Skip resources that have already been collected for the export.
        if this_user.facebook_id in downloaded_resources:
            continue
        users.append(this_user)
        downloaded_resources.append(this_user.facebook_id)

        # Friends: add each one once as a user, and record the friendship edge.
        for friend in this_user.friends:
            if friend.facebook_id in downloaded_resources:
                continue
            users.append(friend)
            downloaded_resources.append(friend.facebook_id)
            friendships.append({'User A ID': this_user.facebook_id,
                                'User B ID': friend.facebook_id})

        # Posts previously serialized for this user.
        for post in Post.objects(user=this_user):
            if post.facebook_id in downloaded_resources:
                continue
            posts.append(post)
            downloaded_resources.append(post.facebook_id)
            if post.user is not None:
                post_users[post.user.facebook_id] = post.user
            # Record who published the post.
            # NOTE(review): post.user is dereferenced unconditionally here even
            # though it was None-checked just above — confirm posts always
            # carry a user, otherwise this raises AttributeError.
            publishings.append({
                'User ID': post.user.facebook_id,
                'Post ID': post.facebook_id
            })
            # Graph API version 2.6 or later required.
            post_comments = find_comments(token, post, min_date, max_date)
            comments.extend(post_comments)
            post_sharedposts = find_sharedposts(post, min_date, max_date)
            # BUG FIX: the original extended `comments` with an always-empty
            # module-level `sharedposts` list and discarded `post_sharedposts`,
            # so shares never reached the export. Temporarily concatenate all
            # comments and shares.
            comments.extend(post_sharedposts)

    workbook = xlsxwriter.Workbook('tmp/socialmemories.xlsx')
    users_worksheet = workbook.add_worksheet('Users')
    posts_worksheet = workbook.add_worksheet('Posts')
    friendships_worksheet = workbook.add_worksheet('Friendships')
    publishings_worksheet = workbook.add_worksheet('Publishings')
    comments_worksheet = workbook.add_worksheet('Comments')
    denorm_posts_worksheet = workbook.add_worksheet('Denormalized Posts')

    # Gender classifier used by the exporters: Naive Bayes over the NLTK names
    # corpus (performant, if not that accurate).
    human_names = ([(name, 'male') for name in names.words('male.txt')] +
                   [(name, 'female') for name in names.words('female.txt')])
    features = [({'name': name}, gender) for (name, gender) in human_names]
    # The first 500 names are held out (previously an unused test set).
    training_set = features[500:]
    classifier = NaiveBayesClassifier.train(training_set)

    export_users(csv, users, users_worksheet, classifier)
    export_posts(csv, posts, posts_worksheet, denorm_posts_worksheet, post_users, classifier)
    export_comments(csv, comments, comments_worksheet)
    export_friendships(csv, friendships, friendships_worksheet)
    export_publishings(csv, publishings, publishings_worksheet)
    workbook.close()

    return send_file('tmp/socialmemories.xlsx', as_attachment=True)