def setUp(self):
    """
    Prepare the DB fixture for the timeline-overlap tests.

    Two authors are stored, each followed by ten posts whose ids/guids are
    ``<prefix>1`` .. ``<prefix>10``:

    * ``acquired_user`` (osn id 1, type ``bad_actor`` / sub type
      ``acquired``) with posts prefixed ``bad_post``;
    * ``TestUser1`` (osn id 2, no type set) with posts prefixed ``TestPost``.

    The session is committed once everything has been added.
    """
    self.config = getConfig()
    self._db = DB()
    self._db.setUp()
    self.timeline_overlap = TimelineOverlapVisualizationGenerator()

    def store_author(identity, osn_id, extra_fields):
        # One identity string is reused for every name-like column.
        record = Author()
        record.name = identity
        record.domain = 'Microblog'
        record.author_guid = identity
        record.author_screen_name = identity
        record.author_full_name = identity
        record.author_osn_id = osn_id
        record.created_at = datetime.datetime.now()
        record.missing_data_complementor_insertion_date = datetime.datetime.now()
        record.xml_importer_insertion_date = datetime.datetime.now()
        for field_name, field_value in extra_fields:
            setattr(record, field_name, field_value)
        self._db.add_author(record)

    def store_posts(id_prefix, owner):
        for index in range(1, 11):
            suffix = str(index)
            entry = Post()
            entry.post_id = id_prefix + suffix
            entry.author = owner
            entry.guid = id_prefix + suffix
            entry.date = datetime.datetime.now()
            entry.domain = 'Microblog'
            entry.author_guid = owner
            entry.content = 'InternetTV love it' + suffix
            entry.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(entry)

    store_author('acquired_user', 1,
                 (('author_type', 'bad_actor'),
                  ('author_sub_type', 'acquired')))
    store_posts('bad_post', 'acquired_user')

    store_author('TestUser1', 2, ())
    store_posts('TestPost', 'TestUser1')

    self._db.commit()
def get_liked_pages_from_user(self):
    """
    Method gets liked pages from users in the config.ini osn_ids.

    Assumption: The users in the osn_ids fields in config.ini ARE NOT IN THE DB

    For every osn id the profile page is opened, the user's display name and
    unique user name are read from the profile link, and an Author of type
    "User" is built. The liked pages are converted to Authors as well; the
    user is appended to that list, the list is stored, and then the
    page/user connections are stored.
    """
    self._facebook_login()
    for osn_id in self.osn_ids:
        user = Author()
        self.driver.get('https://www.facebook.com/' + osn_id)
        time.sleep(2)  # presumably lets the profile page finish loading

        profile_link = self.driver.find_element_by_xpath(
            "//a[@class='_2nlw _2nlv']")
        profile_href = profile_link.get_attribute('href')
        display_name = profile_link.text
        screen_name = self._parse_unique_user_name(profile_href)

        user.name = display_name
        user.author_screen_name = screen_name
        user.author_osn_id = osn_id
        user.author_guid = commons.compute_author_guid_by_osn_id(
            user.author_osn_id)
        user.domain = self._domain
        user.author_type = "User"

        liked_pages = self._get_liked_pages_from_user(osn_id, screen_name)
        authors_to_store = self._convert_pages_to_authors(liked_pages)
        # The user goes into the same list so it is stored with the pages.
        authors_to_store.append(user)
        self._db.addPosts(authors_to_store)

        links = self._convert_page_and_user_to_connection(
            authors_to_store, user)
        self._db.addPosts(links)
def get_about_info_from_users(self):
    """
    Build an Author for every osn id in ``self.osn_ids`` and store them.

    Each Author starts with the about-fields listed in ``blocked_fields``
    set to 'User Blocked'. The profile page is then opened to read the
    display name and unique user name from the profile link. Once all
    Authors are built they are handed to ``_get_about_info_for_authors``
    and then stored through ``self._db.addPosts``.
    """
    self._facebook_login()

    # Need to add the rest of the features with User Blocked as default.
    blocked_fields = (
        'education',
        'professional_skills',
        'past_residence',
        'birth_day',
        'gender',
        'email',
        'work',
    )

    authors = []
    for author_osn_id in self.osn_ids:
        author = Author()
        author.domain = self._domain
        author.author_osn_id = author_osn_id
        author.author_type = 'User'
        for field_name in blocked_fields:
            setattr(author, field_name, 'User Blocked')

        self.driver.get('https://www.facebook.com/' + author_osn_id)
        a_element = self.driver.find_element_by_xpath(
            "//a[@class='_2nlw _2nlv']")
        href_attribute = a_element.get_attribute('href')

        author.name = a_element.text
        author.author_screen_name = self._parse_unique_user_name(
            href_attribute)
        author.author_guid = commons.compute_author_guid_by_osn_id(
            author_osn_id)
        authors.append(author)

    self._get_about_info_for_authors(authors)
    self._db.addPosts(authors)
def _create_author_by_row(self, record_dict):
    """
    Build an Author from a single record dictionary.

    :param record_dict: mapping that provides the keys ``tumblog_id``,
        ``tumblr_blog_name``, ``tumblr_blog_description``,
        ``created_time_epoch``, ``tumblr_blog_url``, ``is_private``,
        ``timezone`` and ``language``.
    :return: the populated Author (it is not added to the DB here).
    """
    to_unicode = self._convert_to_unicode_value
    author = Author()

    author_osn_id = to_unicode(record_dict["tumblog_id"])
    author.author_osn_id = author_osn_id
    author.name = author_osn_id
    author.domain = self._domain
    author.author_guid = compute_author_guid_by_author_name(author.name)

    author.author_screen_name = to_unicode(record_dict["tumblr_blog_name"])
    author.description = to_unicode(record_dict["tumblr_blog_description"])

    created_time_epoch = to_unicode(record_dict["created_time_epoch"])
    if created_time_epoch is not None:
        # The helper returns a pair; only the second (string) element is
        # stored. The first is discarded so the local name no longer
        # shadows the ``datetime`` module.
        _, str_datetime = convert_epoch_timestamp_to_datetime(
            created_time_epoch)
        author.created_at = str_datetime
    else:
        author.created_at = self._set_start_date()

    author.url = to_unicode(record_dict["tumblr_blog_url"])
    author.time_zone = to_unicode(record_dict["timezone"])
    author.language = to_unicode(record_dict["language"])

    # Only the exact string "TRUE" marks the blog as protected.
    author.protected = 1 if record_dict["is_private"] == "TRUE" else 0
    return author
def _add_author(self, name=None, link_karma=None, comment_karma=None,
                is_employee=0, is_mod=0, is_gold=0, author_osn_id=None):
    """
    Store an Author and its matching RedditAuthor record.

    Both records share the same name and guid. The Author's domain is
    'reddit' and its url is the reddit user page for ``name``. The
    RedditAuthor carries the karma values and the gold / moderator /
    employee flags; its ``comments_count`` is set to None.

    NOTE(review): ``name`` defaults to None, but the url is built with
    string concatenation, so a None name raises TypeError there.
    """
    account = Author()
    reddit_profile = RedditAuthor()

    account.name = name
    account.author_screen_name = account.name
    guid = compute_author_guid_by_author_name(account.name)
    account.author_guid = guid
    account.domain = 'reddit'
    account.author_osn_id = author_osn_id
    account.author_full_name = name
    account.url = 'https://www.reddit.com/user/' + name

    reddit_profile.name = account.name
    reddit_profile.author_guid = account.author_guid
    reddit_profile.comments_count = None
    reddit_profile.comment_karma = comment_karma
    reddit_profile.link_karma = link_karma
    reddit_profile.is_gold = is_gold
    reddit_profile.is_moderator = is_mod
    reddit_profile.is_employee = is_employee

    self._db.add_authors([account])
    self._db.add_reddit_authors([reddit_profile])
def _parse_author(self, json_content):
    """
    Build an Author out of one post's JSON content.

    The post id is used as the author's name, screen name and osn id. The
    guid is computed from the social network url, the post id and the post
    date. ``author_type`` is only set when the post id appears in
    ``self._post_id_targeted_class_dict``. ``media_path`` is the first
    entry of ``postMedia`` or None when that list is empty.

    :return: the populated Author.
    """
    result = Author()

    identifier = json_content["id"]
    result.name = identifier
    result.author_screen_name = identifier
    result.author_osn_id = identifier
    result.domain = self._domain

    # The helper yields a pair; only the second element is used here.
    _, post_date = self._get_str_and_date_formats(
        json_content["postTimestamp"])
    result.created_at = str(post_date)
    result.author_guid = compute_post_guid(
        self._social_network_url, identifier, post_date)

    targeted_classes = self._post_id_targeted_class_dict
    if identifier in targeted_classes:
        result.author_type = targeted_classes[identifier]

    media_items = json_content["postMedia"]
    result.media_path = media_items[0] if len(media_items) > 0 else None

    result.notifications = self._dataset_type
    return result
def liker_to_author(self, liker, photo_id):
    """
    Convert a liker record into an Author of type 'like'.

    :param liker: mapping with ``username``, ``favedate`` and ``nsid``;
        ``realname`` is optional and defaults to an empty string.
    :param photo_id: stored (as a string) in the Author's ``domain``.
    :return: the populated Author; its guid is computed from the nsid.
    """
    username = str(liker['username'])
    full_name = str(liker.get('realname', ""))
    fave_date = str(liker["favedate"])
    nsid = str(liker['nsid'])

    liker_author = Author()
    liker_author.name = username
    liker_author.author_full_name = full_name
    liker_author.domain = str(photo_id)
    liker_author.created_at = fave_date
    liker_author.author_osn_id = nsid
    liker_author.author_guid = compute_author_guid_by_author_name(nsid)
    liker_author.author_type = 'like'
    return liker_author
def _convert_reddit_author_to_author(self, redditor):
    """
    Convert a redditor object into an Author with domain 'reddit'.

    Every attribute is read with ``getattr`` and a default, so a redditor
    object that lacks an attribute still yields an Author. The url is built
    from the same defensively-read name, so a missing ``name`` no longer
    raises AttributeError.

    :param redditor: object that may expose ``name``, ``created_utc``,
        ``id`` and ``fullname``.
    :return: the populated Author.
    """
    author = Author()
    author.name = getattr(redditor, 'name', '')
    author.author_screen_name = author.name
    author.author_guid = compute_author_guid_by_author_name(author.name)
    author.domain = u'reddit'
    # NOTE(review): fromtimestamp() yields a naive local-time datetime;
    # confirm whether UTC was intended for ``created_utc``.
    author.created_at = datetime.fromtimestamp(
        getattr(redditor, 'created_utc', 0))
    author.author_osn_id = getattr(redditor, 'id', '')
    author.author_full_name = getattr(redditor, 'fullname', '')
    author.url = u'https://www.reddit.com/user/' + author.name
    return author
def _create_author(self, guid, author_type):
    """
    Create a test Author, register it in the features dict and store it.

    The unicode form of ``guid`` is used as both name and guid. The screen
    name is u'TestUser1', the domain u'Restaurant' and the osn id 1. An
    empty list is registered for the guid in
    ``self._authors_to_author_features_dict`` before the Author is added
    to the DB.

    :param guid: value converted with ``unicode`` for name and guid.
    :param author_type: stored in the Author's ``author_type``.
    """
    author_guid = unicode(guid)

    author = Author()
    author.name = author_guid
    author.author_guid = author_guid
    author.author_screen_name = u'TestUser1'
    author.author_type = author_type
    author.domain = u'Restaurant'
    author.author_osn_id = 1

    self._authors_to_author_features_dict[author.author_guid] = []
    self._db.add_author(author)
def _json_user_to_db_author_converter(self, user, domain='Instagram_author'):
    """
    Convert a user dictionary into an Author.

    :param user: mapping with ``username``, ``id``, ``full_name`` and
        ``profile_pic_url``; ``biography`` is optional. The mapping is only
        read, never modified.
    :param domain: stored in both ``domain`` and ``author_type``.
    :return: the populated Author; its url is the Instagram profile url
        built from the screen name.
    """
    author = Author()
    author.name = user['username']
    author.author_screen_name = author.name
    author.author_guid = compute_author_guid_by_author_name(author.name)
    author.domain = domain
    author.author_type = domain
    author.author_osn_id = user['id']
    author.author_full_name = user['full_name']
    # .get() gives None for a missing biography without inserting the key
    # into the caller's dict.
    author.description = user.get('biography')
    author.url = 'https://www.instagram.com/' + author.author_screen_name
    author.profile_image_url = user['profile_pic_url']
    return author
def owner_to_author(self, jsonresponse):
    """
    Convert the owner of a photo response into an Author of type 'owner'.

    :param jsonresponse: mapping with a ``photo`` entry that holds ``id``,
        ``dates['posted']`` and an ``owner`` mapping with ``username``,
        ``realname``, ``nsid`` and ``location``.
    :return: the populated Author; the photo id is stored as the domain
        and the guid is computed from the owner's nsid.
    """
    photo = jsonresponse['photo']
    owner = photo['owner']

    owner_author = Author()
    owner_author.name = str(owner['username'])
    owner_author.author_full_name = str(owner['realname'])
    owner_author.domain = str(photo['id'])
    owner_author.created_at = str(photo['dates']["posted"])

    nsid = str(owner['nsid'])
    owner_author.author_osn_id = nsid
    owner_author.author_guid = compute_author_guid_by_author_name(nsid)

    owner_author.location = str(owner['location'])
    owner_author.author_type = 'owner'
    return owner_author
def _create_author_by_citation(self, reblogged_from_metadata):
    """
    Build an Author for the blog a post was reblogged from.

    :param reblogged_from_metadata: mapping with ``parent_post_blog_id``,
        ``parent_post_blog_name`` and ``parent_post_short_url``.
    :return: the populated Author. The blog id serves as osn id and name;
        the guid is computed from that name.
    """
    metadata = reblogged_from_metadata
    cited = Author()

    blog_id = metadata["parent_post_blog_id"]
    cited.author_osn_id = blog_id
    cited.name = blog_id
    cited.author_guid = compute_author_guid_by_author_name(cited.name)

    cited.author_screen_name = self._convert_to_unicode_value(
        metadata["parent_post_blog_name"])
    cited.url = self._convert_to_unicode_value(
        metadata["parent_post_short_url"])
    cited.domain = self._domain
    return cited
def commenter_to_author(self, commenter, photo_id):
    """
    Convert a commenter record into an Author of type 'comment'.

    :param commenter: mapping with ``authorname``, ``path_alias``,
        ``permalink``, ``datecreate`` and ``author``; ``realname`` is
        optional and defaults to an empty string.
    :param photo_id: stored (as a string) in the Author's ``domain``.
    :return: the populated Author; its guid is computed from the value of
        the ``author`` key.
    """
    display_name = str(commenter['authorname'])
    alias = str(commenter['path_alias'])
    full_name = str(commenter.get('realname', ""))
    link = str(commenter['permalink'])
    domain = str(photo_id)
    created = str(commenter["datecreate"])
    osn_id = str(commenter['author'])

    comment_author = Author()
    comment_author.name = display_name
    comment_author.author_screen_name = alias
    comment_author.author_full_name = full_name
    comment_author.url = link
    comment_author.domain = domain
    comment_author.created_at = created
    comment_author.author_osn_id = osn_id
    comment_author.author_guid = compute_author_guid_by_author_name(osn_id)
    comment_author.author_type = 'comment'
    return comment_author
def _convert_group_to_author(self):
    """
    Method takes given group_id from config.ini and creates Author object for it.

    :return: an Author of type "Group" whose name, followers count and sub
        type come from ``get_group_name``, ``get_group_number_of_members``
        and ``get_group_level_of_activity`` respectively.
    """
    group = Author()
    group.name = self.get_group_name()

    group_id = self._group_id
    group.author_osn_id = group_id
    group.author_guid = commons.compute_author_guid_by_osn_id(
        group.author_osn_id)

    group.domain = self._domain
    group.author_type = "Group"

    member_count = self.get_group_number_of_members()
    group.followers_count = member_count
    activity_level = self.get_group_level_of_activity()
    group.author_sub_type = activity_level
    return group
def _convert_pages_to_authors(self, pages_id_to_name_dict):
    """
    Turn a page-id -> page-name mapping into Authors of type "Page".

    :param pages_id_to_name_dict: dict keyed by page id with the page name
        as value.
    :return: a list of Author objects ready to be added to DB
    """
    def build_page(page_id, page_name):
        page = Author()
        page.name = page_name
        page.author_osn_id = page_id
        page.author_guid = commons.compute_author_guid_by_osn_id(
            page.author_osn_id)
        page.domain = self._domain
        page.author_type = "Page"
        return page

    return [build_page(page_id, page_name)
            for page_id, page_name in pages_id_to_name_dict.items()]
def _create_author(self, post, dataset_affiliation):
    """
    Derive an Author from a post.

    The post's author string becomes both name and screen name; the guid
    computed from that name is stored as guid and as osn id.

    :param post: object exposing ``author`` and ``post_type``.
    :param dataset_affiliation: stored in the Author's ``notifications``.
    :return: the populated Author.
    """
    derived = Author()

    poster_name = post.author
    derived.name = poster_name
    derived.author_screen_name = poster_name

    guid = compute_author_guid_by_author_name(poster_name)
    derived.author_osn_id = guid
    derived.author_guid = guid

    derived.domain = self._domain
    derived.author_type = post.post_type
    derived.notifications = dataset_affiliation
    return derived
def _convert_group_members_to_author(self, users_id_to_name_dict):
    """
    Turn a user-id -> (name, screen name) mapping into Authors of type "User".

    :param users_id_to_name_dict: dict keyed by user id; each value is
        indexed with 0 for the name and 1 for the screen name.
    :return: a list of Author objects ready to be added to DB.
    """
    members = []
    for user_id, names in users_id_to_name_dict.items():
        member = Author()
        member.name = names[0]
        member.author_screen_name = names[1]
        member.author_osn_id = user_id
        member.author_guid = commons.compute_author_guid_by_osn_id(
            member.author_osn_id)
        member.domain = self._domain
        member.author_type = "User"
        members.append(member)
    return members
def updateAuthorsData(self):
    """
    Build Authors from ``self._author_prop_dict`` and update them in the DB.

    Each entry is expected to provide the keys ``author``,
    ``author_osn_id``, ``author_guid``, ``followers_count``, ``location``,
    ``favorite``, ``description`` and ``url``. An entry that raises
    ValueError, TypeError or KeyError while being converted is logged and
    skipped; all successfully built Authors are passed to
    ``self._db.update_authors`` in one call.
    """
    list_to_add = []
    for dic in self._author_prop_dict:
        # Reset per entry so the handler never reports the previous
        # entry's author, and never hits an unbound name if Author() fails.
        author = None
        try:
            author = Author()
            author.name = dic['author']
            author.domain = unicode('Microblog')
            author.author_osn_id = dic['author_osn_id']
            author.author_guid = dic['author_guid']
            author.followers_count = dic['followers_count']
            author.location = dic['location']
            author.favourites_count = dic['favorite']
            author.description = dic['description']
            author.url = dic['url']
            list_to_add.append(author)
        except (ValueError, TypeError, KeyError) as e:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning("Failed to add author: {0} - {1}".format(
                getattr(author, 'name', None), e))
    self._db.update_authors(list_to_add)