def _create_author_by_row(self, record_dict):
    """Build an ``Author`` from a single Tumblr blog record.

    :param record_dict: mapping read with the keys ``tumblog_id``,
        ``tumblr_blog_name``, ``tumblr_blog_description``,
        ``created_time_epoch``, ``tumblr_blog_url``, ``is_private``,
        ``timezone`` and ``language``.
    :return: the populated ``Author``; this method only returns it.
    """
    to_unicode = self._convert_to_unicode_value

    author = Author()

    # The blog id doubles as the author name, which in turn seeds the guid.
    author_osn_id = to_unicode(record_dict["tumblog_id"])
    author.author_osn_id = author_osn_id
    author.name = author_osn_id
    author.domain = self._domain
    author.author_guid = compute_author_guid_by_author_name(author.name)

    author.author_screen_name = to_unicode(record_dict["tumblr_blog_name"])
    author.description = to_unicode(record_dict["tumblr_blog_description"])

    created_time_epoch = to_unicode(record_dict["created_time_epoch"])
    if created_time_epoch is not None:
        # Only the string form is stored; the first element is discarded
        # (binding it to a local named ``datetime`` would shadow that name).
        _, str_datetime = convert_epoch_timestamp_to_datetime(created_time_epoch)
        author.created_at = str_datetime
    else:
        # No creation time in the record: fall back to _set_start_date().
        author.created_at = self._set_start_date()

    author.url = to_unicode(record_dict["tumblr_blog_url"])
    author.time_zone = to_unicode(record_dict["timezone"])
    author.language = to_unicode(record_dict["language"])

    # ``protected`` is 1 only for the exact string "TRUE", 0 for anything
    # else. This comparison is the single source of the value.
    author.protected = 1 if record_dict["is_private"] == "TRUE" else 0
    return author
def setUp(self):
    """Create a fresh DB holding two authors with ten posts each.

    The first author ('acquired_user') additionally carries the
    'bad_actor' / 'acquired' type labels; the second ('TestUser1') has no
    type set. Everything is committed at the end.
    """
    self.config = getConfig()
    self._db = DB()
    self._db.setUp()
    self.timeline_overlap = TimelineOverlapVisualizationGenerator()

    # (author name, OSN id, post id prefix, extra author attributes)
    fixtures = (
        ('acquired_user', 1, 'bad_post',
         (('author_type', 'bad_actor'), ('author_sub_type', 'acquired'))),
        ('TestUser1', 2, 'TestPost', ()),
    )

    for user_name, osn_id, post_prefix, extra_attributes in fixtures:
        account = Author()
        account.name = user_name
        account.domain = 'Microblog'
        account.author_guid = user_name
        account.author_screen_name = user_name
        account.author_full_name = user_name
        account.author_osn_id = osn_id
        account.created_at = datetime.datetime.now()
        account.missing_data_complementor_insertion_date = datetime.datetime.now()
        account.xml_importer_insertion_date = datetime.datetime.now()
        for attribute_name, attribute_value in extra_attributes:
            setattr(account, attribute_name, attribute_value)
        self._db.add_author(account)

        # Ten posts per author, numbered 1..10.
        for index in range(1, 11):
            post_key = post_prefix + str(index)
            entry = Post()
            entry.post_id = post_key
            entry.author = user_name
            entry.guid = post_key
            entry.date = datetime.datetime.now()
            entry.domain = 'Microblog'
            entry.author_guid = user_name
            entry.content = 'InternetTV love it' + str(index)
            entry.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(entry)

    self._db.commit()
def _get_source_id_source_element_dict(self, source_ids, targeted_fields_dict):
    """Map every requested source id to the author that represents it.

    All elements of the source table are fetched, and only those whose id
    is in *source_ids* (and is not ``None``) are kept.

    :param source_ids: iterable of ids to keep.
    :param targeted_fields_dict: mapping whose ``'source'`` entry provides
        ``'table_name'`` and ``'id'`` (the attribute name holding the id).
    :return: ``defaultdict`` (created without a default factory) keyed by
        source id.
    """
    source_settings = targeted_fields_dict['source']
    table_name = source_settings['table_name']
    id_field = source_settings['id']

    rows = self._db.get_table_elements_by_where_cluases(table_name, [])
    authors_by_guid = self._db.get_author_dictionary()
    wanted_ids = set(source_ids)

    result = defaultdict()
    for row in rows:
        row_id = getattr(row, id_field)
        # Skip rows that were not requested or that carry no id.
        if not (row_id in wanted_ids and row_id is not None):
            continue

        if isinstance(row, Author):
            resolved = authors_by_guid[row.author_guid]
        elif id_field == u"author_guid":
            resolved = authors_by_guid[row_id]
        elif hasattr(row, u"author_guid"):
            resolved = authors_by_guid[row.author_guid]
        else:
            # No known author behind this row: synthesize one from the row.
            resolved = Author()
            resolved.author_guid = row_id
            resolved.statuses_count = len(targeted_fields_dict)
            if hasattr(row, 'created_at'):
                resolved.created_at = row.created_at

        result[row_id] = resolved
    return result
def _parse_author(self, json_content):
    """Create an ``Author`` out of one parsed post JSON object.

    The post id is used as the author's name, screen name and OSN id, and
    the post guid is used as the author guid.

    :param json_content: mapping read with the keys ``id``,
        ``postTimestamp`` and ``postMedia``.
    :return: the populated ``Author``.
    """
    parsed = Author()

    post_id = json_content["id"]
    parsed.name = parsed.author_screen_name = parsed.author_osn_id = post_id
    parsed.domain = self._domain

    # Only the second element of the returned pair is used.
    _, post_date = self._get_str_and_date_formats(json_content["postTimestamp"])
    parsed.created_at = str(post_date)
    parsed.author_guid = compute_post_guid(self._social_network_url, post_id, post_date)

    # The author type is set only for posts with a known targeted class.
    if post_id in self._post_id_targeted_class_dict:
        parsed.author_type = self._post_id_targeted_class_dict[post_id]

    # First media entry if there is one, otherwise None.
    media = json_content["postMedia"]
    parsed.media_path = media[0] if len(media) > 0 else None

    parsed.notifications = self._dataset_type
    return parsed
def _create_defult_author(self, source_id, targeted_fields_dict, temp_author):
    """Build a placeholder ``Author`` for a source that has no author.

    :param source_id: value stored as the author's guid.
    :param targeted_fields_dict: only its length is used, stored as
        ``statuses_count``.
    :param temp_author: source element; if it has a ``date`` attribute,
        that value becomes ``created_at``.
    :return: the new ``Author``.
    """
    placeholder = Author()
    placeholder.author_guid = source_id
    placeholder.statuses_count = len(targeted_fields_dict)

    if hasattr(temp_author, 'date'):
        placeholder.created_at = temp_author.date

    return placeholder
def _add_author(self, author_guid):
    """Create a test ``Author`` for *author_guid* and keep it on ``self._author``.

    :param author_guid: used as both the guid and the screen name.
    """
    test_author = Author()
    test_author.author_guid = author_guid
    test_author.author_screen_name = author_guid
    test_author.author_full_name = u'test author'
    test_author.name = u'test'
    test_author.domain = u'tests'
    test_author.statuses_count = 0
    test_author.created_at = u"2017-06-14 05:00:00"

    # NOTE(review): the DB insert is disabled in the source, so the author
    # is only kept in memory - confirm this is intended.
    # self._db.add_author(test_author)
    self._author = test_author
def _convert_reddit_author_to_author(self, redditor):
    """Convert a reddit user object into an ``Author``.

    Every attribute of *redditor* is read with a default, so an object
    lacking some of them still yields an ``Author``.

    :param redditor: object optionally exposing ``name``, ``created_utc``,
        ``id`` and ``fullname``.
    :return: the populated ``Author`` with domain ``u'reddit'``.
    """
    # Read the name once, with a default, and reuse it everywhere. Building
    # the URL from ``redditor.name`` directly would raise AttributeError in
    # exactly the case the default is meant to cover.
    name = getattr(redditor, 'name', '')

    author = Author()
    author.name = name
    author.author_screen_name = name
    author.author_guid = compute_author_guid_by_author_name(name)
    author.domain = u'reddit'
    # NOTE(review): fromtimestamp() yields local time although the source
    # attribute is called ``created_utc`` - confirm which one is wanted.
    author.created_at = datetime.fromtimestamp(getattr(redditor, 'created_utc', 0))
    author.author_osn_id = getattr(redditor, 'id', '')
    author.author_full_name = getattr(redditor, 'fullname', '')
    author.url = u'https://www.reddit.com/user/' + name
    return author
def liker_to_author(self, liker, photo_id):
    """Turn the record of a user who liked a photo into an ``Author``.

    :param liker: mapping read with the keys ``username``, ``favedate`` and
        ``nsid``; ``realname`` is optional and defaults to an empty string.
    :param photo_id: id of the liked photo, stored (as text) in ``domain``.
    :return: the populated ``Author`` with ``author_type`` set to ``'like'``.
    """
    liker_author = Author()
    liker_author.name = str(liker['username'])
    liker_author.author_full_name = str(liker.get('realname', ""))
    liker_author.domain = str(photo_id)
    liker_author.created_at = str(liker["favedate"])

    # The guid is derived from the OSN id rather than from the user name.
    osn_id = str(liker['nsid'])
    liker_author.author_osn_id = osn_id
    liker_author.author_guid = compute_author_guid_by_author_name(osn_id)

    liker_author.author_type = 'like'
    return liker_author
def commenter_to_author(self, commenter, photo_id):
    """Turn the record of a user who commented on a photo into an ``Author``.

    :param commenter: mapping read with the keys ``authorname``,
        ``path_alias``, ``permalink``, ``datecreate`` and ``author``;
        ``realname`` is optional and defaults to an empty string.
    :param photo_id: id of the commented photo, stored (as text) in ``domain``.
    :return: the populated ``Author`` with ``author_type`` set to
        ``'comment'``.
    """
    # Every value goes through str(), so a None value is stored as 'None'.
    field_values = (
        ('name', str(commenter['authorname'])),
        ('author_screen_name', str(commenter['path_alias'])),
        ('author_full_name', str(commenter.get('realname', ""))),
        ('url', str(commenter['permalink'])),
        ('domain', str(photo_id)),
        ('created_at', str(commenter["datecreate"])),
        ('author_osn_id', str(commenter['author'])),
    )

    comment_author = Author()
    for field_name, field_value in field_values:
        setattr(comment_author, field_name, field_value)

    # The guid is derived from the OSN id rather than from the user name.
    comment_author.author_guid = compute_author_guid_by_author_name(
        comment_author.author_osn_id)
    comment_author.author_type = 'comment'
    return comment_author
def owner_to_author(self, jsonresponse):
    """Turn the owner section of a photo response into an ``Author``.

    :param jsonresponse: mapping with a ``photo`` entry that provides
        ``id``, ``dates.posted`` and an ``owner`` entry with ``username``,
        ``realname``, ``nsid`` and ``location``.
    :return: the populated ``Author`` with ``author_type`` set to ``'owner'``.
    """
    owner_author = Author()

    photo = jsonresponse['photo']
    owner = photo['owner']

    owner_author.name = str(owner['username'])
    owner_author.author_full_name = str(owner['realname'])
    owner_author.domain = str(photo['id'])
    owner_author.created_at = str(photo['dates']["posted"])

    # The guid is derived from the OSN id rather than from the user name.
    nsid = str(owner['nsid'])
    owner_author.author_osn_id = nsid
    owner_author.author_guid = compute_author_guid_by_author_name(nsid)

    owner_author.location = str(owner['location'])
    owner_author.author_type = 'owner'
    return owner_author
def parse_row(self, row):
    """Convert one inspection row into an ``Author``.

    :param row: mapping read with the keys ``Risk``, ``AKA Name``, ``City``,
        ``Address``, ``Location``, ``DBA Name`` and ``Inspection Date``.
    :return: the populated ``Author``, or ``None`` when ``parse_type``
        returns ``-1`` for the row's risk or when the row cannot be parsed.
    """
    try:
        author = Author()
        author.author_type = self.parse_type(row['Risk'])
        if author.author_type == -1:
            return None
        # Drop every non-ASCII character from the alias.
        author.author_full_name = unicode(row['AKA Name']).encode('ascii', 'ignore').decode('ascii')
        author.location = unicode(row['City'] + ' ' + row['Address'])
        author.geo_enabled = unicode(row['Location'])
        author.name = row['DBA Name']
        author.created_at = unicode(row['Inspection Date'])
        return author
    except Exception:
        # Best effort: a malformed row is skipped. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt and SystemExit propagate.
        return None
def parse_row(self, row):
    """Convert one business inspection row into an ``Author``.

    :param row: mapping read with the keys ``risk_category``,
        ``business_name``, ``business_city``, ``business_address``,
        ``business_location`` and ``inspection_date``.
    :return: the populated ``Author``, or ``None`` when ``parse_type``
        returns ``-1``, when the row has no location, or when the row
        cannot be parsed (that last case is logged).
    """
    try:
        author = Author()
        author.author_type = self.parse_type(row['risk_category'])
        if author.author_type == -1:
            return None

        # Drop every non-ASCII character; the same cleaned value serves as
        # both the full name and the name.
        ascii_name = unicode(row['business_name']).encode('ascii', 'ignore').decode('ascii')
        author.author_full_name = ascii_name
        author.name = ascii_name

        author.location = unicode(row['business_city'] + ' ' + row['business_address'])

        # Test for a missing location BEFORE converting: unicode(None) is
        # the text u'None', which would slip past an emptiness check.
        raw_location = row['business_location']
        if raw_location is None:
            return None
        author.geo_enabled = unicode(raw_location)
        if author.geo_enabled == "":
            return None

        author.created_at = unicode(row['inspection_date'])
        return author
    except Exception:
        # Best effort: log and skip the row. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt and SystemExit propagate.
        logging.info("error with row:" + str(row))
        return None
def setUp(self):
    """Store one author and one post, then run the feature generator.

    The author and post are written to a fresh DB and committed, after
    which ``AccountPropertiesFeatureGenerator`` is built from them and
    executed.
    """
    self._db = DB()
    self._db.setUp()
    self.author_guid = u"author_guid"

    account_attributes = (
        ('author_guid', self.author_guid),
        ('author_full_name', u'author'),
        ('name', u'author_name'),
        ('author_screen_name', u'author_screen_name'),
        ('domain', u'Microblog'),
        ('statuses_count', 10),
        ('friends_count', 5),
        ('followers_count', 6),
        ('favourites_count', 8),
        ('author_sub_type', u"bot"),
        ('author_type', u"bad"),
        ('created_at', u"2017-06-17 05:00:00"),
        ('default_profile', True),
        ('default_profile_image', True),
        ('verified', True),
    )
    account = Author()
    for attribute_name, attribute_value in account_attributes:
        setattr(account, attribute_name, attribute_value)
    self._db.add_author(account)

    entry = Post()
    entry.author = self.author_guid
    entry.author_guid = self.author_guid
    entry.content = u"content"
    entry.title = u"title"
    entry.domain = u"domain"
    entry.post_id = u"post_id"
    entry.guid = entry.post_id
    entry.date = convert_str_to_unicode_datetime("2017-06-14 05:00:00")
    entry.created_at = entry.date
    self._db.addPost(entry)
    self._db.session.commit()

    self.feature_prefix = u"AccountPropertiesFeatureGenerator_"
    self.account_properties_feature_generator = AccountPropertiesFeatureGenerator(
        self._db,
        authors=[account],
        posts={self.author_guid: [entry]})
    self.account_properties_feature_generator.execute()
def _convert_source_to_author(self, source_id, targeted_fields_dict):
    """Resolve a single source id to an ``Author``.

    Only the first element returned for *source_id* is used.

    :param source_id: id of the source element to look up.
    :param targeted_fields_dict: mapping whose ``'source'`` entry provides
        ``'table_name'`` and ``'id'`` (the attribute name holding the id).
    :return: the element itself if it is an ``Author``, the author looked
        up by guid when one is available, otherwise a newly built
        placeholder ``Author``.
    """
    source_settings = targeted_fields_dict['source']
    table_name = source_settings['table_name']
    id_field = source_settings['id']

    matches = self._db.get_table_elements_by_ids(table_name, id_field, [source_id])
    # NOTE(review): presumably a sequence; an empty one would fail here.
    record = matches[0]

    if isinstance(record, Author):
        return record
    if id_field == u"author_guid":
        return self._db.get_author_by_author_guid(source_id)
    if hasattr(record, u"author_guid"):
        return self._db.get_author_by_author_guid(record.author_guid)

    # Nothing links the record to an author: build a placeholder from it.
    placeholder = Author()
    placeholder.author_guid = source_id
    placeholder.statuses_count = len(targeted_fields_dict)
    if hasattr(record, 'created_at'):
        placeholder.created_at = record.created_at
    return placeholder