def __init__(self, db):
    """Create the Twitter API requester and read the feature list and group guid from config."""
    Method_Executor.__init__(self, db)
    self._twitter_api = TwitterApiRequester()
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._features = cfg(section, "features")
    self._group_guid = cfg(section, "group_guid")
def __init__(self, db):
    """Set up Twitter API access and read source/target identifiers from config."""
    Method_Executor.__init__(self, db)
    self._twitter_api = TwitterApiRequester()
    self._social_network_crawler = Twitter_Rest_Api(db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._target_id = cfg(section, "target_id")
    self._source_id = cfg(section, "source_id")
    # NOTE(review): no leading underscore, unlike the sibling attributes —
    # kept as-is since external code may rely on this name.
    self.source_username = cfg(section, "source_username")
def __init__(self, db):
    """Set up the Facebook crawler: read account credentials and crawl limits
    from the configuration, then launch a Firefox WebDriver instance."""
    Method_Executor.__init__(self, db)
    # Facebook login credentials taken from the configuration file.
    self._account_user_name = self._config_parser.eval(
        self.__class__.__name__, "account_user_name")
    self._account_user_pw = self._config_parser.eval(
        self.__class__.__name__, "account_user_pw")
    self._group_id = self._config_parser.eval(self.__class__.__name__, "group_id")
    # Resolved later; not known at construction time.
    self._group_name = None
    self.osn_ids = self._config_parser.eval(self.__class__.__name__, "osn_ids")
    self._domain = 'Facebook'
    # author guid -> author object, filled during crawling.
    self._author_guid_author_dict = {}
    self._number_of_scrolls = self._config_parser.eval(
        self.__class__.__name__, "number_of_scrolls")
    self._number_of_liked_pages_to_collect = self._config_parser.eval(
        self.__class__.__name__, "number_of_liked_pages_to_collect")
    #TODO: consider renaming this var _num_of_scrolls_in_group_members_page
    options = webdriver.FirefoxOptions()
    options.set_preference(
        "dom.push.enabled", False)  # Setting firefox options to disable push notifications
    # NOTE(review): geckodriver path is hard-coded to a Windows/Python27 location —
    # consider moving it into the configuration file.
    self.driver = webdriver.Firefox(
        executable_path=r'C:\Python27\geckodriver.exe', firefox_options=options)
    self.driverWait = WebDriverWait(
        self.driver, 4)  # Waiting threshold for loading a page is 4 seconds
def __init__(self, db):
    """Read the input CSV path from config and fix the fallback author name.

    There is no author in the source data, so the website ("snopes") is used
    as the author; it should not be included in the analysis.
    """
    Method_Executor.__init__(self, db)
    self._input_csv_file = self._config_parser.eval(
        self.__class__.__name__, "input_csv_file")
    # Bug fix: the `unicode()` builtin exists only in Python 2; the u"" literal
    # produces the same value there and is also valid Python 3 syntax.
    self._author_name = u"snopes"
def __init__(self, db, **kwargs):
    """Load post/author mappings from the database and prepare word-index state."""
    Method_Executor.__init__(self, db)
    # Mappings pulled from the DB up front.
    self._post_id_author_guid_dict = self._db.get_post_id_to_author_guid_mapping()
    self._authors_image_tags = self._db.get_authors_and_image_tags()
    # Empty accumulators, filled during execution.
    self._word_id_dict = {}
    self._id_word_dict = {}
    self._author_features = []
    self._post_id_words_dict = {}
    # Word ids are assigned starting from 1.
    self._word_id = 1
def __init__(self, db):
    """Read input/output directories, tar-file URLs, and the thread cap from config."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._input_directory_path = cfg(section, "input_directory_path")
    self._output_directory_path = cfg(section, "output_directory_path")
    self._tar_files_for_downloading_urls = cfg(section, "tar_files_for_downloading_urls")
    self._max_num_of_threads = cfg(section, "max_num_of_threads")
def __init__(self, db):
    """Configure the Google-Trends-driven crawler and its helper components."""
    Method_Executor.__init__(self, db)
    # taken from http://techslides.com/hacking-the-google-trends-api
    self._url = "https://trends.google.com/trends/hottrends/atom/feed?pn=p1"
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._retrieve_news_by_keywords = cfg(section, "retrieve_news_by_keywords")
    self._num_of_top_terms = cfg(section, "num_of_top_terms")
    self._generic_twitter_crawler = Generic_Twitter_Crawler(self._db)
    self._topic_term_manager = Topic_Term_Manager(db)
    self._twitter_rest_api = Twitter_Rest_Api(db)
def __init__(self, db):
    """Load topic/judgment file paths and tweet-retrieval limits from config."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._topics_path = cfg(section, "topics_path")
    self._judgment_path = cfg(section, "judgment_path")
    self._num_of_relevant_tweets = cfg(section, "num_of_relevant_tweets")
    self._num_of_description_words = cfg(section, "num_of_description_words")
    self._twitter_api = Twitter_Rest_Api(db)
def __init__(self, db):
    """Read the posting/influence strategy parameters from the configuration."""
    Method_Executor.__init__(self, db)
    self._twitter_api = TwitterApiRequester()
    self._social_network_crawler = Twitter_Rest_Api(db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._influence_strategy = cfg(section, "post_strategy")
    self._source_group = cfg(section, "source_group")
    self._target_group = cfg(section, "target_group")
    self._user_id = cfg(section, "user_id")
    self._number_of_posts = cfg(section, "number_of_posts")
    # NOTE(review): "precent" is a typo of "percent", but both the attribute
    # and the config key are kept as-is for backward compatibility.
    self._retweet_precent = cfg(section, "retweet_precent")
    self._related_hashtags = cfg(section, "related_hashtags")
    self._posts_num = cfg(section, "posts_num")
def __init__(self, db):
    """Read I/O paths from config and initialize request counters and result frames."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._input_directory_path = cfg(section, "input_directory_path")
    self._target_file_name = cfg(section, "target_file_name")
    self._output_directory_path = cfg(section, "output_directory_path")
    self._results_file_name = cfg(section, "results_file_name")
    # Request bookkeeping against the external service's hourly rate limit.
    self._total_requests = 0
    self._max_requests_per_hour = 1000
    # Accumulators for matched and unmatched name lookups.
    self._name_related_name_tuples = []
    self._name_related_name_df = pd.DataFrame()
    self._name_not_found_tuples = []
    self._name_not_found_df = pd.DataFrame()
def __init__(self, db):
    """Configure trend retrieval and the generic web crawler from config values."""
    Method_Executor.__init__(self, db)
    # taken from http://techslides.com/hacking-the-google-trends-api
    self._google_trends_url = "https://trends.google.com/trends/hottrends/atom/feed?pn=p1"
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._retrieve_news_by_keywords = cfg(section, "retrieve_news_by_keywords")
    self._num_of_top_terms = cfg(section, "num_of_top_terms")
    self._query = cfg(section, "query")
    self._generic_webcrawlers = GenericWebCrawlers(self._db, self._query)
    self._topic_term_manager = Topic_Term_Manager(db)
def __init__(self, db):
    """Read file locations and the ranking-function name from the configuration."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._input_directory_path = cfg(section, "input_directory_path")
    self._target_file_name = cfg(section, "target_file_name")
    self._ground_truth_file_name = cfg(section, "ground_truth_file_name")
    self._output_directory_path = cfg(section, "output_directory_path")
    self._results_file_name = cfg(section, "results_file_name")
    self._ranking_function = cfg(section, "ranking_function")
def __init__(self, db):
    """Read tweet-crawling limits from config and set up collection state."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._max_num_tweets = cfg(section, "max_num_tweets")
    self._max_num_of_objects_without_saving = cfg(section, "max_num_of_objects_without_saving")
    self._month_interval = cfg(section, "month_interval")
    self._output_folder_full_path = cfg(section, "output_folder_full_path")
    self._limit_start_date = cfg(section, "limit_start_date")
    self._limit_end_date = cfg(section, "limit_end_date")
    # claim id -> tweet-id sets, split into 'before' and 'after' buckets.
    self._claim_id_tweets_id_before_dict = defaultdict(set)
    self._claim_id_tweets_id_after_dict = defaultdict(set)
    self._posts = []
    self._claim_post_connections = []
    self._retrieved = 0
    # Guards shared state; presumably tweets are fetched from worker threads.
    self._lock = threading.Lock()
def __init__(self, db):
    """Read the action list and targeted Twitter ids/names from the configuration."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._actions = cfg(section, "actions")
    self._targeted_twitter_author_ids = cfg(section, "targeted_twitter_author_ids")
    self._targeted_twitter_post_ids = cfg(section, "targeted_twitter_post_ids")
    self._targeted_twitter_author_names = cfg(section, "targeted_twitter_author_names")
    self._social_network_crawler = Twitter_Rest_Api(db)
def __init__(self, db):
    """Read classification-experiment parameters from the configuration."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._targeted_class_dict = cfg(section, "targeted_class_dict")
    # NOTE(review): "lableled" is a typo of "labeled"; both the attribute and
    # the config key are kept as-is for backward compatibility.
    self._divide_lableled_by_percent_training_size = cfg(
        section, "divide_lableled_by_percent_training_size")
    self._num_of_iterations = cfg(section, "num_of_iterations")
    self._targeted_class_field_name = cfg(section, "targeted_class_field_name")
    self._index_field_name = cfg(section, "index_field_name")
    self._path = cfg(section, "path")
    self._results_table_file_name = cfg(section, "results_table_file_name")
    # Cache of botometer scores per screen name, to reduce the number of
    # requests to botometer: once checked, the result is kept.
    self._screen_name_botometer_score_dict = {}
def __init__(self, db):
    """Read user-collection limits from config and set up result accumulators."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._actions = cfg(section, "actions")
    self._minimal_num_of_posts = cfg(section, "minimal_num_of_posts")
    self._limit_friend_follower_number = cfg(section, "limit_friend_follower_number")
    self._maximal_tweets_count_in_timeline = cfg(section, "maximal_tweets_count_in_timeline")
    self._found_twitter_users = []
    self._social_network_crawler = Twitter_Rest_Api(db)
    self._suspended_authors = []
    self._max_users_without_saving = cfg(section, "max_users_without_saving")
    self._posts = []
    self._authors = []
    # NOTE(review): "citatsions" is a typo of "citations"; the name is kept
    # for backward compatibility.
    self._post_citatsions = []
def __init__(self, db):
    """Read the full set of claim/tweet crawling parameters from the
    configuration and initialize per-run collection state."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    cfg = self._config_parser.eval
    self._lang = cfg(section, "language")
    self._max_num_tweets = cfg(section, "max_num_tweets")
    self._max_num_of_objects_without_saving = cfg(section, "max_num_of_objects_without_saving")
    self._month_interval = cfg(section, "month_interval")
    self._output_folder_full_path = cfg(section, "output_folder_full_path")
    self._limit_start_date = cfg(section, "limit_start_date")
    self._limit_end_date = cfg(section, "limit_end_date")
    self._claim_without_tweets_only = cfg(section, "claim_without_tweets_only")
    self._keywords_types = cfg(section, "keywords_types")
    self._claim_index_to_start_crawling = cfg(section, "claim_index_to_start_crawling")
    self._targeted_claim_ids_for_crawling = cfg(section, "targeted_claim_ids_for_crawling")
    self._topic_terms_dict = cfg(section, "topic_terms_dict")
    # Interval bounds come in as strings and are converted to dates right away.
    self._start_date_interval = cfg(section, "start_date_interval")
    self._start_date_interval = str_to_date(self._start_date_interval)
    self._end_date_interval = cfg(section, "end_date_interval")
    self._end_date_interval = str_to_date(self._end_date_interval)
    # claim id -> tweet-id sets, split into 'before' and 'after' buckets.
    self._claim_id_tweets_id_before_dict = defaultdict(set)
    self._claim_id_tweets_id_after_dict = defaultdict(set)
    self._posts = []
    self._claim_post_connections = []
    self._retrieved = 0
    self._interval_timeline_tweets = []
def __init__(self, db):
    """Read the data folder path from config and create the Twitter REST crawler."""
    Method_Executor.__init__(self, db)
    section = self.__class__.__name__
    self._data_folder = self._config_parser.eval(section, "data_folder")
    self._social_network_crawler = Twitter_Rest_Api(db)
def __init__(self, db):
    """Initialize with the shared database handle; no extra configuration needed."""
    Method_Executor.__init__(self, db)
def execute(self, window_start=None):
    """Run the Tumblr parsing flow, raising the csv field-size limit first.

    Bug fix: `csv.field_size_limit(sys.maxint)` is Python-2-only, and even on
    py2 it raises OverflowError on platforms where C long is 32-bit (Windows).
    Probe downward from sys.maxsize (available on py2.6+ and py3) until the
    csv module accepts the value.

    :param window_start: unused; kept for interface compatibility.
    """
    logging.info("Tumblr Parser started !!!!")
    field_limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(field_limit)
            break
        except OverflowError:
            field_limit = int(field_limit / 2)
    Method_Executor.execute(self, None)
    logging.info("Tumblr Parser Finished !!!!")