Ejemplo n.º 1
0
 def __init__(self, db):
     """Initialise the importer and read its date window and data folder.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)

     # The date window is read from the "DEFAULT" section of the object
     # returned by getConfig().
     global_config = getConfig()
     self.start_date = global_config.eval("DEFAULT", "start_date")
     self.end_date = global_config.eval("DEFAULT", "end_date")

     # The data folder is looked up under this class's own section name.
     # NOTE(review): self._config_parser is not assigned in this method --
     # presumably PostImporter.__init__ sets it; confirm.
     section = self.__class__.__name__
     self._data_folder = self._config_parser.eval(section, "data_folder")
 def __init__(self, db):
     """Initialise the importer, its thread limit and its empty URL stores.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)

     # Option is read from the section named after the concrete class.
     section = self.__class__.__name__
     self._max_number_of_threads = self._config_parser.eval(
         section, "max_number_of_threads")

     # All URL bookkeeping containers start out empty.
     self._shorten_url_expended_url_dict = dict()
     self._destination_url_source_url_dict = dict()
     self.resolved_urls = list()
Ejemplo n.º 3
0
    def __init__(self, db):
        """Initialise the importer, its helpers, counters and a Chrome driver.

        :param db: database handle; stored on the instance and also passed
            to ``PostImporter.__init__``, ``Generic_Twitter_Crawler`` and
            ``MissingDataComplementor``.
        """
        PostImporter.__init__(self, db)
        self._db = db

        # Helper objects built on the same database handle.
        self._twitter_crawler = Generic_Twitter_Crawler(db)
        self._missing_data_complementor = MissingDataComplementor(db)

        # Options are read from the section named after the concrete class.
        section = self.__class__.__name__
        self._original_tsv_location = self._config_parser.eval(
            section, "original_tsv_location")
        self._limit_per_crawl = self._config_parser.eval(
            section, "limit_per_crawl")

        # Label bookkeeping; the dict was just created, so the recorded
        # starting length is 0.
        self._post_label_dict = {}
        self._start_len = len(self._post_label_dict)
        self._num_of_rows_in_dataset = 0
        self._post_author_to_label_dict = {}

        # Headless Chrome, so the process runs in the background.
        # NOTE(review): the driver path is a Windows-style relative path, and
        # the `executable_path` / `chrome_options` keywords are tied to older
        # Selenium releases -- confirm against the pinned Selenium version.
        headless_options = Options()
        headless_options.add_argument("--headless")
        self._web_driver = selenium.webdriver.Chrome(
            executable_path=r'vendors\chromedriver\chromedriver.exe',
            chrome_options=headless_options)

        # Statistics counters.
        self._counter = 0
        self._more_than_one = 0
        self._exactly_one = 0

        self._loggin_processed = 0
 def __init__(self, db):
     """Initialise the importer with its time window and author settings.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)

     # Both options are read from the section named after the concrete class.
     section = self.__class__.__name__
     self._past_hours = self._config_parser.eval(section, "past_hours")
     self._author_name_as_domain = self._config_parser.eval(
         section, "author_name_as_domain")

     # Per-author lookup tables, filled in later.
     self._author_classify_dict = dict()
     self._author_prop_dict = dict()
Ejemplo n.º 5
0
    def __init__(self, db):
        """Initialise the importer and read the XML path from configuration.

        :param db: database handle, forwarded unchanged to
            ``PostImporter.__init__``.
        """
        PostImporter.__init__(self, db)

        # "xml_path" is fetched with the parser's plain get() (not eval())
        # from the section named after the concrete class.
        # NOTE: an earlier, disabled variant read "XMDL_source_path" instead.
        self.xmlPath = getConfig().get(self.__class__.__name__, "xml_path")

        # No file or folder is selected yet.
        self.fileName = self.CurrFolderPath = None
Ejemplo n.º 6
0
    def __init__(self, db):
        """Initialise the importer and read its source path and date format.

        :param db: database handle, forwarded unchanged to
            ``PostImporter.__init__``.
        """
        PostImporter.__init__(self, db)

        # Both options are fetched with the parser's plain get() from the
        # section named after the concrete class.
        config = getConfig()
        section = self.__class__.__name__
        self.source_path = config.get(section, "FDL_source_path")
        self.file_date_format = config.get(section, "date_format")

        # @review: not a field; make it a local variable.
        self.fileName = None
        # @review: no code should be written especially for unit tests
        # (except the tests themselves).
        self.URLforUnittest = None
        # @review: coding convention: lowercase field names and a leading
        # underscore for private members.
        self.CurrFolderPath = None
Ejemplo n.º 7
0
 def __init__(self, db):
     """Initialise the importer with its date window and classifier settings.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)

     # The date window is read from the "DEFAULT" section of the object
     # returned by getConfig().
     global_config = getConfig()
     self.start_date = global_config.eval("DEFAULT", "start_date")
     self.end_date = global_config.eval("DEFAULT", "end_date")

     # The remaining options live under this class's own section name.
     # NOTE(review): self._config_parser is not assigned in this method --
     # presumably PostImporter.__init__ sets it; confirm.
     section = self.__class__.__name__
     self._data_folder = self._config_parser.eval(section, "data_folder")
     self._bad_actor_threshold = self._config_parser.eval(
         section, "bad_actor_threshold")
     self._optional_classes = self._config_parser.eval(
         section, "optional_classes")

     self._author_classify_dict = dict()
 def __init__(self, db):
     """Initialise the PolitiFact importer from its configuration section.

     PolitiFact post titles are biased, so they should not be used as
     features.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)
     self._domain = u"PolitiFact"

     section = self.__class__.__name__

     # NOTE(review): every option below is also written to self._actions, so
     # self._actions ends up holding the "post_types" value. This looks like
     # copy-paste residue, but the writes are kept as-is because readers of
     # self._actions are not visible here -- confirm before removing.
     subjects = self._config_parser.eval(section, "subjects")
     self._subjects = subjects
     self._actions = subjects

     posts_per_subject = self._config_parser.eval(section, "posts_per_subject")
     self._posts_per_subject = posts_per_subject
     self._actions = posts_per_subject

     post_types = self._config_parser.eval(section, "post_types")
     self._post_types = post_types
     self._actions = post_types

     # Lookup tables, filled in later.
     self._author_classify_dict = dict()
     self._author_prop_dict = dict()
     self._post_type_dict = dict()
Ejemplo n.º 9
0
 def __init__(self, db):
     """Initialise the importer, its options and the un-stemming suffix list.

     :param db: database handle; passed to ``PostImporter.__init__`` and to
         ``Topic_Term_Manager``.
     """
     PostImporter.__init__(self, db)

     # Options are read from the section named after the concrete class.
     section = self.__class__.__name__
     self._author_name_as_domain = self._config_parser.eval(
         section, "author_name_as_domain")
     self._retrieve_news_by_keywords = self._config_parser.eval(
         section, "retrieve_news_by_keywords")
     self._num_of_top_terms = self._config_parser.eval(
         section, "num_of_top_terms")
     self._filter_sentences = self._config_parser.eval(
         section, "filter_sentences")

     self._topic_term_manager = Topic_Term_Manager(db)

     # Fix: a comma was missing after 'ial', so Python's implicit
     # string-literal concatenation produced the single bogus entry 'ialion'
     # instead of the two suffixes 'ial' and 'ion'.
     self._characters_to_add_to_unstemmed_words = [
         'e', 'able', 'al', 'ial',
         'ion', 'ing', 'er', 'ies',
     ]
Ejemplo n.º 10
0
    def __init__(self, db):
        """Initialise the importer with its input CSV path and fixed author.

        :param db: database handle, forwarded unchanged to
            ``PostImporter.__init__``.
        """
        PostImporter.__init__(self, db)
        self._input_csv_file = self._config_parser.eval(
            self.__class__.__name__, "input_csv_file")

        # There is no author, so the website itself is used as the author.
        # This author should not be included in the analysis.
        # A u"" literal replaces unicode("snopes"): same value on Python 2,
        # and it does not raise NameError on Python 3, where the `unicode`
        # builtin no longer exists.
        self._author_name = u"snopes"
 def __init__(self, db):
     """Initialise the importer and read its data folder from configuration.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)

     # The option is read from the section named after the concrete class.
     section = self.__class__.__name__
     self._data_folder = self._config_parser.eval(section, "data_folder")
Ejemplo n.º 12
0
 def execute(self, window_start=None):
     """Log two progress messages, then delegate to ``PostImporter.execute``.

     :param window_start: forwarded unchanged to the base implementation;
         defaults to None.
     :return: None -- the base implementation's result is not returned.
     """
     for message in ("execute", "PostImporter.execute(self, window_start)"):
         logging.info(message)
     PostImporter.execute(self, window_start)
 def __init__(self, db):
     """Initialise the importer and read the input file path from configuration.

     :param db: database handle, forwarded unchanged to
         ``PostImporter.__init__``.
     """
     PostImporter.__init__(self, db)

     # The option is read from the section named after the concrete class.
     section = self.__class__.__name__
     self._path_to_file = self._config_parser.eval(section, "path_to_file")