def prepare_timeslots(self, start):
        """Build the list of Timeslot objects covering the analysis window.

        Starting at `start` (a date string), slots are generated with linearly
        growing length: the i-th slot spans i * seconds_in_slot_unit * boost_jump
        seconds.  Generation stops once a slot start passes one day beyond the
        end of the analysis window.

        :param start: window start as a (possibly unicode) date string
        :return: list of Timeslot objects, each ending one second before the
                 next one starts
        """
        configInst = getConfig()

        # FIX: the original read "seconds_in_slot_unit" into a variable named
        # slot_length and "boost_jump" into seconds_in_slot_unit.  The product
        # is commutative so behavior was unchanged, but the names were swapped;
        # they now match the config options they read.
        seconds_in_slot_unit = int(configInst.get("BoostAuthorsModel", "seconds_in_slot_unit"))
        boost_jump = int(configInst.get("BoostAuthorsModel", "boost_jump"))
        window_size = datetime.timedelta(seconds=int(configInst.get("DEFAULT", "window_analyze_size_in_sec")))

        # Strip non-ASCII characters from the incoming date string
        # (Python 2 style normalize-then-encode).
        start = unicodedata.normalize('NFKD', start).encode('ascii', 'ignore')
        last_date = str_to_date(start) + window_size

        timeslots = []
        current_slot_start = str_to_date(start)
        index = 1
        # Invariant base length, hoisted out of the loop.
        base_delta = datetime.timedelta(seconds=(seconds_in_slot_unit * boost_jump))
        while current_slot_start.date() < last_date.date() + datetime.timedelta(days=1):
            delta = base_delta * index

            # Slot end is inclusive: one second before the next slot starts.
            timeslot = Timeslot(current_slot_start, current_slot_start + delta - datetime.timedelta(seconds=1))
            timeslots.append(timeslot)

            current_slot_start = current_slot_start + delta
            index += 1

        return timeslots
# Beispiel #2 (scraped example separator; vote count: 0)
        def connect(dbapi_connection, connection_rec):
            """SQLAlchemy 'connect' event hook: load the SQLite extension
            configured under [DB] DB_path_to_extension, picking the shared
            library suffix (.dll / .so) by operating system.
            """
            # FIX: the original mixed getConfig() with a name `configInst`
            # that is not defined in this scope; use one config instance.
            config = getConfig()
            dbapi_connection.enable_load_extension(True)
            if config.eval("OperatingSystem", "windows"):
                dbapi_connection.execute(
                    'SELECT load_extension("%s%s")' % (config.get("DB", "DB_path_to_extension"), '.dll'))
            if config.eval("OperatingSystem", "linux"):
                dbapi_connection.execute(
                    'SELECT load_extension("%s%s")' % (config.get("DB", "DB_path_to_extension"), '.so'))

            dbapi_connection.enable_load_extension(False)
 def test_XML_importer_not_overwriting_bad_actor_collector(self):
     """Running the XML importer after the bad-actor collector must not
     clear the collector's insertion timestamp on a shared author.
     """
     # Populate authors via the bad-actor collector first.
     self._bad_actor_collector.crawl_bad_actors_followers()
     self._bad_actor_collector.crawl_bad_actors_retweeters()
     # Then import the same window from XML.
     self.xml_importer.setUp()
     self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
     self.create_author_table.setUp()
     self.create_author_table.execute(getConfig().eval("DEFAULT", "start_date"))
     author = self.db.get_author_by_author_guid(u'5371821e-67b5-3582-bffb-b293b2554dda')
     # Both insertion dates must survive, i.e. neither step overwrote the other.
     self.assertTrue(
         author.xml_importer_insertion_date != None and author.bad_actors_collector_insertion_date != None)
     self.db.session.close()
# Beispiel #4 (scraped example separator; vote count: 0)
 def test_XML_importer_not_overwriting_bad_actor_collector(self):
     """Variant of the overwrite regression test that looks the author up
     by a guid computed from the author name instead of a hard-coded guid.
     """
     self._bad_actor_collector.crawl_bad_actors_followers()
     self._bad_actor_collector.crawl_bad_actors_retweeters()
     self.xml_importer.setUp()
     self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
     self.create_author_table.setUp()
     self.create_author_table.execute(getConfig().eval("DEFAULT", "start_date"))
     # Resolve the guid from the screen name rather than hard-coding it.
     res = self.db.get_author_by_author_guid(compute_author_guid_by_author_name(u'adoula01'))
     author = res
     # Both pipeline steps must leave their insertion timestamps intact.
     self.assertTrue(
         author.xml_importer_insertion_date != None and author.bad_actors_collector_insertion_date != None)
     self.db.session.close()
    def setUp(self):
        """Assemble the feature-extraction pipeline from the config file.

        Builds a registry mapping config section names to generator classes,
        loads authors/posts for the configured domain, and appends one
        generator instance per config section that has a registered class.
        """
        # Registry: config section name -> feature-generator class.
        # LinkPredictionFeatureExtractor must be the latest. Due to the
        # deletion of features of the anchor authors.
        module_dict = {
            "SyntaxFeatureGenerator": SyntaxFeatureGenerator,
            "BehaviorFeatureGenerator": BehaviorFeatureGenerator,
            "GraphFeatureGenerator_1": GraphFeatureGenerator,
            "AccountPropertiesFeatureGenerator": AccountPropertiesFeatureGenerator,
            "GraphFeatureGenerator_2": GraphFeatureGenerator,
            "DistancesFromTargetedClassFeatureGenerator": DistancesFromTargetedClassFeatureGenerator,
            "Word_Embeddings_Comparison_Feature_Generator": Word_Embeddings_Comparison_Feature_Generator,
            "Word_Embeddings_Feature_Generator": Word_Embeddings_Feature_Generator,
            "Word_Embedding_Differential_Feature_Generator": Word_Embedding_Differential_Feature_Generator,
            "ClaimFeatureGenerator": ClaimFeatureGenerator,
            "LinkPredictionFeatureExtractor": LinkPredictionFeatureExtractor,
        }

        ###############################################################
        ## SETUP
        logging.config.fileConfig(getConfig().get("DEFAULT",
                                                  "Logger_conf_file"))
        logging.info("Start Execution ... ")
        logging.info("SETUP global variables")
        window_start = getConfig().eval("DEFAULT", "start_date")
        logging.info("CREATE pipeline")

        authors = self._db.get_authors_by_domain(self._domain)
        posts = self._db.get_posts_by_domain(self._domain)

        parameters = {"authors": authors, "posts": posts, "graphs": {}}

        for section in self._config_parser.sections():
            generator_class = module_dict.get(section)
            if generator_class is None:
                continue
            # Graph-based generators need graph structures injected first.
            if section.startswith("GraphFeatureGenerator") or \
                    section.startswith("DistancesFromTargetedClassFeatureGenerator"):
                self._add_graph_features_to_params(section, parameters)
            self._pipeline.append(generator_class(self._db, **parameters))
# Beispiel #6 (scraped example separator; vote count: 0)
 def test_bad_actor_collector_not_overwriting_XML_importer(self):
     """Mirror of the other overwrite regression test: run the XML importer
     first, then the bad-actor collector, and check both insertion
     timestamps survive on the shared author.
     """
     self.xml_importer.setUp()
     self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
     self.create_author_table.setUp()
     self.create_author_table.execute(getConfig().eval(
         "DEFAULT", "start_date"))
     # Collector runs second and must not wipe the importer's timestamp.
     self._bad_actor_collector.execute()
     res = self.db.get_author_by_author_guid_and_domain(
         u'5371821e67b53582bffbb293b2554dda', self._domain)
     author = res[0]
     self.assertTrue(author.xml_importer_insertion_date != None
                     and author.bad_actors_collector_insertion_date != None)
     self.db.session.close()
# Beispiel #7 (scraped example separator; vote count: 0)
    def __init__(self, db, **kwargs):
        """Key-author score feature generator.

        Reads the analysis window from the config, requires 'authors' and
        'posts' in kwargs, and disables itself when the export_key_authors
        DB view is missing.

        :raises Exception: when 'authors' or 'posts' is not in kwargs
        """
        self._db = db
        self.config_parser = getConfig()

        # start_date is stored as date('YYYY-MM-DD HH:MM:SS') in the config.
        raw_start = self.config_parser.get("DEFAULT", "start_date").strip("date('')")
        self._window_start = datetime.datetime.strptime(raw_start, '%Y-%m-%d %H:%M:%S')
        window_seconds = int(self.config_parser.get("DEFAULT", "window_analyze_size_in_sec"))
        self._window_size = datetime.timedelta(seconds=window_seconds)
        self._window_end = self._window_start + self._window_size

        # Guard clause: both inputs are mandatory.
        if 'authors' not in kwargs or 'posts' not in kwargs:
            raise Exception('Author object was not passed as parameter')
        self.authors = kwargs['authors']
        self.author_guid_posts_dict = kwargs['posts']

        if self._db.is_export_key_authors_view_exist():
            self.module_enabled = True
            self.sum_tfidf_dict = db.get_sum_tfidf_scores()
            self.max_tfidf_dict = db.get_max_tfidf_scores()
        else:
            # The required view, export_key_authors, does not exist in the DB.
            logging.error(
                "Cannot initiate KeyAuthorScoreFeatureGenerator as the export_key_authors view does not appear in the db"
            )
            self.module_enabled = False
    def get_pointer_score(self, pointer, total_ref_count, timeslots_accumulated_counts, timeslots):
        """Score a pointer by how many additional references accumulate in
        timeslots after its own, weighting each contribution down by the
        squared distance (in slot units) between slot end times.

        Walks the timeslots from latest to earliest and stops once the
        pointer's own slot is reached.
        """
        unit_seconds = int(getConfig().get("BoostAuthorsModel", "seconds_in_slot_unit"))

        pointer_slot = self.timeslotUtils.find_matching_timeslot(timeslots, str_to_date(pointer.date))
        pointer_accum = timeslots_accumulated_counts[pointer_slot]

        score = 0

        for slot, accum in sorted(timeslots_accumulated_counts.items(), reverse=True):
            addition = accum - pointer_accum

            slot_distance = abs(slot.end_time - pointer_slot.end_time).total_seconds() / unit_seconds
            slot_distance = math.pow(slot_distance, 2.0)

            # Skip the degenerate 0/0 case (the pointer's own slot).
            if not (addition == 0 and slot_distance == 0):
                score += float(addition) / slot_distance

            if pointer_slot.start_time == slot.start_time and pointer_slot.end_time == slot.end_time:
                break

        return score
# Beispiel #9 (scraped example separator; vote count: 0)
 def testDoubleExecute(self):
     """DB.setUp/execute must be idempotent: per-table row counts are the
     same after the first and the second execution.
     """
     import sys
     sys.argv = [sys.argv[0], 'config.ini']
     db = DB()
     db.setUp()
     db.execute(getConfig().get("DEFAULT","start_date"))
     # FIX: use count(*) so .scalar() really returns the row count; the
     # original issued "select *", whose scalar() is only the first column
     # of the first row, not a count as the variable names claimed.
     table_names = ["posts", "authors", "topics", "author_citations",
                    "authors_boost_stats", "post_citations",
                    "posts_representativeness", "posts_to_pointers_scores",
                    "posts_to_topic", "visualization_windows"]
     count_queries = ["select count(*) from " + name for name in table_names]
     listNumTablesRows = [db.session.execute(q).scalar() for q in count_queries]
     db.setUp()
     db.execute(getConfig().get("DEFAULT","start_date"))
     listNumTablesRows2 = [db.session.execute(q).scalar() for q in count_queries]
     self.assertListEqual(listNumTablesRows, listNumTablesRows2,
                          "the two executions had different results")
# Beispiel #10 (scraped example separator; vote count: 0)
    def setUp(self):
        """Import Tumblr fixture posts, run the account-properties feature
        generator, and cache the generated features for one known author.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")
        #self._end_date = self.config.get("DEFAULT", "end_date")

        # TSV fixtures specific to the account-properties generator.
        self._tsv_files_path = self.config.get("TumblrImporter", "tsv_test_files_account_properties_feature_generator")

        self._db = DB()
        self._db.setUp()

        self._tumblr_parser = TumblrImporter(self._db)
        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        # Author whose generated features the tests will inspect.
        self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"

        authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
        parameters = {"authors": authors, "posts": posts}

        account_properties_feature_generator = AccountPropertiesFeatureGenerator(self._db, **parameters)
        account_properties_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(author_guid=self._author_guid)
        self._author_features_dict = self._create_author_features_dictionary(self._author_features)
# Beispiel #11 (scraped example separator; vote count: 0)
    def setUp(self):
        """Import Tumblr fixtures, run the autotopic and key-authors models
        (both prerequisites), then run the key-author-score feature
        generator and cache the features for one known author.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._tsv_files_path = self.config.get(
            "TumblrImporter",
            "tsv_test_files_key_author_score_feature_generator")
        self._db = DB()
        self._db.setUp()
        self._tumblr_parser = TumblrImporter(self._db)

        # Author whose generated features the tests will inspect.
        self._author_guid = "150ff707-a6eb-3051-8f3c-f623293c714b"

        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        # Topics must exist before key authors can be scored.
        autotopic_executor = AutotopicExecutor(self._db)
        autotopic_executor.setUp()
        autotopic_executor.execute()

        key_author_model = KeyAuthorsModel(self._db)
        key_author_model.setUp()
        key_author_model.execute()

        authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
        parameters = {"authors": authors, "posts": posts}

        self._key_author_score_feature_generator = KeyAuthorScoreFeatureGenerator(
            self._db, **parameters)
        self._key_author_score_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features_dict = self._create_author_features_dictionary(
            self._author_features)
    def setUp(self):
        """Import Tumblr fixtures and run GraphFeatureGenerator, then cache
        the generated features for the test author.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")
        #self._end_date = self.config.get("DEFAULT", "end_date")

        self._tsv_files_path = self.config.get(
            "TumblrImporter", "tsv_test_files_graph_feature_generator")

        self._db = DB()
        self._db.setUp()

        self._tumblr_parser = TumblrImporter(self._db)
        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        self._author_guid = u"f0f4bb42-3fed-322a-b71a-681179d47ea1"

        parameters = {
            "authors": self._db.get_authors_by_domain(Domains.MICROBLOG),
            "posts": self._db.get_posts_by_domain(Domains.MICROBLOG),
        }

        # Each graph parameter key doubles as the config option name in the
        # GraphFeatureGenerator_1 section, so they can be read in one loop.
        graph_option_names = (
            "graph_types",
            "algorithms",
            "aggregation_functions",
            "neighborhood_sizes",
            "distances_from_labeled_authors",
            "graph_directed",
            "graph_weights",
        )
        for option in graph_option_names:
            parameters[option] = self.config.eval("GraphFeatureGenerator_1", option)

        graph_feature_generator = GraphFeatureGenerator(self._db, **parameters)
        graph_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features_dict = self._create_author_features_dictionary(
            self._author_features)
    def __init__(self, db, **kwargs):
        """Base feature generator: reads window/domain settings from the
        config and stores the mandatory author/post inputs plus optional
        graph-related parameters.

        :raises Exception: when 'authors' or 'posts' is missing from kwargs
        """
        super(BaseFeatureGenerator, self).__init__(db)
        self._db = db
        # FIX: bind the config parser BEFORE it is used; the original read
        # self._config_parser (implicitly set by the base class) on the
        # first two lines and only assigned it afterwards.
        self._config_parser = getConfig()
        self._load_limit = self._config_parser.eval("FeatureExtractor", "load_limit")
        self._max_objects_save = self._config_parser.eval("FeatureExtractor", "max_objects_save")
        self._targeted_social_network = self._config_parser.get("DEFAULT", "social_network_name")

        # start_date is stored as date('YYYY-MM-DD HH:MM:SS') in the config.
        start_date = self._config_parser.get("DEFAULT", "start_date").strip("date('')")
        self._window_start = datetime.datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')
        self._window_size = datetime.timedelta(
            seconds=int(self._config_parser.get("DEFAULT", "window_analyze_size_in_sec")))
        self._domain = str(self._config_parser.get(self.__class__.__name__, "domain"))

        if 'authors' in kwargs and 'posts' in kwargs:
            self.authors = kwargs['authors']
            self.author_guid_posts_dict = kwargs['posts']
        else:
            raise Exception('Author object was not passed as parameter')

        # Optional graph/measure parameters: store as _<name> when provided.
        for key in ('measure', 'calculator_type', 'aggregation_function',
                    'graph_type', 'targeted_class_field_name'):
            if key in kwargs:
                setattr(self, '_' + key, kwargs[key])
    def __init__(self, db, **kwargs):
        """Graph feature generator base: parse the analysis window from the
        config, require an 'authors' kwarg, and require the full set of
        graph parameters before delegating to set_graph_vars.

        :raises Exception: when 'authors' is missing or the graph
            parameters are incomplete
        """
        self.config_parser = getConfig()
        start_date = self.config_parser.get("DEFAULT",
                                            "start_date").strip("date('')")
        self._window_start = datetime.datetime.strptime(
            start_date, '%Y-%m-%d %H:%M:%S')
        self._window_size = datetime.timedelta(seconds=int(
            self.config_parser.get("DEFAULT", "window_analyze_size_in_sec")))
        self._window_end = self._window_start + self._window_size
        self._db = db

        self._targeted_classes = self.config_parser.eval(
            "DEFAULT", "targeted_classes")

        if 'authors' in kwargs:
            self._authors = kwargs['authors']
            self._author_dict = self._create_author_dictionary(self._authors)
        else:
            raise Exception('Author object was not passed as parameter')

        # FIX: set(kwargs) works on both Python 2 and 3; the original used
        # dict.viewkeys(), which only exists on Python 2.
        required_graph_params = {
            'graph_types', 'algorithms', 'aggregation_functions',
            'neighborhood_sizes', 'graph_weights', 'graph_directed'
        }
        if set(kwargs) >= required_graph_params:
            self.set_graph_vars(kwargs)
        else:
            raise Exception(
                'Graph parameters for feature generation are missing or incomplete'
            )
    def execute(self):
        """Run every configured feature over every author that has posts,
        then persist the accumulated author features in one batch.
        """
        start_time = time.time()
        info_msg = "execute started for " + self.__class__.__name__
        logging.info(info_msg)

        total_authors = len(self.authors)
        processed_authors = 0
        features = getConfig().eval(self.__class__.__name__, "feature_list")

        authors_features = []
        for author in self.authors:
            author_guid = author.author_guid
            # FIX: membership test on the dict itself; the original built
            # .keys() on every iteration (a fresh list on Python 2).
            if author_guid in self.author_guid_posts_dict:
                posts = self.author_guid_posts_dict[str(author.author_guid)]
                # FIX: direct call instead of getattr(self, 'cleanUp')().
                self.cleanUp()
                for feature in features:
                    # NOTE(review): `feature` is passed both as the feature
                    # name and as the final argument -- confirm against
                    # run_and_create_author_feature's signature.
                    author_feature = self.run_and_create_author_feature(
                        author, feature, posts, author_guid, feature)
                    authors_features.append(author_feature)

            processed_authors += 1
            print("\r processed authors " + str(processed_authors) + " from " +
                  str(total_authors),
                  end="")

        if authors_features:
            self.submit_author_features_to_db(authors_features)

        end_time = time.time()
        diff_time = end_time - start_time
        print('execute finished in ' + str(diff_time) + ' seconds')
# Beispiel #16 (scraped example separator; vote count: 0)
    def execute(self, window_start):
        """Export one CSV row per post in the analysis window, assembling
        each row from post data, scores, topic data and reference counts.
        """
        window_seconds = int(getConfig().get("DEFAULT", "window_analyze_size_in_sec"))
        window_end = window_start + datetime.timedelta(seconds=window_seconds)

        # Pull every dataset for the window up front.
        posts_data = self._db.get_posts_data(window_start, window_end)
        key_posts_score = self._db.get_key_posts_score(window_start, window_end)
        key_authors_score = self._db.get_key_authors_score(window_start, window_end)
        topics_data = self._db.get_topics_data(window_start, window_end)
        posts_references_count = self._db.get_reference_count(window_start, window_end)

        output = []
        for post_id in posts_data:
            row = []
            row.extend(self.get_post_data(posts_data[post_id]))
            row.extend(self.get_post_score(key_posts_score, post_id))
            row.extend(self.get_author_score(key_authors_score, post_id))
            row.extend(self.sample_key_posts_score())
            row.extend(self.get_topic_data(topics_data, post_id))
            row.extend(self.get_posts_references_count(posts_references_count, post_id))
            row.extend(self.get_post_references(post_id))
            output.append(row)
        self.write_posts_to_csv(output)
    def __init__(self, db, **kwargs):
        """Feature generator constructor: reads window and domain settings
        from the config and stores the mandatory author/post inputs plus
        optional graph-related parameters.

        :raises Exception: when 'authors' or 'posts' is missing from kwargs
        """
        #AbstractController.__init__(self, db)
        self._db = db
        self._config_parser = getConfig()
        self._targeted_social_network = self._config_parser.get(
            "DEFAULT", "social_network_name")

        # start_date is stored as date('YYYY-MM-DD HH:MM:SS') in the config.
        start_date = self._config_parser.get("DEFAULT",
                                             "start_date").strip("date('')")
        self._window_start = datetime.datetime.strptime(
            start_date, '%Y-%m-%d %H:%M:%S')
        self._window_size = datetime.timedelta(seconds=int(
            self._config_parser.get("DEFAULT", "window_analyze_size_in_sec")))
        self._window_end = self._window_start + self._window_size
        self._domain = unicode(
            self._config_parser.get(self.__class__.__name__, "domain"))

        if 'authors' in kwargs and 'posts' in kwargs:
            self.authors = kwargs['authors']
            self.author_guid_posts_dict = kwargs['posts']
        else:
            raise Exception('Author object was not passed as parameter')

        # FIX: `in` replaces dict.has_key(), which is deprecated in
        # Python 2 and removed in Python 3.
        if 'measure' in kwargs:
            self._measure = kwargs['measure']
        if 'calculator_type' in kwargs:
            self._calculator_type = kwargs['calculator_type']
        if 'aggregation_function' in kwargs:
            self._aggregation_function = kwargs['aggregation_function']
        if 'graph_type' in kwargs:
            self._graph_type = kwargs['graph_type']
        if 'targeted_class_field_name' in kwargs:
            self._targeted_class_field_name = kwargs[
                'targeted_class_field_name']
# Beispiel #18 (scraped example separator; vote count: 0)
    def setUp(self):
        """Create two authors -- one 'acquired' bad actor and one regular
        user -- each with ten posts of identical content, so the timeline
        overlap visualization has overlapping data to work on.

        The original duplicated the author/post construction twice; it is
        factored into the two private helpers below.
        """
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        self._add_test_author('acquired_user', 1,
                              author_type='bad_actor', author_sub_type='acquired')
        self._add_test_posts('acquired_user', 'bad_post')

        self._add_test_author('TestUser1', 2)
        self._add_test_posts('TestUser1', 'TestPost')

        self._db.commit()

    def _add_test_author(self, guid, osn_id, author_type=None, author_sub_type=None):
        """Insert a Microblog author whose name, guid, screen name and full
        name all equal *guid*; type fields are set only when provided."""
        author = Author()
        author.name = guid
        author.domain = 'Microblog'
        author.author_guid = guid
        author.author_screen_name = guid
        author.author_full_name = guid
        author.author_osn_id = osn_id
        author.created_at = datetime.datetime.now()
        author.missing_data_complementor_insertion_date = datetime.datetime.now()
        author.xml_importer_insertion_date = datetime.datetime.now()
        if author_type is not None:
            author.author_type = author_type
        if author_sub_type is not None:
            author.author_sub_type = author_sub_type
        self._db.add_author(author)

    def _add_test_posts(self, author_guid, guid_prefix):
        """Insert posts <guid_prefix>1 .. <guid_prefix>10 authored by
        *author_guid*, all with the same 'InternetTV love it<i>' content."""
        for i in range(1, 11):
            post = Post()
            post.post_id = guid_prefix + str(i)
            post.author = author_guid
            post.guid = guid_prefix + str(i)
            post.date = datetime.datetime.now()
            post.domain = 'Microblog'
            post.author_guid = author_guid
            post.content = 'InternetTV love it' + str(i)
            post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post)
    def __init__(self, db):
        """XML post importer: reads its source directory from the config
        section named after the concrete class.
        """
        PostImporter.__init__(self, db)
        config_parser = getConfig()
        # Folder holding the XML files to import.
        self.xmlPath = config_parser.get(self.__class__.__name__, "xml_path")

        # self.xmlPath = configInst.get(self.__class__.__name__,"XMDL_source_path")
        # Both are populated per-file/per-folder during the import run.
        self.fileName = None
        self.CurrFolderPath = None
# Beispiel #20 (scraped example separator; vote count: 0)
 def __init__(self, db, query):
     """Crawler that fetches results for *query* via the web-crawlers
     service client.
     """
     # AbstractController.__init__(self, db)
     self._db = db
     #self._keys = keys
     self._webcrawlers_client = WebCrawlersClient()
     self._config_parser = getConfig()
     # NOTE: unicode() is Python 2 only.
     self._domain = unicode(self._config_parser.get("DEFAULT", "domain"))
     self._query = query
# Beispiel #21 (scraped example separator; vote count: 0)
 def __init__(self, db, keys, query):
     """Crawler that fetches articles for *query* from the News API using
     the supplied API *keys*.
     """
     # AbstractController.__init__(self, db)
     self._db = db
     self._keys = keys
     self._newsapi_client = NewsApiClient(self._keys)
     self._config_parser = getConfig()
     # NOTE: unicode() is Python 2 only.
     self._domain = unicode(self._config_parser.get("DEFAULT", "domain"))
     self._query = query
# Beispiel #22 (scraped example separator; vote count: 0)
    def __init__(self, db):
        """Yelp crawler: thresholds below are hard-coded tuning constants."""
        self._db = db
        self._config_parser = getConfig()
        # Commit to the DB every N records.
        self._commit_threshold = 500

        self._yelp_api = YelpAPI()
        # Search radius -- units per the Yelp API (presumably meters; TODO confirm).
        self._radius = 100
        # Progress is printed every N records.
        self._print_threshold = 30
# Beispiel #23 (scraped example separator; vote count: 0)
 def __init__(self, db):
     """Twitter crawler: buffers users and posts before DB insertion."""
     # AbstractController.__init__(self, db)
     self._db = db
     self._twitter_rest_api = Twitter_Rest_Api(db)
     self._config_parser = getConfig()
     # NOTE: unicode() is Python 2 only.
     self._domain = unicode(self._config_parser.get("DEFAULT", "domain"))
     # Pending rows accumulated before being flushed to the DB.
     self._users_to_add = []
     self._post_to_add = []
 def __init__(self, db):
     """Post importer that reads its window dates and thresholds from the
     config.

     Consistency fix: use the local `config_parser` throughout -- the
     original mixed it with self._config_parser, which is only set by the
     PostImporter base class (both resolve to getConfig()).
     """
     PostImporter.__init__(self, db)
     config_parser = getConfig()
     self.start_date = config_parser.eval("DEFAULT", "start_date")
     self.end_date = config_parser.eval("DEFAULT", "end_date")
     self._data_folder = config_parser.eval(self.__class__.__name__,
                                            "data_folder")
     self._bad_actor_threshold = config_parser.eval(
         self.__class__.__name__, "bad_actor_threshold")
    def setUp(self):
        """Create a fresh DB and run the Tumblr importer end to end."""
        # TestBase.setUp(self)
        self.config = getConfig()

        self._db = DB()
        self._db.setUp()
        self._tumblr_importer = TumblrImporter(self._db)
        # NOTE(review): other Tumblr tests call setUp(tsv_files_path) on the
        # importer before execute(); here execute() runs without it --
        # confirm this is intentional.
        self._tumblr_importer.execute()
    def setUp(self):
        """Create a fresh DB and reset shared test structures."""
        self._config_parser = getConfig()
        # NOTE: unicode() is Python 2 only.
        self._domain = unicode(self._config_parser.get("DEFAULT", "domain"))
        self._db = DB()
        self._db.setUp()

        # "stractures" is the project's spelling of this helper's name.
        self._clear_stractures()

        self._counter = 1
 def setUp(self):
     """Create fixture authors/posts and configure the ranked-authors
     exporter from the config."""
     self.config = getConfig()
     self._db = DB()
     self._db.setUp()
     self._ranked_authors_exporter = RankedAuthorsExporter(self._db)
     self._author_guid_dict = {}
     self.make_authors_posts_and_connections()
     # Output location and ranking threshold come from the exporter's section.
     self.csv_location = self.config.eval('RankedAuthorsExporter', 'output_file_path')
     self._threshold = self.config.eval('RankedAuthorsExporter', 'threshold')
 def test_umlaut_chars_in_authors_names(self):
     '''
     Tests that authors' names containing umlauts are encoded properly,
     i.e. o-umlaut => o, u-umlaut => u, etc.
     '''
     config_parser = getConfig().get_config_parser()
     xmlPath = config_parser.get("XMLImporter", "xml_source_path")
     listdic = self.xml_importer.parseXMLsToListdict(xmlPath)
     author_name = listdic[0][u'author']
     # u'\xf6' is o-umlaut; it must not survive the encoding step.
     self.assertFalse(u'\xf6' in author_name, "Author name contain umlaut - unlaut wasn't encoded properly")
    def setUp(self):
        """Create a fresh DB, the differential word-embedding generator
        under test, and a single test author."""
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()

        self._model = Word_Embedding_Differential_Feature_Generator(self._db)

        # Posts are appended by the individual tests.
        self._posts = []
        self._author = None
        self._set_author(u'test_user')
    def __init__(self):
        """Timeline-overlap analyzer: configures logging, opens the DB and
        ensures the configured output directory exists.
        """
        config_parser = getConfig()
        logging.config.fileConfig(getConfig().get("DEFAULT",
                                                  "logger_conf_file"))
        self._db = DB()
        self._db.setUp()
        self._acquired_bad_authors = []
        self._suspected_authors = []
        # Minimum number of shared posts for two authors to count as overlapping.
        self.common_posts_threshold = config_parser.eval(
            self.__class__.__name__, "common_posts_threshold")
        self.output_path = config_parser.eval(self.__class__.__name__,
                                              "output_path")
        self.output_dir = config_parser.eval(self.__class__.__name__,
                                             "output_dir")
        # self.unlabeled_prediction_threshold = config_parser.eval(self.__class__.__name__, "unlabeled_prediction_threshold")

        # FIX: os.path.join instead of manual "/" concatenation, so the path
        # is portable and double separators are avoided.
        output_full_path = os.path.join(self.output_path, self.output_dir)
        if not os.path.exists(output_full_path):
            os.makedirs(output_full_path)
        self._source_author_destination_author_num_of_mutual_posts_dict = {}