def setUp(self):
        """Prepare the fixture: import data, generate features, index them.

        Runs ``TumblrImporter`` on the path stored in the config under
        section ``TumblrImporter``, option
        ``tsv_test_files_syntax_feature_generator``; executes
        ``SyntaxFeatureGenerator`` over the authors and posts of the
        ``Domains.MICROBLOG`` domain; then loads the features stored for
        ``self._author_guid`` and keeps them as a dictionary in
        ``self._author_features_dict``.
        """
        TestBase.setUp(self)

        # The single author whose generated features the tests inspect.
        self._author_guid = "150ff707-a6eb-3051-8f3c-f623293c714b"

        self.config = getConfig()
        section, option = (
            "TumblrImporter", "tsv_test_files_syntax_feature_generator")
        self._tsv_files_path = self.config.get(section, option)

        self._db = DB()
        self._db.setUp()

        # Run the importer on the configured test files.
        self._tumblr_parser = TumblrImporter(self._db)
        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        # Hand the microblog authors/posts to the generator and run it.
        microblog_authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        microblog_posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
        self._syntax_feature_generator = SyntaxFeatureGenerator(
            self._db, authors=microblog_authors, posts=microblog_posts)
        self._syntax_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features_dict = self._create_author_features_dictionary(
            self._author_features)
Beispiel #2
0
 def testSyntaxFeatureGenerator(self):
     """Run SyntaxFeatureGenerator and spot-check four stored features.

     The generator is executed over ``self.authors`` and ``self.posts``;
     every author feature read back from the DB that matches one of the
     (author_guid, attribute_name) pairs below must carry the expected
     string value. Features that match no pair are ignored.

     The DB session is closed in a ``finally`` clause so that a failing
     assertion (or an exception raised by the generator) does not leave
     the session open.
     """
     # (author_guid, attribute_name) -> expected attribute_value
     expected_values = {
         ('TestUser1', "average_hashtags"): '1.0',
         ('TestUser4', "average_user_mentions"): '1.0',
         ('TestUser2', "average_links"): '1.0',
         ('TestUser3', "average_post_lenth"): '5.0',
     }
     try:
         parameters = {"authors": self.authors, "posts": self.posts}
         syntax_feature = SyntaxFeatureGenerator(self._db, **parameters)
         syntax_feature.execute()

         allFeatures = self._db.get_author_features()
         if allFeatures is not None:
             for feature in allFeatures:
                 key = (feature.author_guid, feature.attribute_name)
                 if key in expected_values:
                     self.assertEqual(feature.attribute_value,
                                      expected_values[key])
     finally:
         self._db.session.close()
 def setUp(self):
     """Create a set-up DB, empty bookkeeping state and the generator.

     ``self._posts`` starts as an empty list and ``self._author`` as
     ``None``; the generator is built on the DB with no extra keyword
     arguments.
     """
     database = DB()
     database.setUp()
     self._db = database

     self._posts, self._author = [], None
     self.syntax_feature_generator = SyntaxFeatureGenerator(self._db)
class TestTumblrImporterSyntaxFeatureGenerator(TestBase):
    """Check SyntaxFeatureGenerator output for one TumblrImporter author.

    ``setUp`` runs the importer and the feature generator once per test and
    collects the features stored for ``self._author_guid``; each test then
    compares a single feature value against its expected number.
    """

    def setUp(self):
        """Import the test data, generate the features and index them.

        The importer reads from the path configured under section
        ``TumblrImporter``, option
        ``tsv_test_files_syntax_feature_generator``. The generator receives
        the authors and posts of the ``Domains.MICROBLOG`` domain.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._tsv_files_path = self.config.get(
            "TumblrImporter", "tsv_test_files_syntax_feature_generator")
        self._db = DB()
        self._db.setUp()
        self._tumblr_parser = TumblrImporter(self._db)

        # The author whose features are inspected by the tests below.
        self._author_guid = "150ff707-a6eb-3051-8f3c-f623293c714b"

        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
        parameters = {"authors": authors, "posts": posts}
        self._syntax_feature_generator = SyntaxFeatureGenerator(
            self._db, **parameters)

        self._syntax_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)

        self._author_features_dict = self._create_author_features_dictionary(
            self._author_features)

    # NOTE: the tests use assertEqual; the assertEquals alias is deprecated
    # and was removed from unittest in Python 3.12.

    def test_average_hashtags(self):
        """The ``average_hashtags`` feature converts to 0.5."""
        attribute_value = float(self._author_features_dict["average_hashtags"])
        self.assertEqual(0.5, attribute_value)

    def test_average_links(self):
        """The ``average_links`` feature converts to 0.5."""
        attribute_value = float(self._author_features_dict["average_links"])
        self.assertEqual(0.5, attribute_value)

    def test_average_user_mentions(self):
        """The ``average_user_mentions`` feature converts to 0.5."""
        attribute_value = float(
            self._author_features_dict["average_user_mentions"])
        self.assertEqual(0.5, attribute_value)

    def test_average_post_lenth(self):
        """The ``average_post_lenth`` feature converts to 6.0."""
        # "lenth" is the attribute name as stored; do not "fix" the key.
        attribute_value = float(
            self._author_features_dict["average_post_lenth"])
        self.assertEqual(6.0, attribute_value)

    def tearDown(self):
        """Delete the DB created in ``setUp``."""
        self._db.deleteDB()

    def _create_author_features_dictionary(self, author_features):
        """Map each feature's ``attribute_name`` to its ``attribute_value``.

        :param author_features: iterable of objects exposing
            ``attribute_name`` and ``attribute_value``.
        :return: dict keyed by attribute name; when a name occurs more than
            once, the last occurrence wins.
        """
        return {
            author_feature.attribute_name: author_feature.attribute_value
            for author_feature in author_features
        }
class TestSyntaxFeatureGenerator(TestCase):
    """Tests of SyntaxFeatureGenerator against hand-built authors and posts.

    Each test registers one author, adds four posts for it, executes the
    generator and compares one stored author feature with the expected
    average.
    """

    def setUp(self):
        """Create a set-up DB, empty bookkeeping state and the generator."""
        database = DB()
        database.setUp()
        self._db = database
        self._posts, self._author = [], None
        self.syntax_feature_generator = SyntaxFeatureGenerator(self._db)

    def tearDown(self):
        """Close the DB session opened for the test."""
        self._db.session.close()

    def test_average_hashtags(self):
        """Posts with 1, 0, 2 and 5 hashtags are expected to average 8/4."""
        self._add_author(u"author_guid")
        post_date = "2017-06-12 05:00:00"
        fixture = (
            (u"post1", u"#content 1"),
            (u"post2", u"content 1"),
            (u"post3", u"content 1 #tag #yes"),
            (u"post4", u"content #1 #test #dont #fail #please"),
        )
        for post_title, post_content in fixture:
            self._add_post(post_title, post_content, post_date)

        self.syntax_feature_generator.execute()

        stored = self._db.get_author_feature(
            u"author_guid", u"SyntaxFeatureGenerator_average_hashtags")
        actual = float(stored.attribute_value)
        self.assertAlmostEqual(actual, 8.0 / 4, places=4)

    def test_average_links(self):
        """Posts with 0, 1, 1 and 3 URLs are expected to average 5/4."""
        self._add_author(u"author_guid")
        post_date = "2017-06-12 05:00:00"
        fixture = (
            (u"post1", u"#content 1"),
            (u"post2", u"https://www.google.co.il 1"),
            (u"post3", u"content 1 #tag http://www.google.co.il"),
            (u"post4",
             u"http://www.bank.co.il #1 #test #dont http://www.ynet.co.il https://www.msn.co.il"),
        )
        for post_title, post_content in fixture:
            self._add_post(post_title, post_content, post_date)

        self.syntax_feature_generator.execute()

        stored = self._db.get_author_feature(
            u"author_guid", u"SyntaxFeatureGenerator_average_links")
        actual = float(stored.attribute_value)
        self.assertAlmostEqual(actual, 5.0 / 4, places=4)

    def test_average_user_mentions(self):
        """The user-mention average over the four posts is expected to be 6/4."""
        self._add_author(u"author_guid")
        post_date = "2017-06-12 05:00:00"
        fixture = (
            (u"post1", u"@content 1"),
            (u"post2", u"content 1"),
            (u"post3", u"content 1 @tag #@es"),
            (u"post4", u"content #1 @test @dont @fail #please"),
        )
        for post_title, post_content in fixture:
            self._add_post(post_title, post_content, post_date)

        self.syntax_feature_generator.execute()

        stored = self._db.get_author_feature(
            u"author_guid", u"SyntaxFeatureGenerator_average_user_mentions")
        actual = float(stored.attribute_value)
        self.assertAlmostEqual(actual, 6.0 / 4, places=4)

    def test_average_post_lenth(self):
        """The post-length average over the four posts is expected to be 14/4.

        The posts hold 2, 2, 4 and 6 whitespace-separated tokens, so the
        length is presumably measured in words (confirm against the
        generator).
        """
        self._add_author(u"author_guid")
        post_date = "2017-06-12 05:00:00"
        fixture = (
            (u"post1", u"content 1"),
            (u"post2", u"content 1"),
            (u"post3", u"content 1 @tag #@es"),
            (u"post4", u"content #1 @test @dont @fail #please"),
        )
        for post_title, post_content in fixture:
            self._add_post(post_title, post_content, post_date)

        self.syntax_feature_generator.execute()

        stored = self._db.get_author_feature(
            u"author_guid", u"SyntaxFeatureGenerator_average_post_lenth")
        actual = float(stored.attribute_value)
        self.assertAlmostEqual(actual, 14.0 / 4, places=4)

    def _add_author(self, author_guid):
        """Store a test author with the given guid and remember it.

        The author starts with ``statuses_count`` 0 and is kept in
        ``self._author`` so that ``_add_post`` can attach posts to it.
        """
        new_author = Author()
        new_author.author_guid = author_guid
        new_author.name = u'test'
        new_author.author_full_name = u'test author'
        new_author.domain = u'tests'
        new_author.statuses_count = 0
        self._db.add_author(new_author)
        self._author = new_author

    def _add_post(self, title, content, date_str, domain=u'Microblog'):
        """Store a post for the current author and bump its status count.

        ``title`` is used as the post's title, ``post_id`` and ``guid``;
        ``date_str`` is converted with ``convert_str_to_unicode_datetime``
        and used for both ``date`` and ``created_at``. The post is also
        appended to ``self._posts``.
        """
        owner_guid = self._author.author_guid

        new_post = Post()
        new_post.author = owner_guid
        new_post.author_guid = owner_guid
        new_post.content = content
        new_post.title = title
        new_post.domain = domain
        new_post.post_id = title
        new_post.guid = new_post.post_id
        new_post.date = convert_str_to_unicode_datetime(date_str)
        new_post.created_at = new_post.date

        self._db.addPost(new_post)
        self._posts.append(new_post)

        self._author.statuses_count += 1