Ejemplo n.º 1
0
    def setUp(self):
        """Run HistoryPageProcessor over the test dump and keep its network.

        Stores the three values returned by ``mwlib.explode_dump_filename``
        in ``self.lang``, ``self.date_`` and ``self.type_``, the configured
        processor in ``self.processor`` and the result of its
        ``get_network()`` call in ``self.g``.
        """
        xml = ("tests/utpedits2graph/"
               "vecwiki-20100307-stub-meta-history-TEST.xml.bz2")
        self.lang, self.date_, self.type_ = mwlib.explode_dump_filename(xml)

        deflate, _lineno = find_open_for_this_file(xml)
        # Languages without an entry get the empty string as welcome pattern.
        welcome = defaultdict(str, {'it': r'Benvenut', 'en': r'Welcome'})

        # First pass over the dump: only tag names and namespace
        # translations are read from it.
        if _lineno:
            src = deflate(xml, 51)  # Read first 51 lines to extract namespaces
        else:
            src = deflate(xml)
        try:
            tag = mwlib.get_tags(src,
                                 tags='page,title,revision,timestamp,'
                                      'contributor,username,ip,comment,id')
            translations = mwlib.get_translations(src)
        finally:
            # Close the handle even when reading the metadata fails.
            src.close()

        # Only the "User talk" namespace name is needed by the processor.
        try:
            lang_user_talk = unicode(translations['User talk'])
        except UnicodeDecodeError:
            lang_user_talk = smart_str(translations['User talk'])

        self.processor = HistoryPageProcessor(
            tag=tag,
            user_talk_names=(lang_user_talk, u"User talk"))
        self.processor.welcome_pattern = welcome[self.lang]

        # Second pass: reopen the dump from the start for the processor.
        src = deflate(xml)
        try:
            self.processor.start(src)
            self.g = self.processor.get_network()
        finally:
            # The original never closed this handle.
            src.close()
Ejemplo n.º 2
0
    def setUp(self):
        """Run HistoryPageProcessor over the test dump and keep its network.

        Stores the three values returned by ``mwlib.explode_dump_filename``
        in ``self.lang``, ``self.date_`` and ``self.type_``, the configured
        processor in ``self.processor`` and the result of its
        ``get_network()`` call in ``self.g``.
        """
        xml = ("tests/utpedits2graph/"
               "vecwiki-20100307-stub-meta-history-TEST.xml.bz2")
        self.lang, self.date_, self.type_ = mwlib.explode_dump_filename(xml)

        deflate, _lineno = find_open_for_this_file(xml)
        # Languages without an entry get the empty string as welcome pattern.
        welcome = defaultdict(str, {'it': r'Benvenut', 'en': r'Welcome'})

        # First pass over the dump: only tag names and namespace
        # translations are read from it.
        if _lineno:
            src = deflate(xml, 51)  # Read first 51 lines to extract namespaces
        else:
            src = deflate(xml)
        try:
            tag = mwlib.get_tags(src,
                                 tags='page,title,revision,timestamp,'
                                      'contributor,username,ip,comment,id')
            translations = mwlib.get_translations(src)
        finally:
            # Close the handle even when reading the metadata fails.
            src.close()

        # Only the "User talk" namespace name is needed by the processor.
        try:
            lang_user_talk = unicode(translations['User talk'])
        except UnicodeDecodeError:
            lang_user_talk = smart_str(translations['User talk'])

        self.processor = HistoryPageProcessor(
            tag=tag,
            user_talk_names=(lang_user_talk, u"User talk"))
        self.processor.welcome_pattern = welcome[self.lang]

        # Second pass: reopen the dump from the start for the processor.
        src = deflate(xml)
        try:
            self.processor.start(src)
            self.g = self.processor.get_network()
        finally:
            # The original never closed this handle.
            src.close()
Ejemplo n.º 3
0
class TestUTPEdits(unittest.TestCase):
    """Checks the network HistoryPageProcessor builds from a test dump."""

    def setUp(self):
        """Run HistoryPageProcessor over the test dump and keep its network.

        Stores the three values returned by ``mwlib.explode_dump_filename``
        in ``self.lang``, ``self.date_`` and ``self.type_``, the configured
        processor in ``self.processor`` and the result of its
        ``get_network()`` call in ``self.g``.
        """
        xml = ("tests/utpedits2graph/"
               "vecwiki-20100307-stub-meta-history-TEST.xml.bz2")
        self.lang, self.date_, self.type_ = mwlib.explode_dump_filename(xml)

        deflate, _lineno = find_open_for_this_file(xml)
        # Languages without an entry get the empty string as welcome pattern.
        welcome = defaultdict(str, {'it': r'Benvenut', 'en': r'Welcome'})

        # First pass over the dump: only tag names and namespace
        # translations are read from it.
        if _lineno:
            src = deflate(xml, 51)  # Read first 51 lines to extract namespaces
        else:
            src = deflate(xml)
        try:
            tag = mwlib.get_tags(src,
                                 tags='page,title,revision,timestamp,'
                                      'contributor,username,ip,comment,id')
            translations = mwlib.get_translations(src)
        finally:
            # Close the handle even when reading the metadata fails.
            src.close()

        # Only the "User talk" namespace name is needed by the processor.
        try:
            lang_user_talk = unicode(translations['User talk'])
        except UnicodeDecodeError:
            lang_user_talk = smart_str(translations['User talk'])

        self.processor = HistoryPageProcessor(
            tag=tag,
            user_talk_names=(lang_user_talk, u"User talk"))
        self.processor.welcome_pattern = welcome[self.lang]

        # Second pass: reopen the dump from the start for the processor.
        src = deflate(xml)
        try:
            self.processor.start(src)
            self.g = self.processor.get_network()
        finally:
            # The original never closed this handle.
            src.close()

    def test_graph(self):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(self.g.vs), 7)  # Nodes
        self.assertEqual(len(self.g.es), 9)  # Edges
        self.assertEqual(self.processor.count, 4)
        self.assertEqual(self.processor.count_archive, 0)
        self.assertEqual(self.processor.counter_deleted, 0)
        # Self-loop: exactly one edge whose source equals its target.
        self_loops = [edge for edge in self.g.es
                      if edge.target == edge.source]
        self.assertEqual(1, len(self_loops))
Ejemplo n.º 4
0
class TestUTPEdits(unittest.TestCase):
    """Checks the network HistoryPageProcessor builds from a test dump."""

    def setUp(self):
        """Run HistoryPageProcessor over the test dump and keep its network.

        Stores the three values returned by ``mwlib.explode_dump_filename``
        in ``self.lang``, ``self.date_`` and ``self.type_``, the configured
        processor in ``self.processor`` and the result of its
        ``get_network()`` call in ``self.g``.
        """
        xml = ("tests/utpedits2graph/"
               "vecwiki-20100307-stub-meta-history-TEST.xml.bz2")
        self.lang, self.date_, self.type_ = mwlib.explode_dump_filename(xml)

        deflate, _lineno = find_open_for_this_file(xml)
        # Languages without an entry get the empty string as welcome pattern.
        welcome = defaultdict(str, {'it': r'Benvenut', 'en': r'Welcome'})

        # First pass over the dump: only tag names and namespace
        # translations are read from it.
        if _lineno:
            src = deflate(xml, 51)  # Read first 51 lines to extract namespaces
        else:
            src = deflate(xml)
        try:
            tag = mwlib.get_tags(src,
                                 tags='page,title,revision,timestamp,'
                                      'contributor,username,ip,comment,id')
            translations = mwlib.get_translations(src)
        finally:
            # Close the handle even when reading the metadata fails.
            src.close()

        # Only the "User talk" namespace name is needed by the processor.
        try:
            lang_user_talk = unicode(translations['User talk'])
        except UnicodeDecodeError:
            lang_user_talk = smart_str(translations['User talk'])

        self.processor = HistoryPageProcessor(
            tag=tag,
            user_talk_names=(lang_user_talk, u"User talk"))
        self.processor.welcome_pattern = welcome[self.lang]

        # Second pass: reopen the dump from the start for the processor.
        src = deflate(xml)
        try:
            self.processor.start(src)
            self.g = self.processor.get_network()
        finally:
            # The original never closed this handle.
            src.close()

    def test_graph(self):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(self.g.vs), 7)  # Nodes
        self.assertEqual(len(self.g.es), 9)  # Edges
        self.assertEqual(self.processor.count, 4)
        self.assertEqual(self.processor.count_archive, 0)
        self.assertEqual(self.processor.counter_deleted, 0)
        # Self-loop: exactly one edge whose source equals its target.
        self_loops = [edge for edge in self.g.es
                      if edge.target == edge.source]
        self.assertEqual(1, len(self_loops))