Example #1
0
    def test_timestripper_match(self, key):
        """Test that the expected date is matched for the given site.

        The ``match`` sample is mandatory. ``match2`` and ``match3`` are
        optional and are checked in that order; the test ends at the first
        one that is not configured for the site.
        """
        self.ts = TimeStripper(self.get_site(key))

        site = self.ts.site
        tzone = tzoneFixedOffset(site.siteinfo['timeoffset'],
                                 site.siteinfo['timezone'])
        samples = self.sites[key]

        expected = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)
        self.assertEqual(self.ts.timestripper(samples['match']), expected)

        optional_cases = (
            ('match2', (2008, 9, 12, 16, 41)),
            ('match3', (2014, 8, 14, 21, 18)),
        )
        for name, fields in optional_cases:
            if name not in samples:
                return
            expected = datetime.datetime(*fields, tzinfo=tzone)
            self.assertEqual(self.ts.timestripper(samples[name]), expected)
class TestTimeStripperWithDigitsAsMonths(TestCase):
    """TimeStripper tests run against the 'cs' Wikipedia site."""

    family = 'wikipedia'
    code = 'cs'

    cached = True

    def setUp(self):
        """Run the base setup, then build a TimeStripper for the site."""
        super(TestTimeStripperWithDigitsAsMonths, self).setUp()
        site = self.get_site()
        self.ts = TimeStripper(site)

    def test_last_match_and_replace(self):
        """Test the text and group dict returned for the month pattern."""
        replace_last = self.ts.last_match_and_replace
        month_pat = self.ts.pmonthR
        unmatched = u'this string has no match'

        # One, two and three candidate month tokens respectively.
        self.assertEqual(
            replace_last(u'this string has XX. YY. 12. in it', month_pat),
            (u'this string has XX. YY. 12. in it', {'month': u'12.'}))
        self.assertEqual(
            replace_last(u'this string has XX. 1. 12. in it', month_pat),
            (u'this string has XX. 1. 12. in it', {'month': u'12.'}))
        self.assertEqual(
            replace_last(u'this string has 1. 1. 12. in it', month_pat),
            (u'this string has @@ 1. 12. in it', {'month': u'12.'}))

        # Without any match the text comes back untouched with None.
        self.assertEqual(replace_last(unmatched, month_pat),
                         (unmatched, None))
Example #3
0
    def test_timestripper_match_only(self):
        """Test that the latest date is used instead of other dates.

        Two texts are checked: one with a later date appended after HTML
        comments, and one with an earlier date following a commented-out
        ``self.user_and_date``.
        """
        stripper = TimeStripper(self.get_site())

        later_date = '10:57 06 June 2015 (UTC)'
        text = ''.join(
            ['<!-- --> ', self.user_and_date, ' <!-- -->', later_date])
        expected = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone)
        self.assertEqual(stripper.timestripper(text), expected)

        earlier_date = '02:57 06 June 2015 (UTC)'
        text = ''.join(['<!-- ', self.user_and_date, ' --> ', earlier_date])
        expected = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
        self.assertEqual(stripper.timestripper(text), expected)
Example #4
0
    def test_timestripper_match(self):
        """Test that dates in comments are correctly recognised."""
        ts = TimeStripper(self.get_site())

        # Every sample below is expected to yield the same timestamp.
        expected = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
        samples = (
            '<!-- [[User:Do___ArchiveUntil]] ' + self.date + ' -->',
            '<!-- --> <!-- ' + self.user_and_date + ' <!-- -->',
            '<!-- ' + self.user_and_date + ' -->',
        )
        for text in samples:
            self.assertEqual(ts.timestripper(text), expected)
Example #5
0
    def test_timestripper_nomatch(self, key):
        """Test that no date is extracted from non-matching text.

        Uses the site's ``nomatch`` sample when configured and a built-in
        English sample otherwise; ``nomatch1`` is checked only if present.
        """
        self.ts = TimeStripper(self.get_site(key))
        samples = self.sites[key]

        fallback = '3 March 2011 19:48 (UTC) 7 March 2010 19:48 (UTC)'
        text = samples['nomatch'] if 'nomatch' in samples else fallback
        self.assertEqual(self.ts.timestripper(text), None)

        if 'nomatch1' in samples:
            self.assertEqual(
                self.ts.timestripper(samples['nomatch1']), None)
    def test_timestripper_match(self, key):
        """Test that the expected date is matched for the given site.

        The samples are checked in the order ``match``, ``match2``,
        ``match3``; only the first one is mandatory.
        """
        self.ts = TimeStripper(self.get_site(key))

        site_tz = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
                                   self.ts.site.siteinfo['timezone'])
        configured = self.sites[key]

        checks = [
            ('match', datetime.datetime(2010, 2, 7, 19, 48, tzinfo=site_tz)),
            ('match2', datetime.datetime(2008, 9, 12, 16, 41, tzinfo=site_tz)),
            ('match3', datetime.datetime(2014, 8, 14, 21, 18, tzinfo=site_tz)),
        ]
        for position, (name, expected) in enumerate(checks):
            # Only samples after the first may be absent; stop at the
            # first missing one.
            if position and name not in configured:
                break
            self.assertEqual(self.ts.timestripper(configured[name]), expected)
Example #7
0
 def __init__(self, source, archiver, params=None) -> None:
     """Initializer.

     :param source: passed unchanged to the parent initializer
     :param archiver: the archiver owning this page; may be None (for
         testing), in which case a dedicated TimeStripper is created
     :param params: values interpolated with ``%`` into the archive
         header when the page does not exist; skipped when falsy
     """
     super().__init__(source)
     self.threads = []
     self.full = False
     self.archiver = archiver
     # for testing purposes we allow archiver to be None and we are able
     # to create a DiscussionPage in this way:
     # >>> import pywikibot as py
     # >>> from scripts.archivebot import DiscussionPage
     # >>> d = DiscussionPage(py.Page(py.Site(), <talk page name>), None)
     if archiver is None:
         self.timestripper = TimeStripper(self.site)
     else:
         self.timestripper = self.archiver.timestripper
     self.params = params
     try:
         self.load_page()
     except NoPageError:
         default_header = i18n.twtranslate(self.site.code,
                                           'archivebot-archiveheader')
         # archiver may legitimately be None (see above); without this
         # guard a missing page would raise AttributeError here.
         if archiver is None:
             self.header = default_header
         else:
             self.header = archiver.get_attr('archiveheader',
                                             default_header)
         if self.params:
             self.header = self.header % self.params
Example #8
0
 def __init__(self, source, archiver, params=None):
     """Initializer.

     :param source: passed unchanged to the parent initializer
     :param archiver: the archiver owning this page; may be None (for
         testing), in which case a dedicated TimeStripper is created
     :param params: values interpolated with ``%`` into the archive
         header when the page does not exist; skipped when falsy
     """
     super(DiscussionPage, self).__init__(source)
     self.threads = []
     self.full = False
     self.archiver = archiver
     # for testing purposes we allow archiver to be None and we are able
     # to create a DiscussionPage in this way:
     # >>> import pywikibot as py
     # >>> from scripts.archivebot import DiscussionPage
     # >>> d = DiscussionPage(py.Page(py.Site(), <talk page name>), None)
     if archiver is None:
         self.timestripper = TimeStripper(self.site)
     else:
         self.timestripper = self.archiver.timestripper
     self.params = params
     self.now = datetime.datetime.utcnow().replace(tzinfo=TZoneUTC())
     try:
         self.load_page()
     except pywikibot.NoPage:
         default_header = i18n.twtranslate(self.site.code,
                                           'archivebot-archiveheader')
         # archiver may legitimately be None (see above); without this
         # guard a missing page would raise AttributeError here.
         if archiver is None:
             self.header = default_header
         else:
             self.header = archiver.get_attr('archiveheader',
                                             default_header)
         if self.params:
             self.header = self.header % self.params
Example #9
0
    def __init__(self, page, template, salt, force=False) -> None:
        """Initializer.

        :param page: a page object to be archived
        :type page: pywikibot.Page
        :param template: a template with configuration settings
        :type template: pywikibot.Page
        :param salt: salt value
        :type salt: str
        :param force: override security value
        :type force: bool
        """
        # Default attributes; each value is a two-item list whose second
        # item starts out False.
        self.attributes = OrderedDict((
            ('archive', ['', False]),
            ('algo', ['old(24h)', False]),
            ('counter', ['1', False]),
            ('maxarchivesize', ['200K', False]),
        ))
        self.salt = salt
        self.force = force
        self.site = page.site
        self.tpl = template
        # The timestripper is created before the DiscussionPage, which is
        # handed this object.
        self.timestripper = TimeStripper(site=self.site)
        self.page = DiscussionPage(page, self)
        self.load_config()
        self.comment_params = {'from': self.page.title()}
        utc_now = datetime.datetime.utcnow()
        self.now = utc_now.replace(tzinfo=TZoneUTC())
        self.archives = {}
        self.archived_threads = 0
        # Map 1-based month numbers to the site's long/short month names.
        self.month_num2orig_names = {
            number: {'long': long_name, 'short': short_name}
            for number, (long_name, short_name)
            in enumerate(self.site.months_names, start=1)
        }
Example #10
0
    def __init__(self, page, template, salt, force=False):
        """Initializer.

        :param page: a page object to be archived
        :type page: pywikibot.Page
        :param template: a template with configuration settings
        :type template: pywikibot.Page
        :param salt: salt value
        :type salt: str
        :param force: override security value
        :type force: bool
        """
        # Default attributes; each value is a two-item list whose second
        # item starts out False.
        self.attributes = {
            'algo': ['old(24h)', False],
            'archive': ['', False],
            'maxarchivesize': ['1000M', False],
            'counter': ['1', False],
            'key': ['', False],
        }
        self.salt = salt
        self.force = force
        self.site = page.site
        self.tpl = template
        # The timestripper is created before the DiscussionPage, which is
        # handed this object.
        self.timestripper = TimeStripper(site=self.site)
        self.page = DiscussionPage(page, self)
        self.load_config()
        self.comment_params = {'from': self.page.title()}
        self.archives = {}
        self.archived_threads = 0
        # Map 1-based month numbers to the site's long/short month names.
        self.month_num2orig_names = {}
        month_pairs = enumerate(self.site.months_names, start=1)
        for number, (long_name, short_name) in month_pairs:
            self.month_num2orig_names[number] = {
                'long': long_name,
                'short': short_name,
            }
Example #11
0
class TestTimeStripper(PywikibotTestCase):
    """Test cases for TimeStripper methods."""

    def setUp(self):
        """Create the TimeStripper for fr-wikipedia, then run base setup."""
        self.ts = TimeStripper(pywikibot.Site('fr', 'wikipedia'))
        super(TestTimeStripper, self).setUp()

    def test_findmarker(self):
        """Test that a marker which is not part of the text is found."""
        text = u'this is a string with a maker is @@@@already present'
        marker = self.ts.findmarker(text, base=u'@@', delta='@@')
        self.assertEqual(marker, '@@@@@@')

    def test_last_match_and_replace(self):
        """Test that the pattern matches the rightmost item."""
        year_pattern = self.ts.yearR
        unmatched = u'this string has no match'

        replaced = self.ts.last_match_and_replace(
            u'this string has one 1998, 1999 and 3000 in it', year_pattern)
        self.assertEqual(
            replaced,
            (u'this string has one @@, @@ and 3000 in it',
             {'year': u'1999'}))

        untouched = self.ts.last_match_and_replace(unmatched, year_pattern)
        self.assertEqual(untouched, (unmatched, None))

    def test_timestripper(self):
        """Test that the correct date is matched."""
        french_text = (
            u'3 février 2010 à 19:48 (CET) 7 février 2010 à 19:48 (CET)')
        english_text = u'3 March 2010 19:48 (CET) 7 March 2010 19:48 (CET)'

        siteinfo = self.ts.site.siteinfo
        tzone = tzoneFixedOffset(siteinfo['timeoffset'],
                                 siteinfo['timezone'])
        expected = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(french_text), expected)
        self.assertEqual(self.ts.timestripper(english_text), None)
Example #12
0
class DiscussionThread(object):
    """An object representing a discussion thread on a page.

    A thread is something of the form:

    == Title of thread ==

    Thread content here. ~~~~
    :Reply, etc. ~~~~
    """

    def __init__(self, title, now):
        """Create an empty thread.

        :param title: the thread heading text
        :param now: reference time subtracted from in should_be_archived()

        NOTE: ``site`` is not a parameter; it is resolved from the
        enclosing (module) scope when the TimeStripper is created.
        """
        self.title = title
        self.now = now
        self.content = ""
        self.ts = TimeStripper(site=site)
        self.timestamp = None

    def __repr__(self):
        """Return the class name, title and content size in UTF-8 bytes."""
        # Count encoded bytes, as size() does, so the "bytes" label is
        # accurate for non-ASCII content.
        return '%s("%s",%d bytes)' \
               % (self.__class__.__name__, self.title,
                  len(self.content.encode('utf-8')))

    def feed_line(self, line):
        """Append a line to the thread and track its newest timestamp.

        Empty lines arriving before any content are dropped.
        """
        if not self.content and not line:
            return

        self.content += line + '\n'

        timestamp = self.ts.timestripper(line)

        if not self.timestamp:  # first time
            self.timestamp = timestamp

        if timestamp:
            self.timestamp = max(self.timestamp, timestamp)

    def size(self):
        """Return the UTF-8 byte length of title and content plus 12."""
        return (len(self.title.encode('utf-8'))
                + len(self.content.encode('utf-8')) + 12)

    def to_text(self):
        """Return the thread as wikitext: heading, blank line, content."""
        return "== " + self.title + ' ==\n\n' + self.content

    def should_be_archived(self, archiver):
        """Return the archiving reason, or '' to keep the thread.

        Only an ``old(<age>)`` algo is recognised. A thread without any
        timestamp is never archived.
        """
        algo = archiver.get('algo')
        re_t = re.search(r'^old\((.*)\)$', algo)
        if re_t:
            if not self.timestamp:
                return ''
            # TODO: handle this:
            # return 'unsigned'
            maxage = str2time(re_t.group(1))
            if self.now - self.timestamp > maxage:
                return message('archivebot-older-than') + ' ' + re_t.group(1)
        return ''
Example #13
0
class DiscussionThread(object):
    """A single discussion thread of a page.

    In wikitext a thread looks like this:

    == Title of thread ==

    Thread content here. ~~~~
    :Reply, etc. ~~~~
    """

    def __init__(self, title, now):
        """Create an empty thread with the given title and reference time.

        NOTE: ``site`` is not a parameter; it is resolved from the
        enclosing (module) scope when the TimeStripper is created.
        """
        self.title, self.now = title, now
        self.content = ""
        self.timestamp = None
        self.ts = TimeStripper(site=site)

    def __repr__(self):
        """Return the class name, title and content size in UTF-8 bytes."""
        byte_count = len(self.content.encode('utf-8'))
        details = (self.__class__.__name__, self.title, byte_count)
        return '%s("%s",%d bytes)' % details

    def feed_line(self, line):
        """Append a line and remember the newest timestamp seen so far.

        Empty lines arriving before any content are dropped.
        """
        if not (self.content or line):
            return

        self.content += line + '\n'
        found = self.ts.timestripper(line)

        if not self.timestamp:
            # Nothing recorded yet: take whatever this line produced.
            self.timestamp = found
        elif found:
            self.timestamp = max(self.timestamp, found)

    def size(self):
        """Return the UTF-8 byte length of title and content plus 12."""
        encoded_length = sum(len(part.encode('utf-8'))
                             for part in (self.title, self.content))
        return encoded_length + 12

    def to_text(self):
        """Return the thread as wikitext: heading, blank line, content."""
        heading = "== " + self.title + ' ==\n\n'
        return heading + self.content

    def should_be_archived(self, archiver):
        """Return the archiving reason, or '' to keep the thread.

        Only an ``old(<age>)`` algo is recognised. A thread without any
        timestamp is never archived.
        """
        age_rule = re.search(r'^old\((.*)\)$', archiver.get('algo'))
        if not age_rule or not self.timestamp:
            # TODO: handle unsigned threads (return 'unsigned')
            return ''

        age_text = age_rule.group(1)
        if self.now - self.timestamp > str2time(age_text):
            return message('archivebot-older-than') + ' ' + age_text
        return ''
class TestTimeStripperWithDigitsAsMonths(TestCase):
    """TimeStripper tests run against the 'cs' Wikipedia site."""

    family = 'wikipedia'
    code = 'cs'

    cached = True

    def setUp(self):
        """Run the base setup, then build a TimeStripper for the site."""
        super(TestTimeStripperWithDigitsAsMonths, self).setUp()
        site = self.get_site()
        self.ts = TimeStripper(site)

    def test_last_match_and_replace(self):
        """Test the text and group dict returned for the month pattern."""
        unmatched = u'this string has no match'
        # (input text, expected (resulting text, matched groups)) pairs.
        cases = (
            (u'this string has XX. YY. 12. in it',
             (u'this string has XX. YY. 12. in it', {'month': u'12.'})),
            (u'this string has XX. 1. 12. in it',
             (u'this string has XX. 1. 12. in it', {'month': u'12.'})),
            (u'this string has 1. 1. 12. in it',
             (u'this string has @@ 1. 12. in it', {'month': u'12.'})),
            (unmatched, (unmatched, None)),
        )
        month_pattern = self.ts.pmonthR

        for text, expected in cases:
            self.assertEqual(
                self.ts.last_match_and_replace(text, month_pattern),
                expected)
    def test_timestripper_nomatch(self, key):
        """Test that no date is extracted from non-matching text.

        Uses the site's ``nomatch`` sample when configured and a built-in
        English sample otherwise; ``nomatch1`` is checked only if present.
        """
        self.ts = TimeStripper(self.get_site(key))
        samples = self.sites[key]

        default_text = u'3 March 2010 19:48 (UTC) 7 March 2010 19:48 (UTC)'
        if 'nomatch' in samples:
            first_text = samples['nomatch']
        else:
            first_text = default_text
        self.assertEqual(self.ts.timestripper(first_text), None)

        if 'nomatch1' in samples:
            second_text = samples['nomatch1']
            self.assertEqual(self.ts.timestripper(second_text), None)
Example #16
0
    def __init__(self, page, template, salt: str, force: bool = False) -> None:
        """Initializer.

        :param page: a page object to be archived
        :type page: :py:obj:`pywikibot.Page`
        :param template: a template with configuration settings
        :type template: :py:obj:`pywikibot.Page`
        :param salt: salt value
        :param force: override security value
        """
        # Default attributes; each value is a two-item list whose second
        # item starts out False.
        self.attributes = OrderedDict((
            ('archive', ['', False]),
            ('algo', ['old(24h)', False]),
            ('counter', ['1', False]),
            ('maxarchivesize', ['200K', False]),
        ))
        self.salt = salt
        self.force = force
        self.site = page.site
        self.tpl = template
        self.timestripper = TimeStripper(site=self.site)

        # read maxarticlesize
        try:
            article_limit = self.site.siteinfo['maxarticlesize']
        except KeyError:  # mw < 1.28
            self.maxsize = 2096128  # 2 MB - 1 KB gap
        else:
            # keep a gap of 1 KB not to block later changes
            self.maxsize = article_limit - 1024

        self.page = DiscussionPage(page, self)
        self.load_config()
        self.comment_params = {'from': self.page.title()}
        utc_now = datetime.datetime.utcnow()
        self.now = utc_now.replace(tzinfo=TZoneUTC())
        self.archives = {}
        self.archived_threads = 0
        # Map 1-based month numbers to the site's long/short month names.
        self.month_num2orig_names = {
            number: {'long': long_name, 'short': short_name}
            for number, (long_name, short_name)
            in enumerate(self.site.months_names, start=1)
        }
Example #17
0
 def __init__(self, page, tpl, salt, force=False):
     """Initializer.

     :param page: the page to be archived; its ``site`` is reused here
     :param tpl: template title, turned into a Page on the same site
     :param salt: stored as ``self.salt``
     :param force: stored as ``self.force``
     """
     # Default attributes; each value is a two-item list whose second
     # item starts out False.
     self.attributes = {
         'algo': ['old(24h)', False],
         'archive': ['', False],
         'maxarchivesize': ['1000M', False],
         'counter': ['1', False],
         'key': ['', False],
     }
     self.salt = salt
     self.force = force
     self.site = page.site
     self.tpl = pywikibot.Page(self.site, tpl)
     # The timestripper is created before the DiscussionPage, which is
     # handed this object.
     self.timestripper = TimeStripper(site=self.site)
     self.page = DiscussionPage(page, self)
     self.load_config()
     self.comment_params = {'from': self.page.title()}
     self.archives = {}
     self.archived_threads = 0
     # Map 1-based month numbers to the site's long/short month names.
     self.month_num2orig_names = {}
     month_pairs = enumerate(self.site.months_names, start=1)
     for number, (long_name, short_name) in month_pairs:
         self.month_num2orig_names[number] = {
             "long": long_name,
             "short": short_name,
         }
Example #18
0
 def setUp(self):
     """Run the base setup, then build a TimeStripper for the site."""
     super(TestTimeStripperWithDigitsAsMonths, self).setUp()
     site = self.get_site()
     self.ts = TimeStripper(site)
 def setUp(self):
     """Run the base setup, then build a TimeStripper for the site."""
     super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
     site = self.get_site()
     self.ts = TimeStripper(site)
Example #20
0
 def setUp(self):
     """Run the base setup, then build a TimeStripper for the site."""
     super(TestTimeStripperWithDigitsAsMonths, self).setUp()
     site = self.get_site()
     self.ts = TimeStripper(site)
Example #21
0
 def __init__(self, title, now):
     """Create an empty thread with the given title and reference time.

     NOTE: ``site`` is not a parameter; it is resolved from the
     enclosing (module) scope when the TimeStripper is created.
     """
     self.title, self.now = title, now
     self.content = ""
     self.timestamp = None
     self.ts = TimeStripper(site=site)
 def setUp(self):
     """Run the base setup, then build a TimeStripper for the site."""
     super(TestTimeStripperWithDigitsAsMonths, self).setUp()
     site = self.get_site()
     self.ts = TimeStripper(site)
class TestTimeStripperWithNoDigitsAsMonths(TestCase):
    """TimeStripper tests run against the 'fr' Wikipedia site."""

    family = 'wikipedia'
    code = 'fr'

    cached = True

    def setUp(self):
        """Run the base setup, then build a TimeStripper for the site."""
        super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
        site = self.get_site()
        self.ts = TimeStripper(site)

    def test_findmarker(self):
        """Test that a marker which is not part of the text is found."""
        text = u'this is a string with a maker is @@@@already present'
        marker = self.ts.findmarker(text, base=u'@@', delta='@@')
        self.assertEqual(marker, '@@@@@@')

    def test_last_match_and_replace(self):
        """Test that pattern matches and removes items correctly."""
        unmatched = u'this string has no match'

        # (input text, expected (resulting text, matched groups)) pairs
        # for the year pattern.
        year_cases = (
            (u'this string has 3000, 1999 and 3000 in it',
             (u'this string has 3000, @@ and 3000 in it',
              {'year': u'1999'})),
            (u'this string has 1998, 1999 and 3000 in it',
             (u'this string has @@, @@ and 3000 in it',
              {'year': u'1999'})),
            (unmatched, (unmatched, None)),
        )
        year_pattern = self.ts.pyearR
        for text, expected in year_cases:
            self.assertEqual(
                self.ts.last_match_and_replace(text, year_pattern),
                expected)

        # The same kind of pairs for the month pattern.
        month_cases = (
            (u'this string has XXX, YYY and février in it',
             (u'this string has XXX, YYY and @@ in it',
              {'month': u'février'})),
            (u'this string has XXX, mars and février in it',
             (u'this string has XXX, @@ and @@ in it',
              {'month': u'février'})),
            (u'this string has avr, mars and février in it',
             (u'this string has @@, @@ and @@ in it',
              {'month': u'février'})),
            (unmatched, (unmatched, None)),
        )
        month_pattern = self.ts.pmonthR
        for text, expected in month_cases:
            self.assertEqual(
                self.ts.last_match_and_replace(text, month_pattern),
                expected)

    def test_hour(self):
        """Test that hour 23 is accepted and hour 24 is rejected."""
        in_range = self.ts.timestripper(u'7 février 2010 à 23:00 (CET)')
        self.assertNotEqual(in_range, None)

        out_of_range = self.ts.timestripper(u'7 février 2010 à 24:00 (CET)')
        self.assertEqual(out_of_range, None)
Example #24
0
 def __init__(self, title, now):
     """Create an empty thread with the given title and reference time.

     NOTE: ``site`` is not a parameter; it is resolved from the
     enclosing (module) scope when the TimeStripper is created.
     """
     self.title, self.now = title, now
     self.content = ""
     self.timestamp = None
     self.ts = TimeStripper(site=site)
Example #25
0
 def setUp(self):
     """Run the base setup, then build a TimeStripper for the site."""
     super(TestTimeStripperCase, self).setUp()
     site = self.get_site()
     self.ts = TimeStripper(site)
class TestTimeStripperLanguage(TestCase):

    """Test cases for TimeStripper on sites in several languages."""

    # Per-site samples. 'match' is always present; 'match2', 'match3',
    # 'nomatch' and 'nomatch1' are optional and only used when defined.
    # NOTE: the ptwiki sample contains invisible U+200E marks after the
    # year; keep the literals byte-for-byte.
    sites = {
        'cswiki': {
            'family': 'wikipedia',
            'code': 'cs',
            'match': u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'enwiki': {
            'family': 'wikipedia',
            'code': 'en',
            'match': u'3 February 2010 19:48 (UTC) 7 February 2010 19:48 (UTC)',
            'nomatch': u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'frwiki': {
            'family': 'wikipedia',
            'code': 'fr',
            'match': u'3 février 2010 à 19:48 (CET) 7 février 2010 à 19:48 (CET)',
            'nomatch': u'3 March 2010 19:48 (CET) 7 March 2010 19:48 (CET)',
        },
        'nowiki': {
            'family': 'wikipedia',
            'code': 'no',
            'match': u'3. feb 2010 kl. 19:48 (CET) 7. feb 2010 kl. 19:48 (UTC)',
        },
        'ptwiki': {
            'family': 'wikipedia',
            'code': 'pt',
            'match': u'19h48min de 3 de fevereiro de 2010‎ (UTC) 19h48min de 7 de fevereiro de 2010‎ (UTC)',
        },
        'viwiki': {
            'family': 'wikipedia',
            'code': 'vi',
            'match': u'19:48, ngày 15 tháng 9 năm 2008 (UTC) 19:48, ngày 7 tháng 2 năm 2010 (UTC)',
            'match2': u'16:41, ngày 15 tháng 9 năm 2008 (UTC) 16:41, ngày 12 tháng 9 năm 2008 (UTC)',
            'match3':  u'21:18, ngày 13 tháng 8 năm 2014 (UTC) 21:18, ngày 14 tháng 8 năm 2014 (UTC)',
            'nomatch1': u'21:18, ngày 13 March 8 năm 2014 (UTC) 21:18, ngày 14 March 8 năm 2014 (UTC)',
        },
    }

    cached = True

    def test_timestripper_match(self, key):
        """Test that correct date is matched.

        ``key`` selects an entry of ``sites``.
        NOTE(review): presumably the test framework calls this once per
        site key -- confirm against the TestCase base class.
        """
        self.ts = TimeStripper(self.get_site(key))

        # Expected results use the site's own timezone settings.
        tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
                                 self.ts.site.siteinfo['timezone'])

        txtMatch = self.sites[key]['match']

        res = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txtMatch), res)

        # The remaining samples are optional; stop at the first missing one.
        if 'match2' not in self.sites[key]:
            return

        txtMatch = self.sites[key]['match2']

        res = datetime.datetime(2008, 9, 12, 16, 41, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txtMatch), res)

        if 'match3' not in self.sites[key]:
            return

        txtMatch = self.sites[key]['match3']

        res = datetime.datetime(2014, 8, 14, 21, 18, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txtMatch), res)

    def test_timestripper_nomatch(self, key):
        """Test that correct date is not matched.

        ``key`` selects an entry of ``sites``.
        """
        self.ts = TimeStripper(self.get_site(key))

        # Sites without their own 'nomatch' sample use an English one.
        if 'nomatch' in self.sites[key]:
            txtNoMatch = self.sites[key]['nomatch']
        else:
            txtNoMatch = u'3 March 2010 19:48 (UTC) 7 March 2010 19:48 (UTC)'

        self.assertEqual(self.ts.timestripper(txtNoMatch), None)

        # 'nomatch1' is an optional second negative sample.
        if 'nomatch1' not in self.sites[key]:
            return

        txtNoMatch = self.sites[key]['nomatch1']
        self.assertEqual(self.ts.timestripper(txtNoMatch), None)
Example #27
0
class TestTimeStripperWithNoDigitsAsMonths(TestCase):
    """TimeStripper tests run against the 'fr' Wikipedia site."""

    family = 'wikipedia'
    code = 'fr'

    cached = True

    def setUp(self):
        """Run the base setup, then build a TimeStripper for the site."""
        super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
        site = self.get_site()
        self.ts = TimeStripper(site)

    def test_findmarker(self):
        """Test that a marker which is not part of the text is found."""
        text = u'this is a string with a maker is @@@@already present'
        marker = self.ts.findmarker(text, base=u'@@', delta='@@')
        self.assertEqual(marker, '@@@@@@')

    def test_last_match_and_replace(self):
        """Test that pattern matches and removes items correctly."""
        replace_last = self.ts.last_match_and_replace
        unmatched = u'this string has no match'

        # Year pattern: one and two replaceable years, then no match.
        year_pat = self.ts.pyearR
        self.assertEqual(
            replace_last(u'this string has 3000, 1999 and 3000 in it',
                         year_pat),
            (u'this string has 3000, @@ and 3000 in it', {'year': u'1999'}))
        self.assertEqual(
            replace_last(u'this string has 1998, 1999 and 3000 in it',
                         year_pat),
            (u'this string has @@, @@ and 3000 in it', {'year': u'1999'}))
        self.assertEqual(replace_last(unmatched, year_pat),
                         (unmatched, None))

        # Month pattern: one, two and three month names, then no match.
        month_pat = self.ts.pmonthR
        self.assertEqual(
            replace_last(u'this string has XXX, YYY and février in it',
                         month_pat),
            (u'this string has XXX, YYY and @@ in it',
             {'month': u'février'}))
        self.assertEqual(
            replace_last(u'this string has XXX, mars and février in it',
                         month_pat),
            (u'this string has XXX, @@ and @@ in it',
             {'month': u'février'}))
        self.assertEqual(
            replace_last(u'this string has avr, mars and février in it',
                         month_pat),
            (u'this string has @@, @@ and @@ in it',
             {'month': u'février'}))
        self.assertEqual(replace_last(unmatched, month_pat),
                         (unmatched, None))

    def test_hour(self):
        """Test that hour 23 is accepted and hour 24 is rejected."""
        in_range = self.ts.timestripper(u'7 février 2010 à 23:00 (CET)')
        self.assertNotEqual(in_range, None)

        out_of_range = self.ts.timestripper(u'7 février 2010 à 24:00 (CET)')
        self.assertEqual(out_of_range, None)
Example #28
0
 def setUp(self):
     """Run the base setup, then build a TimeStripper for the site."""
     super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
     site = self.get_site()
     self.ts = TimeStripper(site)
Example #29
0
class TestTimeStripperLanguage(TestCase):
    """Test cases for TimeStripper on sites in several languages."""

    # Per-site samples. 'match' is always present; 'match2', 'match3',
    # 'nomatch' and 'nomatch1' are optional and only used when defined.
    # NOTE: the ptwiki sample contains invisible U+200E marks after the
    # year and the fawiki sample uses double spaces; keep the literals
    # byte-for-byte.
    sites = {
        'cswiki': {
            'family': 'wikipedia',
            'code': 'cs',
            'match': '3. 2. 2011, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'enwiki': {
            'family': 'wikipedia',
            'code': 'en',
            'match': '3 February 2011 19:48 (UTC) '
            '7 February 2010 19:48 (UTC)',
            'nomatch': '3. 2. 2011, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'fawiki': {
            'family': 'wikipedia',
            'code': 'fa',
            'match': '۳ فوریهٔ  ۲۰۱۱، ساعت ۱۹:۴۸ (UTC) '
            '۷ فوریهٔ  ۲۰۱۰، ساعت ۱۹:۴۸ (UTC)',
            'nomatch': '۳ ۲ ۲۰۱۴ ۱۹:۴۸ (UTC) ۷ ۲ ۲۰۱۰ ۱۹:۴۸ (UTC)',
        },
        'frwiki': {
            'family': 'wikipedia',
            'code': 'fr',
            'match': '3 février 2011 à 19:48 (CET) '
            '7 février 2010 à 19:48 (CET)',
            'nomatch': '3 March 2011 19:48 (CET) 7 March 2010 19:48 (CET)',
        },
        'kowiki': {
            'family': 'wikipedia',
            'code': 'ko',
            'match': '2011년 2월 3일 (수) 19:48 (KST) '
            '2010년 2월 7일 (수) 19:48 (KST)',
        },
        'nowiki': {
            'family': 'wikipedia',
            'code': 'no',
            'match': '3. feb 2011 kl. 19:48 (CET) '
            '7. feb 2010 kl. 19:48 (UTC)',
        },
        'ptwiki': {
            'family':
            'wikipedia',
            'code':
            'pt',
            'match':
            '19h48min de 3 de fevereiro de 2011‎ (UTC) 19h48min '
            'de 7 de fevereiro de 2010‎ (UTC)',
        },
        'viwiki': {
            'family':
            'wikipedia',
            'code':
            'vi',
            'match':
            '19:48, ngày 3 tháng 2 năm 2011 (UTC) '
            '19:48, ngày 7 tháng 2 năm 2010 (UTC)',
            'match2':
            '16:41, ngày 15 tháng 9 năm 2001 (UTC) 16:41, '
            'ngày 12 tháng 9 năm 2008 (UTC)',
            'match3':
            '21:18, ngày 13 tháng 8 năm 2011 (UTC) 21:18, '
            'ngày 14 tháng 8 năm 2014 (UTC)',
            'nomatch1':
            '21:18, ngày 13 March 8 năm 2011 (UTC) 21:18, '
            'ngày 14 March 8 năm 2014 (UTC)',
        },
    }

    cached = True

    def test_timestripper_match(self, key):
        """Test that correct date is matched.

        ``key`` selects an entry of ``sites``.
        NOTE(review): presumably the test framework calls this once per
        site key -- confirm against the TestCase base class.
        """
        self.ts = TimeStripper(self.get_site(key))

        # Expected results use the site's own timezone settings.
        tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
                                 self.ts.site.siteinfo['timezone'])

        txt_match = self.sites[key]['match']

        res = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txt_match), res)

        # The remaining samples are optional; stop at the first missing one.
        if 'match2' not in self.sites[key]:
            return

        txt_match = self.sites[key]['match2']

        res = datetime.datetime(2008, 9, 12, 16, 41, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txt_match), res)

        if 'match3' not in self.sites[key]:
            return

        txt_match = self.sites[key]['match3']

        res = datetime.datetime(2014, 8, 14, 21, 18, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txt_match), res)

    def test_timestripper_nomatch(self, key):
        """Test that correct date is not matched.

        ``key`` selects an entry of ``sites``.
        """
        self.ts = TimeStripper(self.get_site(key))

        # Sites without their own 'nomatch' sample use an English one.
        if 'nomatch' in self.sites[key]:
            txt_no_match = self.sites[key]['nomatch']
        else:
            txt_no_match = '3 March 2011 19:48 (UTC) 7 March 2010 19:48 (UTC)'

        self.assertEqual(self.ts.timestripper(txt_no_match), None)

        # 'nomatch1' is an optional second negative sample.
        if 'nomatch1' not in self.sites[key]:
            return

        txt_no_match = self.sites[key]['nomatch1']
        self.assertEqual(self.ts.timestripper(txt_no_match), None)
 def setUp(self):
     """Create a TimeStripper for the 'fr' Wikipedia site."""
     # NOTE(review): no call to the parent setUp() is visible in this
     # snippet -- confirm whether the base class requires one.
     site = pywikibot.Site('fr', 'wikipedia')
     self.ts = TimeStripper(site)
Example #31
0
 def setUp(self):
     """Create a TimeStripper for the 'fr' Wikipedia site."""
     # NOTE(review): no call to the parent setUp() is visible in this
     # snippet -- confirm whether the base class requires one.
     site = pywikibot.Site('fr', 'wikipedia')
     self.ts = TimeStripper(site)