def test_timestripper_match_only(self):
    """Check which date wins when a comment and plain text both hold one.

    First ``self.user_and_date`` sits between two empty comments and is
    followed by a 10:57 timestamp; 10:57 is the expected result.  Then
    ``self.user_and_date`` is wrapped in a comment and followed by a
    02:57 timestamp; 06:57 is the expected result.
    """
    stripper = TimeStripper(self.get_site())

    def check(text, hour):
        # Every expectation shares the date 2015-06-06 and minute 57.
        expected = datetime.datetime(2015, 6, 6, hour, 57, tzinfo=self.tzone)
        self.assertEqual(stripper.timestripper(text), expected)

    later_date = '10:57 06 June 2015 (UTC)'
    check('<!-- --> ' + self.user_and_date + ' <!-- -->' + later_date, 10)

    earlier_date = '02:57 06 June 2015 (UTC)'
    check('<!-- ' + self.user_and_date + ' --> ' + earlier_date, 6)
def test_timestripper_match(self):
    """Verify that a timestamp placed inside an HTML comment is found.

    Three comment layouts are fed to the stripper; each one is expected
    to yield 2015-06-06 06:57 in ``self.tzone``.
    """
    stripper = TimeStripper(self.get_site())

    def check(text):
        expected = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
        self.assertEqual(stripper.timestripper(text), expected)

    # A bare date after a user link, all inside one comment.
    check('<!-- [[User:Do___ArchiveUntil]] ' + self.date + ' -->')
    # An empty comment first, then a comment that itself contains "<!--".
    check('<!-- --> <!-- ' + self.user_and_date + ' <!-- -->')
    # The plain case: user and date wrapped in a single comment.
    check('<!-- ' + self.user_and_date + ' -->')
class TestTimeStripper(PywikibotTestCase):

    """Test cases for TimeStripper methods (French Wikipedia site)."""

    def setUp(self):
        """Run the base fixture, then build a TimeStripper for frwiki."""
        # Base set-up goes first so the site is created with the base
        # fixture in place, as the sibling TimeStripper test classes do.
        # NOTE(review): assumes the base setUp does not need self.ts.
        super(TestTimeStripper, self).setUp()
        site = pywikibot.Site('fr', 'wikipedia')
        self.ts = TimeStripper(site)

    def test_findmarker(self):
        """Test that a marker which is not part of the text is found."""
        txt = u'this is a string with a maker is @@@@already present'
        self.assertEqual(self.ts.findmarker(txt, base=u'@@', delta='@@'),
                         '@@@@@@')

    def test_last_match_and_replace(self):
        """Test replacement of year matches and the returned match dict.

        With ``yearR`` the expected text has 1998 and 1999 replaced by
        ``@@`` while 3000 is left alone, and 1999 is reported as the year.
        A text without any match is expected back unchanged with ``None``.
        """
        txtWithMatch = u'this string has one 1998, 1999 and 3000 in it'
        txtWithNoMatch = u'this string has no match'
        pat = self.ts.yearR

        self.assertEqual(self.ts.last_match_and_replace(txtWithMatch, pat),
                         (u'this string has one @@, @@ and 3000 in it',
                          {'year': u'1999'})
                         )
        self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
                         (txtWithNoMatch, None)
                         )

    def test_timestripper(self):
        """Test that the correct date is matched.

        The French-format text is expected to give 2010-02-07 19:48 in the
        site's fixed-offset timezone; the English-format text is expected
        to give ``None``.
        """
        txtMatch = u'3 février 2010 à 19:48 (CET) 7 février 2010 à 19:48 (CET)'
        txtNoMatch = u'3 March 2010 19:48 (CET) 7 March 2010 19:48 (CET)'

        # Expected tzinfo is built from the site's own siteinfo values.
        tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
                                 self.ts.site.siteinfo['timezone'])

        res = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)

        self.assertEqual(self.ts.timestripper(txtMatch), res)
        self.assertEqual(self.ts.timestripper(txtNoMatch), None)
class DiscussionThread(object):

    """An object representing a discussion thread on a page.

    That is, something of the form:

    == Title of thread ==

    Thread content here. ~~~~
    :Reply, etc. ~~~~
    """

    def __init__(self, title, now):
        """Create an empty thread.

        @param title: heading text of the thread
        @param now: reference "current time" that should_be_archived
            subtracts the thread timestamp from
        """
        self.title = title
        self.now = now
        self.content = ""
        # ``site`` is not a parameter; it is resolved from an enclosing
        # scope (presumably a module-level global -- confirm).
        self.ts = TimeStripper(site=site)
        self.timestamp = None

    def __repr__(self):
        """Return ``ClassName("title",N bytes)``.

        N is the UTF-8 encoded length of the content, so the "bytes" label
        is accurate and agrees with how size() measures the content.
        """
        return '%s("%s",%d bytes)' \
               % (self.__class__.__name__, self.title,
                  len(self.content.encode('utf-8')))

    def feed_line(self, line):
        """Append one line to the thread and track its newest timestamp.

        Empty lines are dropped while the thread has no content yet.
        """
        if not self.content and not line:
            return

        self.content += line + '\n'
        timestamp = self.ts.timestripper(line)

        if not self.timestamp:  # first time
            self.timestamp = timestamp

        if timestamp:
            # Keep the greatest timestamp seen so far.
            self.timestamp = max(self.timestamp, timestamp)

    def size(self):
        """Return UTF-8 length of title plus content, plus a constant 12."""
        # NOTE(review): 12 is presumably an allowance for the heading
        # markup and newlines -- confirm.
        return len(self.title.encode('utf-8')) + len(
            self.content.encode('utf-8')) + 12

    def to_text(self):
        """Return the thread as text: heading line, blank line, content."""
        return "== " + self.title + ' ==\n\n' + self.content

    def should_be_archived(self, archiver):
        """Return the reason for archiving this thread, or '' for none.

        @param archiver: object whose ``get('algo')`` returns the
            algorithm string; only the form ``old(<age>)`` is handled
        @return: the 'archivebot-older-than' message followed by the age
            expression when ``now - timestamp`` exceeds the age, else ''
        """
        algo = archiver.get('algo')
        re_t = re.search(r'^old\((.*)\)$', algo)
        if re_t:
            if not self.timestamp:
                return ''
            #TODO: handle this:
                #return 'unsigned'
            maxage = str2time(re_t.group(1))
            if self.now - self.timestamp > maxage:
                return message('archivebot-older-than') + ' ' + re_t.group(1)
        return ''
class DiscussionThread(object):

    """A single discussion thread taken from a talk page.

    A thread has this shape:

    == Title of thread ==

    Thread content here. ~~~~
    :Reply, etc. ~~~~
    """

    def __init__(self, title, now):
        """Start an empty thread with the given heading and reference time."""
        self.title = title
        self.now = now
        self.content = ""
        # ``site`` is a free name here, resolved outside this class.
        self.ts = TimeStripper(site=site)
        self.timestamp = None

    def __repr__(self):
        """Show class name, title and UTF-8 byte length of the content."""
        byte_count = len(self.content.encode('utf-8'))
        return '%s("%s",%d bytes)' % (
            self.__class__.__name__, self.title, byte_count)

    def feed_line(self, line):
        """Add a line of text and remember the greatest timestamp seen."""
        if not (self.content or line):
            # Skip blank lines until the thread has some content.
            return

        self.content += line + '\n'
        found = self.ts.timestripper(line)

        if not self.timestamp:
            # Nothing usable stored yet: take whatever this line produced.
            self.timestamp = found
        elif found:
            self.timestamp = max(self.timestamp, found)

    def size(self):
        """Return encoded length of title and content plus 12."""
        title_bytes = len(self.title.encode('utf-8'))
        content_bytes = len(self.content.encode('utf-8'))
        return title_bytes + content_bytes + 12

    def to_text(self):
        """Render the thread as a heading, a blank line and the content."""
        heading = "== " + self.title + ' ==\n\n'
        return heading + self.content

    def should_be_archived(self, archiver):
        """Return why the thread should be archived, or '' if it should not.

        Only an ``old(<age>)`` algorithm is evaluated; a thread without a
        timestamp is never reported.
        """
        algo = archiver.get('algo')
        age_match = re.search(r'^old\((.*)\)$', algo)
        if not age_match or not self.timestamp:
            #TODO: handle the missing-timestamp case, e.g. return 'unsigned'
            return ''

        age_text = age_match.group(1)
        maxage = str2time(age_text)
        if self.now - self.timestamp > maxage:
            return message('archivebot-older-than') + ' ' + age_text
        return ''
class TestTimeStripperWithNoDigitsAsMonths(TestCase):

    """TimeStripper method tests run against the French Wikipedia site."""

    family = 'wikipedia'
    code = 'fr'

    cached = True

    def setUp(self):
        """Create the TimeStripper under test after the base set-up."""
        super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
        self.ts = TimeStripper(self.get_site())

    def test_findmarker(self):
        """A marker not already contained in the text must be returned."""
        text = u'this is a string with a maker is @@@@already present'
        marker = self.ts.findmarker(text, base=u'@@', delta='@@')
        self.assertEqual(marker, '@@@@@@')

    def test_last_match_and_replace(self):
        """Check replaced text and returned groups for years and months."""
        unmatched = u'this string has no match'

        # (input text, expected (replaced text, match dict)) for years.
        year_cases = [
            (u'this string has 3000, 1999 and 3000 in it',
             (u'this string has 3000, @@ and 3000 in it',
              {'year': u'1999'})),
            (u'this string has 1998, 1999 and 3000 in it',
             (u'this string has @@, @@ and 3000 in it',
              {'year': u'1999'})),
            (unmatched, (unmatched, None)),
        ]
        pattern = self.ts.pyearR
        for text, expected in year_cases:
            self.assertEqual(
                self.ts.last_match_and_replace(text, pattern), expected)

        # Same layout for month names.
        month_cases = [
            (u'this string has XXX, YYY and février in it',
             (u'this string has XXX, YYY and @@ in it',
              {'month': u'février'})),
            (u'this string has XXX, mars and février in it',
             (u'this string has XXX, @@ and @@ in it',
              {'month': u'février'})),
            (u'this string has avr, mars and février in it',
             (u'this string has @@, @@ and @@ in it',
              {'month': u'février'})),
            (unmatched, (unmatched, None)),
        ]
        pattern = self.ts.pmonthR
        for text, expected in month_cases:
            self.assertEqual(
                self.ts.last_match_and_replace(text, pattern), expected)

    def test_hour(self):
        """Hour 23 must yield a result; hour 24 must yield None."""
        in_range = u'7 février 2010 à 23:00 (CET)'
        out_of_range = u'7 février 2010 à 24:00 (CET)'

        self.assertNotEqual(self.ts.timestripper(in_range), None)
        self.assertEqual(self.ts.timestripper(out_of_range), None)
class TestTimeStripperLanguage(TestCase):

    """Timestamp tests for several Wikipedia language editions.

    Each entry of ``sites`` carries sample texts: 'match' (plus optional
    'match2'/'match3') must yield a date, 'nomatch'/'nomatch1' must not.
    """

    sites = {
        'cswiki': {
            'family': 'wikipedia',
            'code': 'cs',
            'match': u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'enwiki': {
            'family': 'wikipedia',
            'code': 'en',
            'match': u'3 February 2010 19:48 (UTC) 7 February 2010 19:48 (UTC)',
            'nomatch': u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'frwiki': {
            'family': 'wikipedia',
            'code': 'fr',
            'match': u'3 février 2010 à 19:48 (CET) 7 février 2010 à 19:48 (CET)',
            'nomatch': u'3 March 2010 19:48 (CET) 7 March 2010 19:48 (CET)',
        },
        'nowiki': {
            'family': 'wikipedia',
            'code': 'no',
            'match': u'3. feb 2010 kl. 19:48 (CET) 7. feb 2010 kl. 19:48 (UTC)',
        },
        'ptwiki': {
            'family': 'wikipedia',
            'code': 'pt',
            'match': u'19h48min de 3 de fevereiro de 2010 (UTC) 19h48min de 7 de fevereiro de 2010 (UTC)',
        },
        'viwiki': {
            'family': 'wikipedia',
            'code': 'vi',
            'match': u'19:48, ngày 15 tháng 9 năm 2008 (UTC) 19:48, ngày 7 tháng 2 năm 2010 (UTC)',
            'match2': u'16:41, ngày 15 tháng 9 năm 2008 (UTC) 16:41, ngày 12 tháng 9 năm 2008 (UTC)',
            'match3': u'21:18, ngày 13 tháng 8 năm 2014 (UTC) 21:18, ngày 14 tháng 8 năm 2014 (UTC)',
            'nomatch1': u'21:18, ngày 13 March 8 năm 2014 (UTC) 21:18, ngày 14 March 8 năm 2014 (UTC)',
        },
    }

    cached = True

    def test_timestripper_match(self, key):
        """Each 'match*' sample of the site must give the expected date."""
        self.ts = TimeStripper(self.get_site(key))
        siteinfo = self.ts.site.siteinfo
        tzone = tzoneFixedOffset(siteinfo['timeoffset'],
                                 siteinfo['timezone'])
        samples = self.sites[key]

        # The mandatory sample.
        text = samples['match']
        expected = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)
        self.assertEqual(self.ts.timestripper(text), expected)

        # Optional samples; checking stops at the first one that is absent.
        optional = (
            ('match2', (2008, 9, 12, 16, 41)),
            ('match3', (2014, 8, 14, 21, 18)),
        )
        for name, stamp in optional:
            if name not in samples:
                return
            text = samples[name]
            expected = datetime.datetime(*stamp, tzinfo=tzone)
            self.assertEqual(self.ts.timestripper(text), expected)

    def test_timestripper_nomatch(self, key):
        """The 'nomatch*' samples (or a default text) must give None."""
        self.ts = TimeStripper(self.get_site(key))
        samples = self.sites[key]

        # Sites without their own 'nomatch' sample use an English one.
        text = samples.get(
            'nomatch', u'3 March 2010 19:48 (UTC) 7 March 2010 19:48 (UTC)')
        self.assertEqual(self.ts.timestripper(text), None)

        if 'nomatch1' in samples:
            text = samples['nomatch1']
            self.assertEqual(self.ts.timestripper(text), None)
class TestTimeStripperLanguage(TestCase):

    """Timestamp tests for several Wikipedia language editions.

    Each entry of ``sites`` carries sample texts: 'match' (plus optional
    'match2'/'match3') must yield a date, 'nomatch'/'nomatch1' must not.
    """

    sites = {
        'cswiki': {
            'family': 'wikipedia',
            'code': 'cs',
            'match': '3. 2. 2011, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'enwiki': {
            'family': 'wikipedia',
            'code': 'en',
            'match': '3 February 2011 19:48 (UTC) '
                     '7 February 2010 19:48 (UTC)',
            'nomatch': '3. 2. 2011, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)',
        },
        'fawiki': {
            'family': 'wikipedia',
            'code': 'fa',
            'match': '۳ فوریهٔ ۲۰۱۱، ساعت ۱۹:۴۸ (UTC) '
                     '۷ فوریهٔ ۲۰۱۰، ساعت ۱۹:۴۸ (UTC)',
            'nomatch': '۳ ۲ ۲۰۱۴ ۱۹:۴۸ (UTC) ۷ ۲ ۲۰۱۰ ۱۹:۴۸ (UTC)',
        },
        'frwiki': {
            'family': 'wikipedia',
            'code': 'fr',
            'match': '3 février 2011 à 19:48 (CET) '
                     '7 février 2010 à 19:48 (CET)',
            'nomatch': '3 March 2011 19:48 (CET) 7 March 2010 19:48 (CET)',
        },
        'kowiki': {
            'family': 'wikipedia',
            'code': 'ko',
            'match': '2011년 2월 3일 (수) 19:48 (KST) '
                     '2010년 2월 7일 (수) 19:48 (KST)',
        },
        'nowiki': {
            'family': 'wikipedia',
            'code': 'no',
            'match': '3. feb 2011 kl. 19:48 (CET) '
                     '7. feb 2010 kl. 19:48 (UTC)',
        },
        'ptwiki': {
            'family': 'wikipedia',
            'code': 'pt',
            'match': '19h48min de 3 de fevereiro de 2011 (UTC) 19h48min '
                     'de 7 de fevereiro de 2010 (UTC)',
        },
        'viwiki': {
            'family': 'wikipedia',
            'code': 'vi',
            'match': '19:48, ngày 3 tháng 2 năm 2011 (UTC) '
                     '19:48, ngày 7 tháng 2 năm 2010 (UTC)',
            'match2': '16:41, ngày 15 tháng 9 năm 2001 (UTC) 16:41, '
                      'ngày 12 tháng 9 năm 2008 (UTC)',
            'match3': '21:18, ngày 13 tháng 8 năm 2011 (UTC) 21:18, '
                      'ngày 14 tháng 8 năm 2014 (UTC)',
            'nomatch1': '21:18, ngày 13 March 8 năm 2011 (UTC) 21:18, '
                        'ngày 14 March 8 năm 2014 (UTC)',
        },
    }

    cached = True

    def test_timestripper_match(self, key):
        """Each 'match*' sample of the site must give the expected date."""
        self.ts = TimeStripper(self.get_site(key))
        siteinfo = self.ts.site.siteinfo
        tzone = tzoneFixedOffset(siteinfo['timeoffset'],
                                 siteinfo['timezone'])
        samples = self.sites[key]

        # The mandatory sample.
        sample = samples['match']
        expected = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)
        self.assertEqual(self.ts.timestripper(sample), expected)

        # Optional samples; checking stops at the first one that is absent.
        optional = (
            ('match2', (2008, 9, 12, 16, 41)),
            ('match3', (2014, 8, 14, 21, 18)),
        )
        for name, stamp in optional:
            if name not in samples:
                return
            sample = samples[name]
            expected = datetime.datetime(*stamp, tzinfo=tzone)
            self.assertEqual(self.ts.timestripper(sample), expected)

    def test_timestripper_nomatch(self, key):
        """The 'nomatch*' samples (or a default text) must give None."""
        self.ts = TimeStripper(self.get_site(key))
        samples = self.sites[key]

        # Sites without their own 'nomatch' sample use an English one.
        sample = samples.get(
            'nomatch', '3 March 2011 19:48 (UTC) 7 March 2010 19:48 (UTC)')
        self.assertEqual(self.ts.timestripper(sample), None)

        if 'nomatch1' in samples:
            sample = samples['nomatch1']
            self.assertEqual(self.ts.timestripper(sample), None)
class TestTimeStripperWithNoDigitsAsMonths(TestCase):

    """TimeStripper method tests run against the French Wikipedia site."""

    family = 'wikipedia'
    code = 'fr'

    cached = True

    def setUp(self):
        """Run the base set-up and create the TimeStripper under test."""
        super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
        self.ts = TimeStripper(self.get_site())

    def test_findmarker(self):
        """A marker not already contained in the text must be returned."""
        sample = u'this is a string with a maker is @@@@already present'
        found = self.ts.findmarker(sample, base=u'@@', delta='@@')
        self.assertEqual(found, '@@@@@@')

    def test_last_match_and_replace(self):
        """Check replaced text and returned groups for years and months."""

        def check(text, pattern, new_text, groups):
            # Compare against the (replaced text, match dict) pair.
            self.assertEqual(self.ts.last_match_and_replace(text, pattern),
                             (new_text, groups))

        no_match = u'this string has no match'

        # Year pattern.
        years = self.ts.pyearR
        check(u'this string has 3000, 1999 and 3000 in it', years,
              u'this string has 3000, @@ and 3000 in it', {'year': u'1999'})
        check(u'this string has 1998, 1999 and 3000 in it', years,
              u'this string has @@, @@ and 3000 in it', {'year': u'1999'})
        check(no_match, years, no_match, None)

        # Month-name pattern.
        months = self.ts.pmonthR
        check(u'this string has XXX, YYY and février in it', months,
              u'this string has XXX, YYY and @@ in it',
              {'month': u'février'})
        check(u'this string has XXX, mars and février in it', months,
              u'this string has XXX, @@ and @@ in it',
              {'month': u'février'})
        check(u'this string has avr, mars and février in it', months,
              u'this string has @@, @@ and @@ in it',
              {'month': u'février'})
        check(no_match, months, no_match, None)

    def test_hour(self):
        """Hour 23 must yield a result; hour 24 must yield None."""
        valid_hour = u'7 février 2010 à 23:00 (CET)'
        invalid_hour = u'7 février 2010 à 24:00 (CET)'

        self.assertNotEqual(self.ts.timestripper(valid_hour), None)
        self.assertEqual(self.ts.timestripper(invalid_hour), None)