def setUp(self):
    """Build the per-test fixture.

    Creates a temporary directory (prefix ``test_corpora``) inside the
    directory that contains this test module, writes ``REDDIT_COMMENTS``
    to ``RC_test.bz2`` in it via ``write_json_lines``, and stores a
    ``RedditReader`` for that file on ``self.redditreader``.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    self.tempdir = tempfile.mkdtemp(prefix='test_corpora', dir=module_dir)
    reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')
    # Text mode only when the flag is exactly False; anything else writes binary.
    write_mode = 'wt' if is_python2 is False else 'wb'
    write_json_lines(
        REDDIT_COMMENTS, reddit_fname, mode=write_mode, auto_make_dirs=True)
    self.redditreader = RedditReader(reddit_fname)
class RedditReaderTestCase(unittest.TestCase):
    """Tests for ``RedditReader`` run against a fixture written from ``REDDIT_COMMENTS``.

    The fixture lives in a temporary directory created per test; its removal
    is registered in ``setUp`` with ``addCleanup``.
    """

    def setUp(self):
        """Write the fixture file into a fresh temp dir and open a reader on it."""
        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Register removal right away: cleanups still run when a later step
        # of setUp raises (tearDown would not), so the directory cannot be
        # left behind next to the test module.
        self.addCleanup(shutil.rmtree, self.tempdir)
        reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')
        # Text mode only when PY2 is exactly False; otherwise binary.
        mode = 'wt' if PY2 is False else 'wb'
        write_json_lines(
            REDDIT_COMMENTS, reddit_fname, mode=mode, auto_make_dirs=True)
        self.redditreader = RedditReader(reddit_fname)

    def test_texts(self):
        """Every item yielded by ``texts()`` is a ``unicode_type`` instance."""
        for text in self.redditreader.texts():
            self.assertIsInstance(text, unicode_type)

    def test_texts_limit(self):
        """``texts(limit=1)`` yields exactly one item."""
        texts = list(self.redditreader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_texts_min_len(self):
        """Every text yielded with ``min_len=100`` has length >= 100."""
        for text in self.redditreader.texts(min_len=100):
            self.assertGreaterEqual(len(text), 100)

    def test_records(self):
        """Every item yielded by ``records()`` is a dict."""
        for record in self.redditreader.records():
            self.assertIsInstance(record, dict)

    def test_records_limit(self):
        """``records(limit=1)`` yields exactly one item."""
        records = list(self.redditreader.records(limit=1))
        self.assertEqual(len(records), 1)

    def test_records_score_range(self):
        """Each score range selects one record whose score lies within the given bounds."""
        score_ranges = [(-2, 2), (5, None), (None, 2)]
        for score_range in score_ranges:
            low, high = score_range
            records = list(self.redditreader.records(score_range=score_range))
            self.assertEqual(len(records), 1)
            for record in records:
                # Compare against None explicitly so a bound of 0 is still checked.
                if low is not None:
                    self.assertGreaterEqual(record['score'], low)
                if high is not None:
                    self.assertLessEqual(record['score'], high)

    def test_records_subreddit(self):
        """Filtering by subreddit yields the expected count, all from the requested set."""
        subreddits = [('exmormon',), {'CanadaPolitics', 'AdviceAnimals'}]
        expected_lens = (1, 2)
        for subreddit, expected_len in zip(subreddits, expected_lens):
            records = list(self.redditreader.records(subreddit=subreddit))
            self.assertEqual(len(records), expected_len)
            for record in records:
                self.assertIn(record['subreddit'], subreddit)
def setUp(self):
    """Prepare the fixture used by each test.

    A temporary directory (prefix ``test_corpora``) is created in the
    directory holding this test module; ``REDDIT_COMMENTS`` is written to
    ``RC_test.bz2`` inside it with ``write_json_lines``; a ``RedditReader``
    on that file is kept as ``self.redditreader``.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    self.tempdir = tempfile.mkdtemp(prefix='test_corpora', dir=this_dir)
    reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')
    write_options = {
        # 'wt' only when PY2 is exactly False; every other value gets 'wb'.
        'mode': 'wt' if PY2 is False else 'wb',
        'auto_make_dirs': True,
    }
    write_json_lines(REDDIT_COMMENTS, reddit_fname, **write_options)
    self.redditreader = RedditReader(reddit_fname)
class RedditReaderTestCase(unittest.TestCase):
    """Exercise ``RedditReader`` on a fixture file generated from ``REDDIT_COMMENTS``.

    Each test gets its own temporary directory; ``setUp`` registers its
    removal through ``addCleanup``.
    """

    def setUp(self):
        """Create the temp dir, write the fixture, and build ``self.redditreader``."""
        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Cleanups run even if the rest of setUp fails, whereas tearDown is
        # skipped in that case; registering here prevents a leaked directory.
        self.addCleanup(shutil.rmtree, self.tempdir)
        reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')
        # 'wt' only when PY2 is exactly False; 'wb' otherwise.
        mode = 'wt' if PY2 is False else 'wb'
        write_json_lines(
            REDDIT_COMMENTS, reddit_fname, mode=mode, auto_make_dirs=True)
        self.redditreader = RedditReader(reddit_fname)

    def test_texts(self):
        """``texts()`` yields only ``unicode_type`` values."""
        for text in self.redditreader.texts():
            self.assertIsInstance(text, unicode_type)

    def test_texts_limit(self):
        """A limit of 1 produces a single text."""
        texts = list(self.redditreader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_texts_min_len(self):
        """With ``min_len=100`` no yielded text is shorter than 100."""
        for text in self.redditreader.texts(min_len=100):
            self.assertGreaterEqual(len(text), 100)

    def test_records(self):
        """``records()`` yields only dicts."""
        for record in self.redditreader.records():
            self.assertIsInstance(record, dict)

    def test_records_limit(self):
        """A limit of 1 produces a single record."""
        records = list(self.redditreader.records(limit=1))
        self.assertEqual(len(records), 1)

    def test_records_score_range(self):
        """Each tested range returns one record, and its score respects both bounds."""
        score_ranges = [(-2, 2), (5, None), (None, 2)]
        for score_range in score_ranges:
            lower, upper = score_range
            records = list(self.redditreader.records(score_range=score_range))
            self.assertEqual(len(records), 1)
            for record in records:
                # None means "unbounded"; an explicit None test keeps a
                # bound of 0 from being skipped by truthiness.
                if lower is not None:
                    self.assertGreaterEqual(record['score'], lower)
                if upper is not None:
                    self.assertLessEqual(record['score'], upper)

    def test_records_subreddit(self):
        """Subreddit filters return the expected number of records from those subreddits."""
        subreddits = [('exmormon',), {'CanadaPolitics', 'AdviceAnimals'}]
        expected_lens = (1, 2)
        for subreddit, expected_len in zip(subreddits, expected_lens):
            records = list(self.redditreader.records(subreddit=subreddit))
            self.assertEqual(len(records), expected_len)
            for record in records:
                self.assertIn(record['subreddit'], subreddit)
def setUp(self):
    """Write the comment fixture to a temp dir and open a reader on it.

    The temp dir (prefix ``test_corpora``) is created in the directory
    containing this test module. Each entry of ``REDDIT_COMMENTS`` is
    serialized with ``json.dumps`` and written as one line to
    ``RC_test.bz2`` through ``bzip_open``. The resulting ``RedditReader``
    is stored on ``self.redditreader``.
    """
    parent_dir = os.path.dirname(os.path.abspath(__file__))
    self.tempdir = tempfile.mkdtemp(prefix='test_corpora', dir=parent_dir)
    reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')

    def _write_fixture(mode, ascii_only):
        # One JSON document per line, newline-terminated.
        with bzip_open(reddit_fname, mode=mode) as out:
            for comment in REDDIT_COMMENTS:
                out.write(json.dumps(comment, ensure_ascii=ascii_only) + '\n')

    try:
        _write_fixture('wt', False)
    except ValueError:
        # Per the original note, the text-mode attempt fails on Python 2;
        # retry in binary mode with ASCII-only JSON.
        _write_fixture('wb', True)

    self.redditreader = RedditReader(reddit_fname)
class RedditReaderTestCase(unittest.TestCase):
    """Tests for ``RedditReader`` using a fixture written from ``REDDIT_COMMENTS``.

    The fixture is written to a per-test temporary directory whose removal
    is registered in ``setUp`` with ``addCleanup``.
    """

    def setUp(self):
        """Create the temp dir, write one JSON line per comment, and open a reader."""
        # Imported locally so the cleanup does not depend on a module-level import.
        import shutil

        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Cleanups run even when the remainder of setUp raises (tearDown does
        # not), and rmtree also copes with nested directories.
        self.addCleanup(shutil.rmtree, self.tempdir)
        reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')
        try:
            with bzip_open(reddit_fname, mode='wt') as f:
                for comment in REDDIT_COMMENTS:
                    f.write(json.dumps(comment, ensure_ascii=False) + '\n')
        except ValueError:  # Python 2 fail
            # Fall back to binary mode with ASCII-only JSON.
            with bzip_open(reddit_fname, mode='wb') as f:
                for comment in REDDIT_COMMENTS:
                    f.write(json.dumps(comment, ensure_ascii=True) + '\n')
        self.redditreader = RedditReader(reddit_fname)

    def test_texts(self):
        """Every item yielded by ``texts()`` is a ``str``."""
        for text in self.redditreader.texts():
            self.assertIsInstance(text, str)

    def test_texts_min_len(self):
        """``texts(min_len=100)`` yields exactly one item for this fixture."""
        texts = list(self.redditreader.texts(min_len=100))
        self.assertEqual(len(texts), 1)

    def test_texts_limit(self):
        """``texts(limit=1)`` yields exactly one item."""
        texts = list(self.redditreader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_comments(self):
        """Every item yielded by ``comments()`` is a dict."""
        for comment in self.redditreader.comments():
            self.assertIsInstance(comment, dict)

    def test_pages_min_len(self):
        """``comments(min_len=100)`` yields exactly one item.

        The method name says "pages", but the call under test is ``comments()``.
        """
        comments = list(self.redditreader.comments(min_len=100))
        self.assertEqual(len(comments), 1)

    def test_pages_limit(self):
        """``comments(limit=1)`` yields exactly one item.

        The method name says "pages", but the call under test is ``comments()``.
        """
        comments = list(self.redditreader.comments(limit=1))
        self.assertEqual(len(comments), 1)
class RedditReaderTestCase(unittest.TestCase):
    """Tests for ``RedditReader`` using a fixture written from ``REDDIT_COMMENTS``.

    The fixture is written to a per-test temporary directory whose removal
    is registered in ``setUp`` with ``addCleanup``.
    """

    def setUp(self):
        """Create the temp dir, write the fixture via ``write_json_lines``, and open a reader."""
        # Imported locally so the cleanup does not depend on a module-level import.
        import shutil

        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Cleanups run even when the remainder of setUp raises (tearDown does
        # not), and rmtree also copes with nested directories.
        self.addCleanup(shutil.rmtree, self.tempdir)
        reddit_fname = os.path.join(self.tempdir, 'RC_test.bz2')
        # Text mode only when PY2 is exactly False; otherwise binary.
        mode = 'wt' if PY2 is False else 'wb'
        write_json_lines(
            REDDIT_COMMENTS, reddit_fname, mode=mode, auto_make_dirs=True)
        self.redditreader = RedditReader(reddit_fname)

    def test_texts(self):
        """Every item yielded by ``texts()`` is a ``unicode_type`` instance."""
        for text in self.redditreader.texts():
            self.assertIsInstance(text, unicode_type)

    def test_texts_min_len(self):
        """``texts(min_len=100)`` yields exactly one item for this fixture."""
        texts = list(self.redditreader.texts(min_len=100))
        self.assertEqual(len(texts), 1)

    def test_texts_limit(self):
        """``texts(limit=1)`` yields exactly one item."""
        texts = list(self.redditreader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_comments(self):
        """Every item yielded by ``comments()`` is a dict."""
        for comment in self.redditreader.comments():
            self.assertIsInstance(comment, dict)

    def test_comments_min_len(self):
        """``comments(min_len=100)`` yields exactly one item for this fixture."""
        comments = list(self.redditreader.comments(min_len=100))
        self.assertEqual(len(comments), 1)

    def test_comments_limit(self):
        """``comments(limit=1)`` yields exactly one item."""
        comments = list(self.redditreader.comments(limit=1))
        self.assertEqual(len(comments), 1)