Beispiel #1
0
    def test_input_manifest(self):
        """Run the job over a gzipped WET file on disk plus raw WET input.

        Two records are written to an in-memory, uncompressed WARC stream
        and one record to a gzipped file under ``self.tmp_dir``. The job is
        given the file path and ``'-'`` as inputs, with the in-memory bytes
        supplied as ``raw_input``; its output must equal
        ``self.EXPECTED_OUTPUT``.
        """
        # Uncompressed records, handed to run_job() as raw_input.
        # NOTE(review): '-' presumably makes the job read them from stdin.
        raw_buffer = BytesIO()
        raw_writer = WARCWriter(raw_buffer, gzip=False)

        raw_records = (
            ('https://nophonenumbershere.info', b'THIS-IS-NOT-A-NUMBER'),
            ('https://big.directory/',
             b'The Time: (612) 777-9311\nJenny: (201) 867-5309\n'),
        )
        for url, payload in raw_records:
            write_conversion_record(raw_writer, url, payload)

        # Gzipped records, passed to the job by path.
        gz_path = join(self.tmp_dir, 'wet2.warc.wet.gz')
        with open(gz_path, 'wb') as gz_file:
            gz_writer = WARCWriter(gz_file, gzip=True)
            write_conversion_record(
                gz_writer,
                'https://jseventplanning.biz/',
                b'contact us at +1 201 867 5309')

        job = MRPhoneToURL(['-r', self.RUNNER, gz_path, '-'])
        output = run_job(job, raw_input=raw_buffer.getvalue())

        self.assertEqual(output, self.EXPECTED_OUTPUT)
Beispiel #2
0
    def test_setup_cmd(self):
        """Check that a ``--setup`` command runs alongside the job.

        Writes two records to a WET file under ``self.tmp_dir``, runs the
        job with ``--setup 'touch <tmp_dir>/touched'``, and asserts both
        that the output equals ``self.EXPECTED_OUTPUT`` and that the
        ``touched`` file exists afterwards (it must not exist beforehand).
        """
        marker_path = join(self.tmp_dir, 'touched')
        archive_path = join(self.tmp_dir, 'wet.warc.wet.gz')

        records = (
            ('https://big.directory/',
             b'The Time: (612) 777-9311\nJenny: (201) 867-5309\n'),
            ('https://jseventplanning.biz/',
             b'contact us at +1 201 867 5309'),
        )
        with open(archive_path, 'wb') as archive:
            warc_writer = WARCWriter(archive)
            for url, payload in records:
                write_conversion_record(warc_writer, url, payload)

        # The marker file may only appear as a result of the setup command.
        self.assertFalse(exists(marker_path))

        job_args = [
            '-r', self.RUNNER,
            '--setup', 'touch ' + marker_path,
            archive_path,
        ]
        output = run_job(MRPhoneToURL(job_args))
        self.assertEqual(output, self.EXPECTED_OUTPUT)

        self.assertTrue(exists(marker_path))
Beispiel #3
0
 def test_empty(self):
     """A job constructed with no arguments must produce an empty dict."""
     job_output = run_job(MRPhoneToURL())
     self.assertEqual(job_output, {})