Example #1
0
    def test_simple_grouped_export(self):
        """Files grouped by 'state' are named after their group.

        Runs a full export with a FileKeyGrouper on the 'state' field and
        checks that every written filename matches the
        '<file_number>_ds_dump_<group>' pattern derived from the writer's
        grouping info.
        """
        config = self.build_config(
            writer={
                'name': 'exporters.writers.fs_writer.FSWriter',
                'options': {
                    'filebase':
                    os.path.join(self.tmp_dir,
                                 '{file_number}_ds_dump_{groups[0]}'),
                    'items_per_buffer_write':
                    100,
                }
            },
            grouper={
                'name': 'exporters.groupers.file_key_grouper.FileKeyGrouper',
                'options': {
                    'keys': ['state'],
                }
            })
        config['reader']['options']['number_of_items'] = 1000
        self.exporter = exporter = BaseExporter(config)
        exporter.export()
        # .items() works on both Python 2 and 3 (.iteritems() is Py2-only).
        used_paths = [
            value['path_safe_keys'][0] for key, value in
            exporter.writer.write_buffer.grouping_info.items()
        ]
        # Raw string so '\d' is a regex escape, not a string escape
        # (non-raw '\d' is a DeprecationWarning on Python 3.6+).
        regex = r'\d_ds_dump_({})'.format('|'.join(used_paths))
        written_files = [
            os.path.basename(f) for f in exporter.writer.written_files.keys()
        ]

        for w_file in written_files:
            self.assertRegexpMatches(w_file, regex)
Example #2
0
 def test_grouping_with_non_valid_characters(self):
     """Grouping on 'country_code' yields one sanitized file per group."""
     fs_writer = {
         'name': 'exporters.writers.fs_writer.FSWriter',
         'options': {
             'filebase':
             os.path.join(self.tmp_dir,
                          '{file_number}_ds_dump_{groups[0]}'),
             'items_per_buffer_write':
             100,
         }
     }
     key_grouper = {
         'name': 'exporters.groupers.file_key_grouper.FileKeyGrouper',
         'options': {
             'keys': ['country_code'],
         }
     }
     config = self.build_config(writer=fs_writer, grouper=key_grouper)
     config['reader']['options']['number_of_items'] = 1000
     expected_written_files = {
         '0_ds_dump_us.jl.gz',
         '0_ds_dump_es.jl.gz',
         '0_ds_dump_uk.jl.gz',
     }
     exporter = BaseExporter(config)
     self.exporter = exporter
     exporter.export()
     written_files = [
         os.path.basename(path)
         for path in exporter.writer.written_files.keys()
     ]
     self.assertContainsSubset(written_files, expected_written_files)
Example #3
0
    def test_skipped_bypass(self):
        """A bypass whose conditions fail must not short-circuit the export."""

        class SkippedBypass(BaseBypass):
            @classmethod
            def meets_conditions(cls, config):
                # Never applicable, so the regular pipeline must run.
                return False

        options = {
            'reader': {
                'name': 'exporters.readers.random_reader.RandomReader',
                'options': {
                    'number_of_items': 10,
                    'batch_size': 3
                }
            },
            'writer': {
                'name': 'tests.utils.NullWriter'
            },
            'persistence': {
                'name': 'tests.utils.NullPersistence',
            }
        }
        self.exporter = exporter = BaseExporter(options)
        exporter.bypass_cases.append(SkippedBypass)
        exporter.export()
        read_count = exporter.reader.get_metadata('read_items')
        written_count = exporter.writer.get_metadata('items_count')
        self.assertEqual(read_count, 10,
                         msg='There should be 10 read items')
        self.assertEqual(written_count, 10,
                         msg='There should be 10 written items')
Example #4
0
 def test_fs_grouping_without_filebase(self):
     """Even without a group placeholder in filebase, one file per group."""
     with TemporaryDirectory() as tmp_dir:
         random_reader = {
             'name': 'exporters.readers.random_reader.RandomReader',
             'options': {
                 'number_of_items': 123,
                 'batch_size': 7
             }
         }
         fs_writer = {
             'name': 'exporters.writers.fs_writer.FSWriter',
             'options': {
                 'filebase': tmp_dir + '/some_file_'
             }
         }
         country_grouper = {
             'name':
             'exporters.groupers.file_key_grouper.FileKeyGrouper',
             'options': {
                 'keys': ['country_code']
             }
         }
         options = {
             'reader': random_reader,
             'writer': fs_writer,
             'persistence': {
                 'name': 'tests.utils.NullPersistence',
             },
             'grouper': country_grouper,
         }
         exporter = BaseExporter(options)
         self.exporter = exporter
         exporter.export()
         self.assertEqual(len(os.listdir(tmp_dir)), 3,
                          'There should be a file for every group')
Example #5
0
    def test_valid_bypass(self):
        """An applicable bypass replaces the pipeline and notifies success.

        Reader/writer are error stubs so the test fails if the regular
        pipeline runs at all; the bypass must be the only execution path.
        """
        notifier_class_path = 'exporters.notifications.base_notifier.BaseNotifier'
        options = {
            'reader': {
                # error reader and writer should give
                # failed test if they get used
                'name': 'tests.utils.ErrorReader'
            },
            'writer': {
                'name': 'tests.utils.ErrorWriter'
            },
            'exporter_options': {
                'notifications': [{
                    'name': notifier_class_path
                }]
            },
            'persistence': {
                'name': 'tests.utils.NullPersistence',
            }
        }
        with mock.patch.multiple(notifier_class_path,
                                 notify_start_dump=DEFAULT,
                                 notify_failed_job=DEFAULT,
                                 notify_complete_dump=DEFAULT) as m:
            self.exporter = exporter = BaseExporter(options)

            # Records which bypass methods the exporter actually invoked.
            bypass_calls = []

            class Bypass(BaseBypass):
                @classmethod
                def meets_conditions(cls, config):
                    bypass_calls.append('meets_conditions')
                    return True

                def execute(self):
                    bypass_calls.append('execute')

            exporter.bypass_cases.append(Bypass)
            exporter.export()
            exporter.writer.close()
            # BUG FIX: the original asserted `assertGreaterEqual(len, 2)`,
            # comparing the builtin `len` function itself to 2 (a TypeError
            # on Python 3, vacuously "true" on Python 2). Count the actual
            # bypass method invocations instead, matching the msg's intent.
            self.assertGreaterEqual(
                len(bypass_calls),
                2,
                msg=('bypass class should have at least `meets_conditions` '
                     'and `bypass` methods called'))
            self.assertEquals(len(m['notify_start_dump'].mock_calls),
                              1,
                              msg='There was 1 start dump notification')
            self.assertEquals(len(m['notify_failed_job'].mock_calls),
                              0,
                              msg='There should be no failed job notification')
            self.assertEquals(len(m['notify_complete_dump'].mock_calls),
                              1,
                              msg='There was 1 complete dump notification')
Example #6
0
    def test_when_unmet_conditions_bypass_should_not_be_called(self):
        """A bypass whose conditions are not met must never execute."""
        # given: an exporter whose only bypass refuses to apply
        self.exporter = exporter = BaseExporter(self.build_config())

        class NeverBypass(FakeBypass):
            executed = False

        NeverBypass.fake_meet_conditions = False
        exporter.bypass_cases = [NeverBypass]

        # when: the export runs normally
        exporter.export()

        # then: the bypass was skipped
        self.assertFalse(NeverBypass.executed,
                         "Bypass should NOT have been called")
Example #7
0
 def test_export_with_csv_formatter(self):
     """Exporting through the CSV formatter still writes every item."""
     config = self.build_config()
     csv_formatter = {
         'name':
         'exporters.export_formatter.csv_export_formatter.CSVExportFormatter',
         'options': {
             'show_titles': True,
             'fields': ['city', 'country_code']
         }
     }
     config['exporter_options']['formatter'] = csv_formatter
     exporter = BaseExporter(config)
     self.exporter = exporter
     exporter.export()
     self.assertEquals(10, exporter.writer.get_metadata('items_count'))
Example #8
0
    def test_bypass_should_be_called(self):
        """An applicable bypass executes and marks the pipeline as bypassed."""
        # given: an exporter whose only bypass always applies
        self.exporter = exporter = BaseExporter(self.build_config())

        class TrackingBypass(FakeBypass):
            executed = False

        exporter.bypass_cases = [TrackingBypass]

        # when:
        exporter.export()
        exporter.writer.close()

        # then: the bypass ran and the exporter recorded the shortcut
        self.assertTrue(TrackingBypass.executed,
                        "Bypass should have been called")
        self.assertTrue(exporter.metadata.bypassed_pipeline)
Example #9
0
    def test_when_meet_conditions_but_config_prevent_bypass_it_should_not_be_called(
            self):
        """prevent_bypass in config overrides an otherwise-applicable bypass."""
        # given: conditions would be met, but config forbids bypassing
        config = self.build_config()
        config['exporter_options']['prevent_bypass'] = True
        exporter = BaseExporter(config)
        self.exporter = exporter

        class PreventedBypass(FakeBypass):
            executed = False

        exporter.bypass_cases = [PreventedBypass]

        # when:
        exporter.export()

        # then: the bypass never ran
        self.assertFalse(PreventedBypass.executed,
                         "Bypass should NOT have been called")
Example #10
0
    def test_when_failing_bypass_should_raise_error(self):
        """An exception raised inside a bypass propagates out of export()."""
        # given: a bypass whose execute() blows up after running
        self.exporter = exporter = BaseExporter(self.build_config())

        class FailingBypass(FakeBypass):
            executed = False

            def execute(self):
                super(FailingBypass, self).execute()
                raise RuntimeError("ooops")

        exporter.bypass_cases = [FailingBypass]

        # when calling export, should raise the error:
        with self.assertRaisesRegexp(RuntimeError, "ooops"):
            exporter.export()

        exporter.writer.close()
Example #11
0
 def test_when_writing_only_on_flush_should_notify_job_failure(
         self, mock_notifier):
     """A writer that fails on flush must notify failure, never success."""
     # Unreachable FTP host so the writer only fails when flushing.
     ftp_writer = {
         'name': 'exporters.writers.FTPWriter',
         'options': {
             'host': 'ftp.invalid.com',
             'filebase': '_',
             'ftp_user': '******',
             'ftp_password': '******',
         }
     }
     config = self.build_config(writer=ftp_writer)
     exporter = BaseExporter(config)
     self.exporter = exporter
     with self.assertRaisesRegexp(ValueError, "fail"):
         exporter.export()
     notifier = mock_notifier.return_value
     self.assertFalse(notifier.notify_complete_dump.called,
                      "Should not notify a successful dump")
     self.assertTrue(notifier.notify_failed_job.called,
                     "Should notify the job failure")
Example #12
0
    def test_disabling_retries(self):
        """With disable_retries set, failing notifications are not retried."""
        post_count = [0]  # mutable cell so the closure can count calls
        options = {
            'reader': {
                'name': 'exporters.readers.random_reader.RandomReader',
                'options': {
                    'number_of_items': 17,
                    'batch_size': 3
                }
            },
            'writer': {
                'name': 'tests.utils.NullWriter',
                'options': {
                    'items_limit': 5
                }
            },
            'persistence': {
                'name': 'tests.utils.NullPersistence',
            },
            'exporter_options': {
                'notifications': [{
                    'name':
                    'exporters.notifications.webhook_notifier.WebhookNotifier',
                    'options': {
                        'endpoints': ['http://endpoint']
                    }
                }],
                'disable_retries':
                True
            },
        }

        def failing_post(*args, **kwargs):
            # Every webhook POST fails; with retries disabled each one
            # should be attempted exactly once.
            post_count[0] += 1
            raise RuntimeError('test exception')

        self.exporter = exporter = BaseExporter(options)
        with mock.patch('requests.post', side_effect=failing_post):
            exporter.export()

        # there should be 2 posts: for started and completed dump
        self.assertEqual(post_count[0], 2, "Retries should be disabled")
Example #13
0
 def test_notifier_integration_export_fail(self):
     """A failing writer triggers exactly one failed-job notification.

     Patches BaseNotifier's hooks and checks that an export whose writer
     raises produces one start-dump and one failed-job notification
     (carrying the writer's error message) and no complete-dump one.
     """
     notifier_class_path = 'exporters.notifications.base_notifier.BaseNotifier'
     options = {
         'reader': {
             'name': 'exporters.readers.random_reader.RandomReader',
             'options': {
                 'number_of_items': 10,
                 'batch_size': 3
             }
         },
         'writer': {
             # ErrorWriter is expected to raise RuntimeError(ErrorWriter.msg)
             # during the export, making the whole job fail.
             'name': 'tests.utils.ErrorWriter'
         },
         'exporter_options': {
             'notifications': [{
                 'name': notifier_class_path
             }]
         },
         'persistence': {
             'name': 'tests.utils.NullPersistence',
         }
     }
     with mock.patch.multiple(notifier_class_path,
                              notify_start_dump=DEFAULT,
                              notify_failed_job=DEFAULT,
                              notify_complete_dump=DEFAULT) as m:
         self.exporter = exporter = BaseExporter(options)
         with self.assertRaisesRegexp(RuntimeError, ErrorWriter.msg):
             exporter.export()
             # NOTE(review): this close() is unreachable when export()
             # raises as asserted above; sibling tests call writer.close()
             # AFTER the assertRaises block — confirm intent before moving.
             exporter.writer.close()
         self.assertEquals(len(m['notify_start_dump'].mock_calls),
                           1,
                           msg='There was 1 start dump notification')
         self.assertEquals(len(m['notify_failed_job'].mock_calls),
                           1,
                           msg='There was 1 failed job notification')
         # The failed-job notification's first positional argument is the
         # error message from the writer.
         _, args, _ = m['notify_failed_job'].mock_calls[0]
         self.assertEquals(args[0], ErrorWriter.msg)
         self.assertEquals(
             len(m['notify_complete_dump'].mock_calls),
             0,
             msg='There should be no complete dump notification')
Example #14
0
 def test_non_empty_files(self):
     """A single-item export produces exactly one written file."""
     single_item_reader = {
         'name': 'exporters.readers.random_reader.RandomReader',
         'options': {
             'number_of_items': 1,
             'batch_size': 1
         }
     }
     fs_writer = {
         'name': 'exporters.writers.fs_writer.FSWriter',
         'options': {
             'filebase': os.path.join(self.tmp_dir, 'some_files_'),
             'items_per_buffer_write': 1,
         }
     }
     config = self.build_config(writer=fs_writer, reader=single_item_reader)
     exporter = BaseExporter(config)
     self.exporter = exporter
     exporter.export()
     self.assertEqual(len(exporter.writer.written_files), 1,
                      'Wrong number of files compared to writer metadata')
Example #15
0
 def test_read_goes_into_written(self):
     """Everything the reader produces must reach the writer."""
     options = {
         'writer': {
             'name': 'tests.utils.NullWriter'
         },
         'persistence': {
             'name': 'tests.utils.NullPersistence',
         },
         'reader': {
             'name': 'exporters.readers.random_reader.RandomReader',
             'options': {
                 'number_of_items': 123,
                 'batch_size': 7
             }
         },
     }
     exporter = BaseExporter(options)
     self.exporter = exporter
     exporter.export()
     read_total = exporter.reader.get_metadata('read_items')
     written_total = exporter.writer.get_metadata('items_count')
     self.assertEquals(
         read_total,
         written_total,
         msg="Export should write the same number items as it has read")
Example #16
0
 def test_persisted_positions(self):
     """Positions are committed after each batch, ending at the last item."""
     pers_class_path = 'tests.utils.NullPersistence'
     options = {
         'reader': {
             'name': 'exporters.readers.random_reader.RandomReader',
             'options': {
                 'number_of_items': 17,
                 'batch_size': 3
             }
         },
         'writer': {
             'name': 'tests.utils.NullWriter'
         },
         'persistence': {
             'name': pers_class_path,
         }
     }
     exporter = BaseExporter(options)
     self.exporter = exporter
     with mock.patch.object(exporter.persistence, 'commit_position') as m:
         exporter.export()
         committed = [
             call_args[0]['last_read']
             for _, call_args, _ in m.mock_calls
         ]
         # 17 items in batches of 3: commits land after indexes 2, 5, 8,
         # 11, 14 and the final partial batch ending at 16.
         self.assertEqual(committed, [2, 5, 8, 11, 14, 16])
Example #17
0
 def test_writer_items_limit(self):
     """The writer's items_limit option caps how many items get written."""
     options = {
         'reader': {
             'name': 'exporters.readers.random_reader.RandomReader',
             'options': {
                 'number_of_items': 17,
                 'batch_size': 3
             }
         },
         'writer': {
             'name': 'tests.utils.NullWriter',
             'options': {
                 'items_limit': 5
             }
         },
         'persistence': {
             'name': 'tests.utils.NullPersistence',
         }
     }
     exporter = BaseExporter(options)
     self.exporter = exporter
     exporter.export()
     written = exporter.writer.get_metadata('items_count')
     self.assertEqual(written, 5,
                      msg='There should be only 5 written items')
Example #18
0
    def test_resume_items(self):
        """Resuming restores the writer's items_count from persisted state.

        Seeds a pickle persistence file with a last_position of 30 items
        (marked inaccurate), resumes from it, and checks both values are
        reflected in the initialized exporter.
        """
        with TmpFile() as pickle_file:
            # given:
            persistence_data = {
                'last_position': {
                    'accurate_items_count': False,
                    'writer_metadata': {
                        'items_count': 30
                    },
                    'last_key': 3
                },
            }
            # BUG FIX: the original did pickle.dump(data, open(f, 'w')) —
            # the handle was never closed and text mode breaks pickle on
            # Python 3. Binary mode + context manager works on both 2 and 3.
            with open(pickle_file, 'wb') as f:
                pickle.dump(persistence_data, f)

            config = self.build_config(
                exporter_options={
                    'resume': True,
                    'persistence_state_id': os.path.basename(pickle_file),
                },
                persistence={
                    'name':
                    'exporters.persistence.pickle_persistence.PicklePersistence',
                    'options': {
                        'file_path': os.path.dirname(pickle_file)
                    }
                })

            # when:
            self.exporter = exporter = BaseExporter(config)
            exporter._init_export_job()

            # then:
            self.assertEqual(30, exporter.writer.get_metadata('items_count'))
            self.assertFalse(exporter.metadata.accurate_items_count,
                             "Couldn't get accurate count from last_position")
            exporter.writer.close()
Example #19
0
 def test_simple_export(self):
     """A default-config export writes exactly 10 items."""
     exporter = BaseExporter(self.build_config())
     self.exporter = exporter
     exporter.export()
     items_written = exporter.writer.get_metadata('items_count')
     self.assertEquals(10, items_written)