def test_doesnt_mangle_data(self, share_source):
    """Check that a saved RawData payload compares equal to the original text.

    A row is stored with a bytes payload and the first row returned by the
    manager is compared against the same text.
    """
    payload = b'This is just some data'
    RawData(source=share_source, app_label='foo', data=payload).save()

    stored = RawData.objects.first()
    assert stored.data == 'This is just some data'
def test_must_have_source(self):
    """Check that saving a RawData without a source raises IntegrityError.

    The error's first argument must mention the not-null violation on
    ``source_id``.
    """
    expected = 'null value in column "source_id" violates not-null constraint'
    sourceless = RawData(data='SomeData', app_label='foo')

    with pytest.raises(IntegrityError) as excinfo:
        sourceless.save()

    # The exception info is only populated once the ``with`` block has exited.
    message = excinfo.value.args[0]
    assert expected in message
def handle(self, *args, **options):
    """Copy raw documents from the ``migration_source`` database into RawData.

    For every requested harvester name, rows of ``webview_document`` whose
    ``source`` matches are streamed through a named cursor and inserted in
    batches with ``RawData.objects.bulk_create``.

    Options read:
        harvester: iterable of source names to migrate (keys of ``self.map``).
        all: when truthy and no harvester is given, every key of
            ``self.map`` is migrated.

    Returns None. Nothing is done when no harvester is selected.

    Raises:
        KeyError: if a requested harvester is not a key of ``self.map``.
    """
    harvesters = options['harvester']
    if not harvesters and options['all']:
        harvesters = list(self.map.keys())

    if not harvesters:
        return

    connection = connections['migration_source']

    # This is required to populate the connection object properly
    if connection.connection is None:
        connection.cursor()

    for source in harvesters:
        target = self.map[source]
        config = apps.get_app_config(target)

        print('{} -> {}'.format(source, target))

        with transaction.atomic(using='migration_source'):
            # A named cursor is requested from the underlying DB-API
            # connection so rows can be fetched in ``itersize`` batches.
            with connection.connection.cursor('scrapi_migration') as cursor:
                # The source name is passed as a bound parameter rather than
                # being formatted into the SQL text, so quotes or SQL in a
                # harvester name cannot alter the query.
                cursor.execute(
                    """
                    SELECT "docID", raw
                    FROM webview_document
                    WHERE source = %s
                    """,
                    (source,),
                )

                with transaction.atomic():
                    record_count = 0
                    records = cursor.fetchmany(size=cursor.itersize)

                    while records:
                        bulk = []
                        for doc_id, raw in records:
                            if (
                                raw is None
                                or raw == 'null'
                                or raw['timestamps'] is None
                                or raw['timestamps']['harvestFinished'] is None
                            ):
                                print('{} -> {}: {} : raw is null'.format(
                                    source, target, doc_id))
                                continue

                            harvest_finished = arrow.get(
                                raw['timestamps']['harvestFinished'])
                            data = raw['doc'].encode()

                            bulk.append(
                                RawData(
                                    source=config.user,
                                    app_label=config.label,
                                    provider_doc_id=doc_id,
                                    sha256=sha256(data).hexdigest(),
                                    data=data,
                                    date_seen=harvest_finished.datetime,
                                    date_harvested=harvest_finished.datetime,
                                ))

                        RawData.objects.bulk_create(bulk)
                        # Counts every fetched row, including skipped ones.
                        record_count += len(records)
                        print('{} -> {}: {}'.format(
                            source, target, record_count))
                        records = cursor.fetchmany(size=cursor.itersize)
def test_must_have_data(self, share_source):
    """Check that a RawData built without ``data`` fails field validation.

    The first validation message reported for ``data`` must be the
    "cannot be blank" message.
    """
    empty = RawData(source=share_source, app_label='foo')

    with pytest.raises(exceptions.ValidationError) as excinfo:
        empty.clean_fields()
        empty.save()

    data_errors = excinfo.value.message_dict['data']
    assert 'This field cannot be blank.' == data_errors[0]
def raw_data(share_source):
    """Create, save and return a RawData row for ``share_source`` with empty dict data."""
    record = RawData(source=share_source, data={})
    record.save()
    return record