Ejemplo n.º 1
0
 def test_create_large_file(self):
     """Verify DataUpload.size is stored in a BigInt-capable column.

     Probes the boundaries: 2**31 (overflows a 32-bit IntegerField but
     must succeed with BigInt), 2**63 - 1 (max bigint, must succeed) and
     2**63 (must raise DatabaseError).
     """
     # Max number capable of storage in Postgres integer field, plus 1
     # (errors out with IntegerField; passes with BigInt)
     # NOTE: `upload` is intentionally unused -- creating it simply must not raise.
     upload = utils.get_test_data_upload(self.user, self.dataset, size=2147483648)
     # Test BigInt outer boundaries
     # Max bigint number
     upload2 = utils.get_test_data_upload(self.user, self.dataset, size=9223372036854775807)
     self.assertEqual(upload2.size, 9223372036854775807)
     # Max bigint + 1
     self.assertRaises(DatabaseError, utils.get_test_data_upload, self.user, self.dataset, size=9223372036854775808)
Ejemplo n.º 2
0
    def test_import_additional_data_same_columns(self):
        """Importing a second file with identical columns appends its rows.

        After importing the initial upload and then an XLS with the same
        column layout, the dataset should hold the rows from both files
        and both uploads should be flagged as imported.
        """
        self.dataset.import_data(self.user, self.upload)

        # Block until the async import task finishes.
        utils.wait()

        xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)

        # Refresh from database
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        self.dataset.import_data(self.user, xls_upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        upload = DataUpload.objects.get(id=self.upload.id)
        xls_upload = DataUpload.objects.get(id=xls_upload.id)

        # Schema is unchanged: same four columns, none typed/indexed.
        self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual([c['type'] for c in dataset.column_schema], ['int', 'unicode', 'unicode', 'unicode'])
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, None, None, None])
        # Row count doubles: 8 rows total after both imports.
        self.assertEqual(dataset.row_count, 8)
        self.assertEqual(upload.imported, True)
        self.assertEqual(xls_upload.imported, True)
        self.assertEqual(dataset.locked, False)

        # The matching row from each file is now findable in Solr.
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 2)
Ejemplo n.º 3
0
    def test_delete(self):
        """Deleting an imported upload purges its file, rows and Solr docs.

        After import the upload is the dataset's initial_upload; deleting
        it must clear that pointer, zero the row count, remove the file
        from disk, delete the model row, and unindex the data from Solr.
        """
        upload = utils.get_test_data_upload(self.user, self.dataset)
        upload_id = upload.id
        path = upload.get_path()

        self.assertEqual(os.path.isfile(path), True)

        # Start from an empty data core so numFound reflects only this import.
        solr.delete(settings.SOLR_DATA_CORE, '*:*')
        self.dataset.import_data(self.user, upload)
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'Christopher')['response']['numFound'], 1)

        upload = DataUpload.objects.get(id=upload_id)

        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, upload)
        self.assertEqual(dataset.row_count, 4)

        upload.delete()

        # Ensure dataset still exists
        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, None)
        self.assertEqual(dataset.row_count, 0)

        # Backing file is gone from disk.
        self.assertEqual(os.path.exists(path), False)

        # Model row is gone from the database.
        with self.assertRaises(DataUpload.DoesNotExist):
            DataUpload.objects.get(id=upload_id)

        # Indexed rows are gone from Solr.
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'Christopher')['response']['numFound'], 0)
Ejemplo n.º 4
0
    def test_import_additional_data_same_columns(self):
        """A second upload with a matching schema appends its rows.

        Both uploads must end up imported, the four-column schema must be
        unchanged, and the row count must double.
        """
        self.dataset.import_data(self.user, self.upload)

        second_upload = utils.get_test_data_upload(self.user, self.dataset,
                                                   utils.TEST_XLS_FILENAME)

        # Reload the dataset before importing again.
        self.dataset = Dataset.objects.get(id=self.dataset.id)
        self.dataset.import_data(self.user, second_upload)

        # Reload everything touched by the second import.
        refreshed = Dataset.objects.get(id=self.dataset.id)
        first_upload = DataUpload.objects.get(id=self.upload.id)
        second_upload = DataUpload.objects.get(id=second_upload.id)

        schema = refreshed.column_schema
        self.assertEqual([column['name'] for column in schema],
                         ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual([column['type'] for column in schema],
                         ['int', 'unicode', 'unicode', 'unicode'])
        self.assertEqual([column['indexed_name'] for column in schema],
                         [None, None, None, None])
        self.assertEqual(refreshed.row_count, 8)
        self.assertEqual(first_upload.imported, True)
        self.assertEqual(second_upload.imported, True)
        self.assertEqual(refreshed.locked, False)

        # One matching row per file is now indexed in Solr.
        hits = solr.query(settings.SOLR_DATA_CORE, 'Christopher')
        self.assertEqual(hits['response']['numFound'], 2)
Ejemplo n.º 5
0
    def test_import_xls(self):
        """Importing an XLS upload succeeds and indexes its rows.

        Checks the dispatched task name, the resulting column schema,
        row count, upload/task bookkeeping and the Solr index.
        """
        xls_upload = utils.get_test_data_upload(self.user, self.dataset,
                                                utils.TEST_XLS_FILENAME)

        self.dataset.import_data(self.user, xls_upload)

        task = self.dataset.current_task

        # The XLS-specific import task must have been scheduled.
        self.assertNotEqual(task, None)
        self.assertNotEqual(task.id, None)
        self.assertEqual(task.task_name, 'panda.tasks.import.xls')

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        xls_upload = DataUpload.objects.get(id=xls_upload.id)
        task = TaskStatus.objects.get(id=task.id)

        self.assertEqual([c['name'] for c in dataset.column_schema],
                         ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual([c['type'] for c in dataset.column_schema],
                         ['int', 'unicode', 'unicode', 'unicode'])
        # No columns were typed, so none get an indexed column name.
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema],
                         [None, None, None, None])
        self.assertEqual(dataset.row_count, 4)
        self.assertEqual(xls_upload.imported, True)
        # Task finished cleanly: timestamps set, no traceback.
        self.assertEqual(task.status, 'SUCCESS')
        self.assertNotEqual(task.start, None)
        self.assertNotEqual(task.end, None)
        self.assertEqual(task.traceback, None)
        self.assertEqual(dataset.locked, False)

        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'Christopher')['response']['numFound'], 1)
Ejemplo n.º 6
0
    def test_import_additional_data_different_columns(self):
        """A follow-up upload with mismatched columns is rejected.

        The second import must raise DataImportError and leave the
        dataset exactly as the first import left it.
        """
        self.dataset.import_data(self.user, self.upload)

        # Block until the async import task finishes.
        utils.wait()

        xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)
        # Forge an extra column so the schema no longer matches.
        xls_upload.columns = ['id', 'first_name', 'last_name', 'employer', 'MORE COLUMNS!']
        xls_upload.save()
        
        # Refresh from database
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        self.assertRaises(DataImportError, self.dataset.import_data, self.user, xls_upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        upload = DataUpload.objects.get(id=self.upload.id)
        xls_upload = DataUpload.objects.get(id=xls_upload.id)
        
        # Dataset keeps the original schema and rows; only the first
        # upload counts as imported.
        self.assertEqual(dataset.columns, ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual(dataset.row_count, 4)
        self.assertEqual(upload.imported, True)
        self.assertEqual(xls_upload.imported, False)

        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
Ejemplo n.º 7
0
    def test_import_excel_xlsx(self):
        """Importing an Excel .xlsx upload succeeds and indexes its rows.

        Checks that the xlsx-specific task is dispatched, waits for it,
        then verifies columns, row count, flags and the Solr index.
        """
        xlsx_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_EXCEL_XLSX_FILENAME)

        self.dataset.import_data(self.user, xlsx_upload)

        task = self.dataset.current_task

        # The XLSX-specific import task must have been scheduled.
        self.assertNotEqual(task, None)
        self.assertNotEqual(task.id, None)
        self.assertEqual(task.task_name, 'panda.tasks.import.xlsx')

        # Block until the async import task finishes.
        utils.wait()

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        xlsx_upload = DataUpload.objects.get(id=xlsx_upload.id)
        task = TaskStatus.objects.get(id=task.id)

        self.assertEqual(dataset.columns, ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual(dataset.row_count, 4)
        self.assertEqual(xlsx_upload.imported, True)
        # Task finished cleanly: timestamps set, no traceback.
        self.assertEqual(task.status, 'SUCCESS')
        self.assertNotEqual(task.start, None)
        self.assertNotEqual(task.end, None)
        self.assertEqual(task.traceback, None)

        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
Ejemplo n.º 8
0
    def test_delete(self):
        """Deleting an imported upload cleans up file, DB row and index.

        Verifies the upload becomes the dataset's initial_upload on
        import, then that delete() resets the dataset's counters,
        removes the file, the model row, and the Solr documents.
        """
        upload = utils.get_test_data_upload(self.user, self.dataset)
        upload_id = upload.id
        path = upload.get_path()

        self.assertEqual(os.path.isfile(path), True)

        # Empty the data core first so numFound counts only this import.
        solr.delete(settings.SOLR_DATA_CORE, '*:*')
        self.dataset.import_data(self.user, upload)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)

        upload = DataUpload.objects.get(id=upload_id)
        
        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, upload)
        self.assertEqual(dataset.row_count, 4)

        upload.delete()

        # Ensure dataset still exists
        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, None)
        self.assertEqual(dataset.row_count, 0)

        # File removed from disk.
        self.assertEqual(os.path.exists(path), False)

        # Database row removed.
        with self.assertRaises(DataUpload.DoesNotExist):
            DataUpload.objects.get(id=upload_id)
        
        # Solr documents removed.
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 0)
Ejemplo n.º 9
0
    def test_import_additional_xlsx_typed_columns(self):
        """Typed-column indexing survives a follow-up XLSX import.

        After reindexing with explicit typed_columns, importing a second
        file with the same layout must preserve each column's indexed
        flag / indexed_name and index the new rows under the same typed
        Solr fields.
        """
        self.dataset.import_data(self.user, self.upload)

        # Refresh from database
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        # Type columns 0, 2 and 3; leave first_name untyped.
        self.dataset.reindex_data(self.user, typed_columns=[True, False, True, True])

        second_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_EXCEL_XLSX_FILENAME)
        
        # Refresh from database
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        self.dataset.import_data(self.user, second_upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        
        self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual([c['type'] for c in dataset.column_schema], ['int', 'unicode', 'unicode', 'unicode'])
        # The typed-column choices from reindex_data must be retained.
        self.assertEqual([c['indexed'] for c in dataset.column_schema], [True, False, True, True])
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], ['column_int_id', None, 'column_unicode_last_name', 'column_unicode_employer'])
        self.assertEqual(dataset.row_count, 8)
        self.assertEqual(dataset.locked, False)

        # Rows from both files are searchable, including via the typed
        # fields; the untyped first_name field must not be queryable.
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_int_id:2')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_last_name:Germuska')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_first_name:Joseph')['response']['numFound'], 0)
Ejemplo n.º 10
0
    def test_reindex_complex(self):
        """Reindexing a many-typed CSV infers a type per column.

        Imports a fixture covering text/date/int/bool/float/time/datetime
        plus an empty and an unnamed column, reindexes with every column
        typed, then checks the inferred schema, min/max stats and typed
        range queries in Solr.
        """
        upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_CSV_TYPES_FILENAME)
        self.dataset.import_data(self.user, upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        # Request typing for every column.
        dataset.reindex_data(self.user, typed_columns=[True for c in upload.columns])

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        task = dataset.current_task

        # Reindex task must have completed cleanly.
        self.assertEqual(task.status, 'SUCCESS')
        self.assertNotEqual(task.start, None)
        self.assertNotEqual(task.end, None)
        self.assertEqual(task.traceback, None)

        # Inferred types per column; the all-empty column gets type None
        # and no indexed field.
        self.assertEqual([c['name'] for c in dataset.column_schema], ['text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column', ''])
        self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'date', 'int', 'bool', 'float', 'time', 'datetime', None, 'unicode'])
        self.assertEqual([c['indexed'] for c in dataset.column_schema], [True for c in upload.columns])
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], ['column_unicode_text', 'column_date_date', 'column_int_integer', 'column_bool_boolean', 'column_float_float', 'column_time_time', 'column_datetime_datetime', None, 'column_unicode_'])
        # min/max are recorded only for orderable typed columns.
        self.assertEqual([c['min'] for c in dataset.column_schema], [None, u'1920-01-01T00:00:00', 40, None, 1.0, u'9999-12-31T00:00:00', u'1971-01-01T04:14:00', None, None])
        self.assertEqual([c['max'] for c in dataset.column_schema], [None, u'1971-01-01T00:00:00', 164, None, 41800000.01, u'9999-12-31T14:57:13', u'2048-01-01T14:57:00', None, None])
        self.assertEqual(dataset.row_count, 5)
        self.assertEqual(dataset.locked, False)

        # Typed Solr fields support exact, phrase and range queries.
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_bool_boolean:true')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_text:"Chicago Tribune"')['response']['numFound'], 1)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_datetime:[1971-01-01T01:01:01Z TO NOW]')['response']['numFound'], 1)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_time_time:[9999-12-31T04:13:01Z TO *]')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_date_date:[1971-01-01T00:00:00Z TO NOW]')['response']['numFound'], 1)
Ejemplo n.º 11
0
    def test_import_additional_data_different_columns(self):
        """A second upload whose columns differ must be rejected.

        The mismatched import raises DataImportError and leaves the
        dataset's schema, row count and index untouched.
        """
        self.dataset.import_data(self.user, self.upload)

        mismatched = utils.get_test_data_upload(self.user, self.dataset,
                                                utils.TEST_XLS_FILENAME)
        # Forge an extra column so the layouts no longer agree.
        mismatched.columns = [
            'id', 'first_name', 'last_name', 'employer', 'MORE COLUMNS!'
        ]
        mismatched.save()

        # Reload the dataset before attempting the second import.
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        with self.assertRaises(DataImportError):
            self.dataset.import_data(self.user, mismatched)

        # Reload everything the failed import could have touched.
        refreshed = Dataset.objects.get(id=self.dataset.id)
        first_upload = DataUpload.objects.get(id=self.upload.id)
        mismatched = DataUpload.objects.get(id=mismatched.id)

        names = [column['name'] for column in refreshed.column_schema]
        self.assertEqual(names, ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual(refreshed.row_count, 4)
        self.assertEqual(first_upload.imported, True)
        self.assertEqual(mismatched.imported, False)
        self.assertEqual(refreshed.locked, False)

        hits = solr.query(settings.SOLR_DATA_CORE, 'Christopher')
        self.assertEqual(hits['response']['numFound'], 1)
Ejemplo n.º 12
0
    def test_reindex_complex(self):
        """Reindexing a many-typed CSV yields one typed field per column.

        Older variant of the complex reindex test: dates and times map to
        'datetime' and the empty column to 'NoneType', and every column
        (including the empty one) gets an indexed field name.
        """
        upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_CSV_TYPES_FILENAME)
        self.dataset.import_data(self.user, upload)

        # Block until the async import task finishes.
        utils.wait()

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        # Request typing for every column.
        dataset.reindex_data(self.user, typed_columns=[True for c in upload.columns])

        # Block until the async reindex task finishes.
        utils.wait()

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        self.assertEqual([c['name'] for c in dataset.column_schema], ['text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column', ''])
        self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'datetime', 'int', 'bool', 'float', 'datetime', 'datetime', 'NoneType', 'unicode'])
        self.assertEqual([c['indexed'] for c in dataset.column_schema], [True for c in upload.columns])
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], ['column_unicode_text', 'column_datetime_date', 'column_int_integer', 'column_bool_boolean', 'column_float_float', 'column_datetime_time', 'column_datetime_datetime', 'column_NoneType_empty_column', 'column_unicode_'])
        self.assertEqual(dataset.row_count, 5)
        self.assertEqual(dataset.locked, False)

        # Typed Solr fields support exact, phrase and range queries.
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_bool_boolean:true')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_text:"Chicago Tribune"')['response']['numFound'], 1)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_datetime:[1971-01-01T01:01:01Z TO NOW]')['response']['numFound'], 1)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_time:[9999-12-31T04:13:01Z TO *]')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_date:[1971-01-01T00:00:00Z TO NOW]')['response']['numFound'], 1)
Ejemplo n.º 13
0
    def test_import_xls(self):
        """Importing an XLS upload succeeds and indexes its rows.

        Verifies the XLS task is dispatched, then checks the resulting
        schema, row count, upload/task state and the Solr index.
        """
        xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)

        self.dataset.import_data(self.user, xls_upload)

        task = self.dataset.current_task

        # The XLS-specific import task must have been scheduled.
        self.assertNotEqual(task, None)
        self.assertNotEqual(task.id, None)
        self.assertEqual(task.task_name, 'panda.tasks.import.xls')

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        xls_upload = DataUpload.objects.get(id=xls_upload.id)
        task = TaskStatus.objects.get(id=task.id)

        self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual([c['type'] for c in dataset.column_schema], ['int', 'unicode', 'unicode', 'unicode'])
        # No columns were typed, so none get an indexed column name.
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, None, None, None])
        self.assertEqual(dataset.row_count, 4)
        self.assertEqual(xls_upload.imported, True)
        # Task finished cleanly: timestamps set, no traceback.
        self.assertEqual(task.status, 'SUCCESS')
        self.assertNotEqual(task.start, None)
        self.assertNotEqual(task.end, None)
        self.assertEqual(task.traceback, None)
        self.assertEqual(dataset.locked, False)

        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
Ejemplo n.º 14
0
    def test_undeletable(self):
        """delete() on an upload marked deletable=False must raise."""
        locked_upload = utils.get_test_data_upload(self.user, self.dataset)

        # Flag the upload as protected before attempting deletion.
        locked_upload.deletable = False
        locked_upload.save()

        self.assertRaises(DataUploadNotDeletable, locked_upload.delete)
Ejemplo n.º 15
0
    def setUp(self):
        """Create the shared fixtures: API client, auth, user/dataset/upload."""
        # HTTP test client and credentials for authenticated API calls.
        self.client = Client()
        self.auth_headers = utils.get_auth_headers()

        # A panda user owning one dataset with a single upload attached.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(self.user, self.dataset)
Ejemplo n.º 16
0
    def setUp(self):
        """Provision the per-test fixtures used throughout this case."""
        # Core model fixtures, built in dependency order.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(self.user, self.dataset)
        # Authenticated API access for request-level tests.
        self.auth_headers = utils.get_auth_headers()
        self.client = Client()
Ejemplo n.º 17
0
    def test_undeletable(self):
        """An upload flagged as not deletable refuses deletion."""
        protected = utils.get_test_data_upload(self.user, self.dataset)

        protected.deletable = False
        protected.save()

        # Attempting to delete must raise rather than remove anything.
        self.assertRaises(DataUploadNotDeletable, protected.delete)
Ejemplo n.º 18
0
    def setUp(self):
        """Run Celery tasks eagerly and start from a fresh Solr index."""
        # Execute tasks synchronously so tests see results immediately.
        settings.CELERY_ALWAYS_EAGER = True
        utils.setup_test_solr()

        # Standard fixture trio: user, dataset, upload.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(self.user, self.dataset)
Ejemplo n.º 19
0
    def setUp(self):
        """Prepare eager task execution, a clean Solr core and fixtures."""
        settings.CELERY_ALWAYS_EAGER = True  # run tasks inline, not async
        utils.setup_test_solr()

        # One user owning a dataset with a single attached upload.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(self.user, self.dataset)
Ejemplo n.º 20
0
    def test_delete(self):
        """Deleting an upload removes its backing file from disk."""
        doomed = utils.get_test_data_upload(self.user, self.dataset)
        file_path = doomed.get_path()

        # File must exist before deletion...
        self.assertEqual(os.path.isfile(file_path), True)

        doomed.delete()

        # ...and be gone afterwards.
        self.assertEqual(os.path.exists(file_path), False)
Ejemplo n.º 21
0
    def setUp(self):
        """Eager task execution plus the standard API-test fixtures."""
        settings.CELERY_ALWAYS_EAGER = True

        # Model fixtures, created in dependency order.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(self.user, self.dataset)

        # Test client and credentials for authenticated requests.
        self.client = Client()
        self.auth_headers = utils.get_auth_headers()
Ejemplo n.º 22
0
    def test_delete(self):
        """delete() removes both the on-disk file and the database row."""
        doomed = utils.get_test_data_upload(self.user, self.dataset)
        doomed_id = doomed.id
        file_path = doomed.get_path()

        # Precondition: the backing file exists.
        self.assertEqual(os.path.isfile(file_path), True)

        doomed.delete()

        # The file and the model row are both gone.
        self.assertEqual(os.path.exists(file_path), False)
        self.assertRaises(DataUpload.DoesNotExist,
                          DataUpload.objects.get, id=doomed_id)
Ejemplo n.º 23
0
    def test_created(self):
        """A freshly created upload records file metadata and parsed CSV info.

        Fix: removed stray trailing semicolons (un-Pythonic statement
        terminators); behavior is unchanged.
        """
        upload = utils.get_test_data_upload(self.user, self.dataset)

        # Ownership and provenance metadata.
        self.assertEqual(upload.original_filename, utils.TEST_DATA_FILENAME)
        self.assertEqual(upload.creator, self.user)
        self.assertNotEqual(upload.creation_date, None)
        self.assertEqual(upload.dataset, self.dataset)

        self.assertEqual(upload.data_type, 'csv')
        # NOTE(review): the remaining assertions check self.upload (the
        # fixture from setUp) rather than the `upload` created above --
        # presumably both wrap the same test file; confirm the mix is
        # intentional.
        self.assertNotEqual(self.upload.dialect, None)
        self.assertEqual(self.upload.columns, ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual(len(self.upload.sample_data), 4)
        self.assertEqual(self.upload.sample_data[0], ['1', 'Brian', 'Boyer', 'Chicago Tribune'])
Ejemplo n.º 24
0
    def test_import_additional_xlsx_typed_columns(self):
        """Typed-column settings persist across a follow-up XLSX import.

        Reindexes with explicit typed_columns, imports a second file with
        the same layout, and checks the per-column indexed flags, the
        typed Solr field names and the indexed rows.
        """
        self.dataset.import_data(self.user, self.upload)

        # Refresh from database
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        # Type columns 0, 2 and 3; leave first_name untyped.
        self.dataset.reindex_data(self.user,
                                  typed_columns=[True, False, True, True])

        second_upload = utils.get_test_data_upload(
            self.user, self.dataset, utils.TEST_EXCEL_XLSX_FILENAME)

        # Refresh from database
        self.dataset = Dataset.objects.get(id=self.dataset.id)

        self.dataset.import_data(self.user, second_upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        self.assertEqual([c['name'] for c in dataset.column_schema],
                         ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual([c['type'] for c in dataset.column_schema],
                         ['int', 'unicode', 'unicode', 'unicode'])
        # The typed-column choices from reindex_data must be retained.
        self.assertEqual([c['indexed'] for c in dataset.column_schema],
                         [True, False, True, True])
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [
            'column_int_id', None, 'column_unicode_last_name',
            'column_unicode_employer'
        ])
        self.assertEqual(dataset.row_count, 8)
        self.assertEqual(dataset.locked, False)

        # Rows from both files are searchable via the typed fields; the
        # untyped first_name field must not be queryable.
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'Christopher')['response']['numFound'], 2)
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'column_int_id:2')['response']['numFound'], 2)
        self.assertEqual(
            solr.query(
                settings.SOLR_DATA_CORE,
                'column_unicode_last_name:Germuska')['response']['numFound'],
            2)
        self.assertEqual(
            solr.query(
                settings.SOLR_DATA_CORE,
                'column_unicode_first_name:Joseph')['response']['numFound'], 0)
Ejemplo n.º 25
0
    def test_created(self):
        """A new upload captures metadata, parsed columns and type guesses."""
        upload = utils.get_test_data_upload(self.user, self.dataset)

        # Ownership and provenance metadata.
        self.assertEqual(upload.original_filename, utils.TEST_DATA_FILENAME)
        self.assertEqual(upload.creator, self.user)
        self.assertNotEqual(upload.creation_date, None)
        self.assertEqual(upload.dataset, self.dataset)

        self.assertEqual(upload.data_type, "csv")
        # NOTE(review): the assertions below check self.upload (the setUp
        # fixture) rather than the `upload` created above -- presumably
        # both wrap the same test file; confirm the mix is intentional.
        self.assertNotEqual(self.upload.dialect, None)
        self.assertEqual(self.upload.columns, ["id", "first_name", "last_name", "employer"])
        self.assertEqual(len(self.upload.sample_data), 4)
        self.assertEqual(self.upload.sample_data[0], ["1", "Brian", "Boyer", "Chicago Tribune"])

        # One guessed type per column.
        self.assertEqual(len(self.upload.guessed_types), 4)
        self.assertEqual(self.upload.guessed_types, ["int", "unicode", "unicode", "unicode"])
Ejemplo n.º 26
0
    def setUp(self):
        """Eager tasks, clean Solr, and a dataset with its data pre-imported."""
        settings.CELERY_ALWAYS_EAGER = True
        utils.setup_test_solr()

        # Standard fixture trio: user, dataset, upload.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(self.user, self.dataset)

        # Kick off the initial import and wait for the task to finish so
        # each test starts with the data already indexed.
        self.dataset.import_data(self.user, self.upload, 0)
        utils.wait()

        # Authenticated API client for request-level tests.
        self.auth_headers = utils.get_auth_headers()
        self.client = Client()
Ejemplo n.º 27
0
    def test_change_user_reindex(self):
        """Renaming a dataset's creator reindexes the dataset in Solr.

        Datasets are searchable by creator name: after first_name changes,
        searching the old name must find nothing and the new name must
        find exactly this dataset.
        """
        # Empty the datasets core so numFound counts only this dataset.
        solr.delete(settings.SOLR_DATASETS_CORE, '*:*')

        self.user.first_name = 'bazbarfoo'
        self.user.save()

        dataset = utils.get_test_dataset(self.user)
        # NOTE(review): `upload` is unused afterwards -- presumably created
        # so the dataset has an attached upload; confirm it is required.
        upload = utils.get_test_data_upload(self.user, dataset)

        # The dataset is findable under the current creator name.
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
        old_name = dataset.creator.first_name

        dataset.creator.first_name = 'foobarbaz'
        dataset.creator.save()

        # Old name no longer matches; the new name does.
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, old_name)['response']['numFound'], 0)
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
Ejemplo n.º 28
0
    def test_change_user_reindex(self):
        """Saving a renamed creator updates the dataset's Solr document."""
        # Start with an empty datasets core so counts are unambiguous.
        solr.delete(settings.SOLR_DATASETS_CORE, '*:*')

        self.user.first_name = 'bazbarfoo'
        self.user.save()

        dataset = utils.get_test_dataset(self.user)
        upload = utils.get_test_data_upload(self.user, dataset)

        # Findable under the creator's current name.
        found = solr.query(settings.SOLR_DATASETS_CORE,
                           dataset.creator.first_name)['response']['numFound']
        self.assertEqual(found, 1)
        previous_name = dataset.creator.first_name

        dataset.creator.first_name = 'foobarbaz'
        dataset.creator.save()

        # The old name no longer matches anything...
        stale = solr.query(settings.SOLR_DATASETS_CORE,
                           previous_name)['response']['numFound']
        self.assertEqual(stale, 0)
        # ...while the new name finds exactly this dataset.
        fresh = solr.query(settings.SOLR_DATASETS_CORE,
                           dataset.creator.first_name)['response']['numFound']
        self.assertEqual(fresh, 1)
Ejemplo n.º 29
0
    def test_created(self):
        """A new upload captures metadata, parse results, type guesses and flags."""
        upload = utils.get_test_data_upload(self.user, self.dataset)

        # Ownership and provenance metadata.
        self.assertEqual(upload.original_filename, utils.TEST_DATA_FILENAME)
        self.assertEqual(upload.creator, self.user)
        self.assertNotEqual(upload.creation_date, None)
        self.assertEqual(upload.dataset, self.dataset)

        self.assertEqual(upload.data_type, 'csv')
        # NOTE(review): the assertions below check self.upload (the setUp
        # fixture) rather than the `upload` created above -- presumably
        # both wrap the same test file; confirm the mix is intentional.
        self.assertNotEqual(self.upload.dialect, None)
        self.assertEqual(self.upload.columns,
                         ['id', 'first_name', 'last_name', 'employer'])
        self.assertEqual(len(self.upload.sample_data), 4)
        self.assertEqual(self.upload.sample_data[0],
                         ['1', 'Brian', 'Boyer', 'Chicago Tribune'])

        # One guessed type per column.
        self.assertEqual(len(self.upload.guessed_types), 4)
        self.assertEqual(self.upload.guessed_types,
                         ['int', 'unicode', 'unicode', 'unicode'])

        # Uploads are deletable by default.
        self.assertEqual(upload.deletable, True)
0
    def test_reindex_with_currency(self):
        upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_MONEY)
        self.dataset.import_data(self.user, upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        dataset.reindex_data(self.user, typed_columns=[False, True], column_types=['unicode', 'float'])

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        self.assertEqual([c['name'] for c in dataset.column_schema], ['product', 'price'])
        self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'float'])
        self.assertEqual([c['indexed'] for c in dataset.column_schema], [False, True])
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, 'column_float_price'])
        self.assertEqual([c['min'] for c in dataset.column_schema], [None, 39.99])
        self.assertEqual([c['max'] for c in dataset.column_schema], [None, 2599.00])

        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:39.99')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:[1500 TO *]')['response']['numFound'], 2)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:*')['response']['numFound'], 8)
Ejemplo n.º 31
0
    def test_reindex_with_currency(self):
        """Reindexing with an explicit float type parses currency values.

        Retypes the price column to 'float', then verifies the resulting
        schema, min/max stats, and that exact/range/wildcard Solr queries
        work against the typed field.
        """
        xls_upload = utils.get_test_data_upload(self.user,
                                                self.dataset,
                                                filename=utils.TEST_MONEY) if False else None  # placeholder
Ejemplo n.º 32
0
    def test_import_encoded_data(self):
        """
        This tests for a complicated case where a UnicodeDecodeError
        during import could be masked by an AttributeError in the
        return handler.
        """
        # Shrink the sniffer sample so the Latin-1 file trips the decoder.
        # Fix: restore the module-level setting in a finally block so a
        # failing assertion no longer leaks the shrunken sample size into
        # subsequent tests. (Also fixed the "AttrbiuteError" typo above.)
        old_sniffer_size = settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE
        settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = 50

        try:
            data_upload = utils.get_test_data_upload(
                self.user, self.dataset, utils.TEST_LATIN1_DATA_FILENAME)

            self.dataset.import_data(self.user, data_upload)

            task = self.dataset.current_task

            self.assertNotEqual(task, None)
            self.assertNotEqual(task.id, None)
            self.assertEqual(task.task_name, 'panda.tasks.import.csv')

            # Refresh from database
            dataset = Dataset.objects.get(id=self.dataset.id)
            data_upload = DataUpload.objects.get(id=data_upload.id)
            task = TaskStatus.objects.get(id=task.id)

            # The import must fail cleanly: nothing imported, task marked
            # FAILURE, and the traceback surfaces the encoding problem.
            self.assertEqual(len(dataset.column_schema), 8)
            self.assertEqual(dataset.row_count, None)
            self.assertEqual(data_upload.imported, False)
            self.assertEqual(task.status, 'FAILURE')
            self.assertNotEqual(task.start, None)
            self.assertNotEqual(task.end, None)
            self.assertEqual('encoded' in task.traceback, True)
            self.assertEqual(dataset.locked, False)

            # No rows from the failed import may reach Solr.
            self.assertEqual(
                solr.query(settings.SOLR_DATA_CORE,
                           'walking')['response']['numFound'], 0)
        finally:
            settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = old_sniffer_size
Ejemplo n.º 33
0
    def test_import_encoded_data(self):
        """
        Regression test for a case where a UnicodeDecodeError during
        import could be masked by an AttributeError in the return
        handler; the task must instead end in a clean FAILURE.
        """
        # Shrink the sniffer sample so upload succeeds and the decode
        # error is only triggered by the import task itself.
        old_sniffer_size = settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE
        settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = 50

        try:
            data_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_LATIN1_DATA_FILENAME)

            self.dataset.import_data(self.user, data_upload)

            task = self.dataset.current_task

            self.assertNotEqual(task, None)
            self.assertNotEqual(task.id, None)
            self.assertEqual(task.task_name, 'panda.tasks.import.csv')

            # Refresh from database
            dataset = Dataset.objects.get(id=self.dataset.id)
            data_upload = DataUpload.objects.get(id=data_upload.id)
            task = TaskStatus.objects.get(id=task.id)

            # Import must fail without importing rows; the encoding error
            # should be visible in the task traceback and the dataset
            # left unlocked.
            self.assertEqual(len(dataset.column_schema), 8)
            self.assertEqual(dataset.row_count, None)
            self.assertEqual(data_upload.imported, False)
            self.assertEqual(task.status, 'FAILURE')
            self.assertNotEqual(task.start, None)
            self.assertNotEqual(task.end, None)
            self.assertEqual('encoded' in task.traceback, True)
            self.assertEqual(dataset.locked, False)

            # No data from the failed import should be searchable.
            self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'walking')['response']['numFound'], 0)
        finally:
            # Restore the global setting even when an assertion fails.
            settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = old_sniffer_size
Ejemplo n.º 34
0
 def test_sample_encoding_fails(self):
     """Sampling a latin1 file without declaring its encoding must raise."""
     self.assertRaises(
         DataSamplingError,
         utils.get_test_data_upload,
         self.user,
         self.dataset,
         utils.TEST_LATIN1_FILENAME,
     )
Ejemplo n.º 35
0
 def test_sample_encoding_success(self):
     """Sampling a latin1 file succeeds when the encoding is declared."""
     filename = utils.TEST_LATIN1_FILENAME
     utils.get_test_data_upload(
         self.user, self.dataset, filename, encoding='latin1')
Ejemplo n.º 36
0
 def test_sample_encoding_success(self):
     """Declaring encoding='latin1' lets the latin1 sample upload succeed."""
     upload_args = (self.user, self.dataset, utils.TEST_LATIN1_FILENAME)
     utils.get_test_data_upload(*upload_args, encoding='latin1')
Ejemplo n.º 37
0
 def test_sample_encoding_fails(self):
     """Omitting the encoding for a latin1 file raises DataSamplingError."""
     upload_args = (self.user, self.dataset, utils.TEST_LATIN1_FILENAME)
     with self.assertRaises(DataSamplingError):
         utils.get_test_data_upload(*upload_args)
Ejemplo n.º 38
0
    def test_reindex_complex(self):
        """
        Import the mixed-type test CSV, reindex it with type inference
        enabled for every column, then verify the inferred schema,
        per-column min/max stats and typed Solr queries.
        """
        upload = utils.get_test_data_upload(
            self.user, self.dataset, filename=utils.TEST_CSV_TYPES_FILENAME)
        self.dataset.import_data(self.user, upload)

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)

        # Request typed indexing for every column in the upload.
        dataset.reindex_data(self.user,
                             typed_columns=[True for c in upload.columns])

        # Refresh from database
        dataset = Dataset.objects.get(id=self.dataset.id)
        task = dataset.current_task

        # The reindex task must complete without error.
        self.assertEqual(task.status, 'SUCCESS')
        self.assertNotEqual(task.start, None)
        self.assertNotEqual(task.end, None)
        self.assertEqual(task.traceback, None)

        # Column names come straight from the CSV header (note the
        # trailing unnamed column).
        self.assertEqual([c['name'] for c in dataset.column_schema], [
            'text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime',
            'empty_column', ''
        ])
        # Inferred types per column; the all-empty column gets no type.
        self.assertEqual([c['type'] for c in dataset.column_schema], [
            'unicode', 'date', 'int', 'bool', 'float', 'time', 'datetime',
            None, 'unicode'
        ])
        # Every column was requested for typed indexing.
        self.assertEqual([c['indexed'] for c in dataset.column_schema],
                         [True for c in upload.columns])
        # Indexed field names follow the column_<type>_<name> pattern;
        # the untyped empty column gets none.
        self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [
            'column_unicode_text', 'column_date_date', 'column_int_integer',
            'column_bool_boolean', 'column_float_float', 'column_time_time',
            'column_datetime_datetime', None, 'column_unicode_'
        ])
        # Min/max stats are only computed for orderable typed columns;
        # times appear as datetimes on the sentinel date 9999-12-31.
        self.assertEqual([c['min'] for c in dataset.column_schema], [
            None, u'1920-01-01T00:00:00', 40, None, 1.0,
            u'9999-12-31T00:00:00', u'1971-01-01T04:14:00', None, None
        ])
        self.assertEqual([c['max'] for c in dataset.column_schema], [
            None, u'1971-01-01T00:00:00', 164, None, 41800000.01,
            u'9999-12-31T14:57:13', u'2048-01-01T14:57:00', None, None
        ])
        self.assertEqual(dataset.row_count, 5)
        self.assertEqual(dataset.locked, False)

        # Typed Solr queries against the reindexed fields: exact bool
        # and text matches, plus range queries on datetime/time/date.
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'column_bool_boolean:true')['response']['numFound'], 2)
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'column_unicode_text:"Chicago Tribune"')['response']
            ['numFound'], 1)
        self.assertEqual(
            solr.query(
                settings.SOLR_DATA_CORE,
                'column_datetime_datetime:[1971-01-01T01:01:01Z TO NOW]')
            ['response']['numFound'], 1)
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'column_time_time:[9999-12-31T04:13:01Z TO *]')
            ['response']['numFound'], 2)
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'column_date_date:[1971-01-01T00:00:00Z TO NOW]')
            ['response']['numFound'], 1)
Ejemplo n.º 39
0
    def setUp(self):
        """Build the shared fixtures: a validator, a panda user, a dataset
        owned by that user, and a data upload attached to the dataset."""
        self.validator = DataValidation()

        user = utils.get_panda_user()
        dataset = utils.get_test_dataset(user)

        self.user = user
        self.dataset = dataset
        self.upload = utils.get_test_data_upload(user, dataset)
Ejemplo n.º 40
0
    def setUp(self):
        """Create the per-test fixtures used by this suite."""
        self.validator = DataValidation()

        # Each fixture builds on the previous: user -> dataset -> upload.
        self.user = utils.get_panda_user()
        self.dataset = utils.get_test_dataset(self.user)
        self.upload = utils.get_test_data_upload(
            self.user, self.dataset)