Esempio n. 1
0
 def testInvalidParameters(self):
     """
     Check how the database assetstore helpers react to unusable input.

     Covers files without usable database info, query-parameter
     extraction, validation of a file in the first listed assetstore, and
     validation of a database file that lacks required information.
     """
     from girder.plugins.database_assetstore import assetstore
     from girder.plugins.database_assetstore.base import DB_INFO_KEY, DB_ASSETSTORE_ID

     # Inputs for which no database info should be reported
     self.assertIsNone(assetstore.getDbInfoForFile({}))
     unknownFile = {DB_INFO_KEY: {}, 'assetstoreId': 'unknown'}
     unknownStore = {'type': 'unknown'}
     self.assertIsNone(assetstore.getDbInfoForFile(unknownFile, unknownStore))

     # (file document, expected query parameters) pairs
     queryCases = (
         ({}, {}),
         ({DB_INFO_KEY: {'a': 'b'}}, {}),
         ({DB_INFO_KEY: {'sort': 'b'}}, {'sort': 'b'}),
     )
     for fileDoc, expected in queryCases:
         self.assertEqual(assetstore.getQueryParamsForFile(fileDoc), expected)

     # Test with non-database assetstore
     listing = self.request(path='/assetstore', method='GET', user=self.admin)
     self.assertStatusOk(listing)
     self.assertEqual(2, len(listing.json))
     firstStore = listing.json[0]
     plainFile = {DB_INFO_KEY: {}, 'assetstoreId': str(firstStore['_id'])}
     self.assertIsNone(assetstore.validateFile(plainFile))

     # Test database validation, after creating an assetstore from
     # self.dbParams2
     created = self.request(
         path='/assetstore', method='POST', user=self.admin,
         params=self.dbParams2)
     self.assertStatusOk(created)
     createdStore = created.json
     incompleteFile = {
         DB_INFO_KEY: {'table': 'sample'},
         'assetstoreId': DB_ASSETSTORE_ID,
     }
     with six.assertRaisesRegex(self, Exception, 'must have a non-blank'):
         self.assertIsNone(assetstore.validateFile(incompleteFile))
Esempio n. 2
0
    def mapReduce(self, item, mapScript, reduceScript, params=None):
        """
        Run a map/reduce job over the file that backs a dataset item.

        The file is looked up from ``item['meta']['rlab']['fileId']`` with
        read access for the current user (or the anonymous user when nobody
        is logged in).  If the file has database info of type ``mongo`` the
        job is run through ``mongoMapReduce``; if it has no database info the
        job is run through ``mapReduceViaDownload``.

        :param item: the item document; must contain ``meta.rlab.fileId``.
        :param mapScript: the map script, forwarded to the chosen backend.
        :param reduceScript: the reduce script, forwarded to the chosen
            backend.
        :param params: optional extra parameters forwarded to the chosen
            backend.  A new empty dictionary is used when omitted.
        :returns: whatever the chosen backend returns.
        :raises RestException: if the file lives in a database whose type is
            not ``mongo``.
        """
        # A ``{}`` default would be one dictionary shared by every call (and
        # by whatever the backends do with it), so create it per call.
        if params is None:
            params = {}

        # Get the current or the anonymous user
        user = self.getCurrentUser()
        if user is None:
            user = self.app.anonymousAccess.getAnonymousUser()

        # Figure out what kind of assetstore the file is in
        fileInfo = self.model('file').load(item['meta']['rlab']['fileId'],
                                           level=AccessType.READ,
                                           user=user,
                                           exc=True)
        dbInfo = getDbInfoForFile(fileInfo)

        # No database info: fall back to the download-based implementation.
        if dbInfo is None:
            return self.mapReduceViaDownload(item, user, mapScript,
                                             reduceScript, params)

        if dbInfo['type'] != 'mongo':
            raise RestException('MapReduce for ' + dbInfo['type'] +
                                ' databases is not yet supported')

        collection = self.getMongoCollection(dbInfo)
        return self.mongoMapReduce(mapScript, reduceScript,
                                   collection, params)
Esempio n. 3
0
    def setupDataset(self, item, params, user):
        """
        Mark an item as a dataset and record how its backing file is parsed.

        The ``rlab`` entry of the item's metadata receives the item type, a
        version number, the id of the backing file, the file format
        (``'json'`` or ``'csv'``) and the matching format details
        (``jsonPath`` or ``dialect``).  The item is then saved.

        :param item: the item document to update.
        :param params: request parameters.  ``fileId``, ``jsonPath`` and
            ``dialect`` (a JSON-encoded string) are honoured when present.
            This dictionary is not modified.
        :param user: the user passed to the file model when loading the file.
        :returns: the result of the item model's ``updateItem``.
        :raises RestException: if the item has no files, the file cannot be
            loaded, or the file format cannot be determined.
        """
        metadata = item.get('meta', {})
        rlab = metadata.get('rlab', {})
        rlab['itemType'] = 'dataset'
        rlab['versionNumber'] = self.app.versioning.versionNumber({})

        # Determine fileId
        if 'fileId' in params:
            # We were given the fileId
            rlab['fileId'] = params['fileId']
        else:
            # Use the first file in this item; only that one is needed, so
            # don't read the whole listing.
            firstFile = next(
                iter(self.model('item').childFiles(item=item)), None)
            if firstFile is None:
                raise RestException('Item contains no files')
            rlab['fileId'] = firstFile['_id']

        fileObj = self.model('file').load(rlab['fileId'], user=user)
        if fileObj is None:
            # Report a missing file as a REST error rather than failing
            # later with an attribute error on None.
            raise RestException('Could not load file %s' % rlab['fileId'])

        # Determine format
        usesDatabase = getDbInfoForFile(fileObj) is not None
        if usesDatabase:
            # All database info is returned internally as CSV.
            # TODO: this will change soon!
            rlab['format'] = 'csv'
        else:
            # Tolerate fields that are present but stored as None.
            exts = fileObj.get('exts') or []
            mimeType = (fileObj.get('mimeType') or '').lower()
            if 'json' in exts or 'json' in mimeType:
                rlab['format'] = 'json'
            elif any(kind in exts for kind in ('csv', 'tsv')) or \
                    any(kind in mimeType for kind in ('csv', 'tsv')):
                rlab['format'] = 'csv'
            else:
                raise RestException('Could not determine file format')

        # Format details
        if rlab['format'] == 'json':
            rlab['jsonPath'] = params.get('jsonPath', '$')
        elif usesDatabase:
            # Database-backed files always use the default parsing settings,
            # whatever the request asked for.
            rlab['dialect'] = {}
        elif 'dialect' in params:
            rlab['dialect'] = json.loads(params['dialect'])
        else:
            # use girder_worker's enhancements of csv.Sniffer()
            # to infer the dialect
            sample = self.model('file').download(
                fileObj, headers=False, endByte=self.sniffSampleSize)
            reader = get_csv_reader(sample)
            dialect = reader.dialect
            # Check if it's a standard dialect (we have to do this
            # to get at details like the delimiter if it IS standard...
            # otherwise, they're directly accessible)
            try:
                dialect = csv.get_dialect(dialect)
            except (csv.Error, TypeError):
                # Not a registered dialect name (or not usable as a lookup
                # key); keep the dialect object the reader gave us.
                pass

            # Okay, now dump all the parameters so that
            # we can reconstruct the dialect later
            rlab['dialect'] = {
                key: value
                for key, value in inspect.getmembers(dialect)
                if not key.startswith('_')
            }

        metadata['rlab'] = rlab
        item['meta'] = metadata

        return self.model('item').updateItem(item)
Esempio n. 4
0
    def testAssetstoreDownload(self):
        """
        Test downloading database-backed files through the REST API, through
        the assetstore adapter, and through a direct query call.

        The whole sequence is run twice, once for each value of the
        ``userAssetstore`` flag passed to ``self._createTownItem``.  Later
        steps reuse and mutate ``params`` and ``jsondata`` from earlier
        steps, so the order of the statements matters.
        """
        from girder.plugins.database_assetstore import assetstore
        from girder.plugins.database_assetstore import query

        for userAssetstore in (False, True):
            townItem, townFile, assetstore1 = self._createTownItem({
                'format': 'list',
                'fields': 'town,pop2010',
                'limit': '10'
            }, userAssetstore)

            # Download with the parameters the item was created with
            resp = self.request(path='/item/%s/download' % str(townItem['_id']))
            self.assertStatusOk(resp)
            data = resp.json
            self.assertEqual(data['datacount'], 10)
            self.assertEqual(data['fields'], ['town', 'pop2010'])
            # Test extraParameters for format
            params = {
                'extraParameters': urllib.parse.urlencode({
                    'format': 'csv',
                    'limit': 5
                }),
                'contentDisposition': 'inline'
            }
            resp = self.request(
                path='/item/%s/download' % str(townItem['_id']), params=params,
                isJson=False)
            self.assertStatusOk(resp)
            data = self.getBody(resp)
            # Presumably a header row, 5 data rows, and an empty string after
            # the final line terminator.
            self.assertEqual(len(data.split('\r\n')), 7)
            self.assertEqual(data.split('\r\n', 1)[0], 'town,pop2010')
            # Test range requests
            # The Range end byte is inclusive, hence the [10:20] slice
            resp = self.request(
                path='/item/%s/download' % str(townItem['_id']), params=params,
                isJson=False, additionalHeaders=[('Range', 'bytes=10-19')])
            self.assertStatus(resp, 206)
            self.assertEqual(self.getBody(resp), data[10:20])
            resp = self.request(
                path='/item/%s/download' % str(townItem['_id']), params=params,
                isJson=False, additionalHeaders=[('Range', 'bytes=50-')])
            self.assertStatus(resp, 206)
            self.assertEqual(self.getBody(resp), data[50:])
            # A range starting at byte 5000 is expected to give an empty 206
            # response
            resp = self.request(
                path='/item/%s/download' % str(townItem['_id']), params=params,
                isJson=False, additionalHeaders=[('Range', 'bytes=5000-')])
            self.assertStatus(resp, 206)
            self.assertEqual(self.getBody(resp), '')
            # Test more complex extraParameters
            extra = {
                'format': 'list',
                'fields': json.dumps(['town', 'pop2000', 'pop2010']),
                'sort': json.dumps([['pop2000', -1]]),
                'filters': json.dumps([{
                    'field': 'pop2000', 'operator': '<', 'value': 100000}]),
                'limit': 5
            }
            params = {'extraParameters': urllib.parse.urlencode(extra)}
            resp = self.request(
                path='/item/%s/download' % str(townItem['_id']), params=params)
            self.assertStatusOk(resp)
            data = resp.json
            self.assertEqual(data['datacount'], 5)
            self.assertEqual(data['fields'], ['town', 'pop2000', 'pop2010'])
            # Rows are sorted by pop2000 descending and filtered to < 100000
            self.assertLess(int(data['data'][0][1]), 100000)
            self.assertLess(int(data['data'][1][1]), int(data['data'][0][1]))
            # Test with JSON extraParameters
            params = {'extraParameters': json.dumps(extra)}
            resp = self.request(
                path='/item/%s/download' % str(townItem['_id']), params=params)
            self.assertStatusOk(resp)
            data = resp.json
            self.assertEqual(data['datacount'], 5)
            self.assertEqual(data['fields'], ['town', 'pop2000', 'pop2010'])
            self.assertLess(int(data['data'][0][1]), 100000)
            self.assertLess(int(data['data'][1][1]), int(data['data'][0][1]))
            # Test a direct call
            townFile = list(Item().childFiles(item=townItem))[0]
            adapter = File().getAssetstoreAdapter(townFile)
            params = {
                'format': 'list',
                'fields': ['town', 'pop2000', 'pop2010'],
                'sort': [['pop2000', -1]],
                'filters': [{
                    'field': 'pop2000', 'operator': '<', 'value': 100000}],
                'limit': 5
            }
            # downloadFile returns a callable; calling it yields the parts of
            # the body as bytes
            func = adapter.downloadFile(townFile, headers=False,
                                        extraParameters=params)
            data = b''.join([part for part in func()])
            data = json.loads(data.decode('utf8'))
            self.assertEqual(data['datacount'], 5)
            self.assertEqual(data['fields'], ['town', 'pop2000', 'pop2010'])
            self.assertLess(int(data['data'][0][1]), 100000)
            self.assertLess(int(data['data'][1][1]), int(data['data'][0][1]))
            # Test a direct query with group
            params = {
                'format': 'rawlist',
                'sort': [
                    [{'func': 'count', 'param': {'field': 'town'}}, -1],
                    [{'func': 'max', 'param': {'field': 'town'}}, 1]
                ],
                'fields': [
                    {'func': 'max', 'param': {'field': 'town'}},
                    'pop2010',
                    {'func': 'count', 'param': {'field': 'town'}}
                ],
                'group': 'pop2010,popch80_90',
                'limit': 5,
            }
            data = query.queryDatabase(
                townFile['_id'], assetstore.getDbInfoForFile(townFile), params)
            # The first element of the result is a callable that yields rows
            data = list(data[0]())
            self.assertEqual(len(data), 5)
            self.assertEqual(data[0][0], 'ABINGTON')
            self.assertEqual(data[4][0], 'AGAWAM')

            # Test with bad extraParameters
            with six.assertRaisesRegex(self, Exception,
                                       'JSON-encoded dictionary, or a url'):
                adapter.downloadFile(townFile, headers=False, extraParameters=6)

            # Test with 0 and none limits
            params = {
                'format': 'list',
                'fields': 'town,pop2000',
                'sort': 'pop2000',
                'filters': json.dumps([{
                    'field': 'pop2000', 'operator': '>', 'value': 25000}]),
            }
            params['limit'] = 0
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=params)
            jsondata = b''.join([part for part in func()])
            data = json.loads(jsondata.decode('utf8'))
            self.assertEqual(data['datacount'], 0)
            self.assertEqual(data['fields'], ['town', 'pop2000'])
            # It shouldn't matter if we ask for this via json, query, or object
            func = adapter.downloadFile(
                townFile, headers=False,
                extraParameters=urllib.parse.urlencode(params))
            self.assertEqual(b''.join([part for part in func()]), jsondata)
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=json.dumps(params))
            self.assertEqual(b''.join([part for part in func()]), jsondata)

            # The string 'none' asks for an unlimited number of rows
            params['limit'] = 'none'
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=params)
            jsondata = b''.join([part for part in func()])
            data = json.loads(jsondata.decode('utf8'))
            self.assertEqual(data['datacount'], 71)
            self.assertEqual(data['fields'], ['town', 'pop2000'])
            # It shouldn't matter if we ask for this via json, query, or object
            func = adapter.downloadFile(
                townFile, headers=False,
                extraParameters=urllib.parse.urlencode(params))
            self.assertEqual(b''.join([part for part in func()]), jsondata)
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=json.dumps(params))
            self.assertEqual(b''.join([part for part in func()]), jsondata)

            # None can also be used as unlimited
            # (jsondata still holds the unlimited result from the 'none' case)
            params['limit'] = None
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=params)
            self.assertEqual(b''.join([part for part in func()]), jsondata)
            func = adapter.downloadFile(
                townFile, headers=False,
                extraParameters=urllib.parse.urlencode(params))
            self.assertEqual(b''.join([part for part in func()]), jsondata)
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=json.dumps(params))
            self.assertEqual(b''.join([part for part in func()]), jsondata)
            # filters can also be an object or tuple
            params['filters'] = json.loads(params['filters'])
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=params)
            self.assertEqual(b''.join([part for part in func()]), jsondata)
            params['filters'] = tuple(params['filters'])
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=params)
            self.assertEqual(b''.join([part for part in func()]), jsondata)

            # Test with group
            # Reuses the params from above: sort, fields, group and limit are
            # replaced and the filters are removed
            params['sort'] = [
                [{'func': 'count', 'param': {'field': 'town'}}, -1],
                [{'func': 'max', 'param': {'field': 'town'}}, 1]]
            params['fields'] = [
                {'func': 'max', 'param': {'field': 'town'}},
                'pop2010',
                {'func': 'count', 'param': {'field': 'town'}}]
            params['group'] = 'pop2010'
            params['limit'] = 5
            del params['filters']
            func = adapter.downloadFile(
                townFile, headers=False, extraParameters=params)
            jsondata = b''.join([part for part in func()])
            data = json.loads(jsondata.decode('utf8'))
            self.assertEqual(data['datacount'], 5)
            self.assertEqual(data['data'][0][0], 'DEDHAM')
            self.assertEqual(data['data'][0][2], 2)
            self.assertEqual(data['data'][4][0], 'ACTON')
            self.assertEqual(data['data'][4][2], 1)