Code Example #1
File: datasource.py  Project: sowmya-debug/mindsdb
    def put(self, name):
        '''add new datasource'''
        data = {}

        def on_field(field):
            name = field.field_name.decode()
            value = field.value.decode()
            data[name] = value

        def on_file(file):
            data['file'] = file.file_name.decode()

        temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

        if request.headers['Content-Type'].startswith('multipart/form-data'):
            parser = multipart.create_form_parser(
                headers=request.headers,
                on_field=on_field,
                on_file=on_file,
                config={
                    'UPLOAD_DIR': temp_dir_path.encode(),  # bytes required
                    'UPLOAD_KEEP_FILENAME': True,
                    'UPLOAD_KEEP_EXTENSIONS': True,
                    'MAX_MEMORY_FILE_SIZE': 0
                })

            while True:
                chunk = request.stream.read(8192)
                if not chunk:
                    break
                parser.write(chunk)
            parser.finalize()
            parser.close()
        else:
            data = request.json

        if 'query' in data:
            query = request.json['query']
            source_type = request.json['integration_id']
            ca.default_store.save_datasource(name, source_type, query)
            os.rmdir(temp_dir_path)
            return ca.default_store.get_datasource(name)

        ds_name = data['name'] if 'name' in data else name
        source = data['source'] if 'source' in data else name
        source_type = data['source_type']

        if source_type == 'file':
            file_path = os.path.join(temp_dir_path, data['file'])
        else:
            file_path = None

        ca.default_store.save_datasource(ds_name, source_type, source,
                                         file_path)
        os.rmdir(temp_dir_path)

        return ca.default_store.get_datasource(ds_name)
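
Usage note: a minimal client-side sketch of calling this endpoint with a file upload. The base URL, route, and form field names are assumptions inferred from the handler above, not taken from the project.

import requests

# Hypothetical endpoint; the actual host, port, and route prefix depend on how
# the MindsDB HTTP API is mounted and are assumed here for illustration only.
BASE_URL = 'http://127.0.0.1:47334/api/datasources'

def upload_file_datasource(name, csv_path):
    # The handler reads plain form fields through on_field() and the uploaded
    # file through on_file(), so both are sent as multipart/form-data.
    with open(csv_path, 'rb') as f:
        response = requests.put(
            f'{BASE_URL}/{name}',
            data={'name': name, 'source_type': 'file', 'source': csv_path},
            files={'file': f},
        )
    response.raise_for_status()
    return response.json()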
Code Example #2
    def put(self, name):
        '''add new datasource'''
        data = {}

        def on_field(field):
            name = field.field_name.decode()
            value = field.value.decode()
            data[name] = value

        file_object = None

        def on_file(file):
            nonlocal file_object
            data['file'] = file.file_name.decode()
            file_object = file.file_object

        temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

        if request.headers['Content-Type'].startswith('multipart/form-data'):
            parser = multipart.create_form_parser(
                headers=request.headers,
                on_field=on_field,
                on_file=on_file,
                config={
                    'UPLOAD_DIR': temp_dir_path.encode(),    # bytes required
                    'UPLOAD_KEEP_FILENAME': True,
                    'UPLOAD_KEEP_EXTENSIONS': True,
                    'MAX_MEMORY_FILE_SIZE': 0
                }
            )

            while True:
                chunk = request.stream.read(8192)
                if not chunk:
                    break
                parser.write(chunk)
            parser.finalize()
            parser.close()

            if file_object is not None and not file_object.closed:
                file_object.close()
        else:
            data = request.json

        if 'query' in data:
            source_type = request.json['integration_id']
            if source_type not in ca.default_store.config['integrations']:
                # integration doesn't exist
                abort(400, f"{source_type} integration doesn't exist")

            if ca.default_store.config['integrations'][source_type]['type'] == 'mongodb':
                data['find'] = data['query']

            ca.default_store.save_datasource(name, source_type, data)
            os.rmdir(temp_dir_path)
            return ca.default_store.get_datasource(name)

        ds_name = data['name'] if 'name' in data else name
        source = data['source'] if 'source' in data else name
        source_type = data['source_type']

        if source_type == 'file':
            file_path = os.path.join(temp_dir_path, data['file'])
        else:
            file_path = None

        ca.default_store.save_datasource(ds_name, source_type, source, file_path)
        os.rmdir(temp_dir_path)

        return ca.default_store.get_datasource(ds_name)
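
Usage note: the non-multipart branch reads a JSON body, and the 'query' branch resolves an existing integration. A sketch of that call path (URL and integration name are assumptions):

import requests

# Hypothetical endpoint, as above.
BASE_URL = 'http://127.0.0.1:47334/api/datasources'

def create_query_datasource(name, integration_id, query):
    # Without a multipart/form-data Content-Type the handler falls through to
    # `data = request.json`, then takes the 'query' branch and saves the
    # datasource against the named integration.
    payload = {'integration_id': integration_id, 'query': query}
    response = requests.put(f'{BASE_URL}/{name}', json=payload)
    response.raise_for_status()
    return response.json()

# e.g. create_query_datasource('sales_ds', 'my_postgres', 'SELECT * FROM sales')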
Code Example #3
    def put(self, name):
        '''add new datasource'''
        data = {}
        def on_field(field):
            name = field.field_name.decode()
            value = field.value.decode()
            data[name] = value

        def on_file(file):
            data['file'] = file.file_name.decode()

        temp_dir_path = tempfile.mkdtemp(prefix='gateway_')

        if request.headers['Content-Type'].startswith('multipart/form-data'):
            parser = multipart.create_form_parser(
                headers=request.headers,
                on_field=on_field,
                on_file=on_file,
                config={
                    'UPLOAD_DIR': temp_dir_path.encode(),    # bytes required
                    'UPLOAD_KEEP_FILENAME': True,
                    'UPLOAD_KEEP_EXTENSIONS': True,
                    'MAX_MEMORY_FILE_SIZE': 0
                }
            )

            while True:
                chunk = request.stream.read(8192)
                if not chunk:
                    break
                parser.write(chunk)
            parser.finalize()
            parser.close()
        else:
            data = request.json

        if 'name' in data:
            datasource_name = data['name']
        else:
            datasource_name = name
        datasource_type = data['source_type']

        if 'source' in data:
            datasource_source = data['source']
        else:
            datasource_source = name

        if datasource_type == 'file' and 'file' not in data:
            abort(400, "Argument 'file' is missing")

        names = [x['name'] for x in get_datasources()]

        for i in range(1, 100):
            if datasource_name in names:
                previous_index = i - 1
                datasource_name = datasource_name.replace(f'({previous_index})', '')
                datasource_name += f'({i})'
            else:
                break

        os.mkdir(os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name))
        os.mkdir(os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name, 'resources'))

        ds_dir = os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name, 'datasource')
        os.mkdir(ds_dir)
        if datasource_type == 'file':
            datasource_source = os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name, 'datasource', datasource_source)
            os.replace(
                os.path.join(temp_dir_path, data['file']),
                datasource_source
            )
            ds = FileDS(datasource_source)
        else:
            ds = FileDS(datasource_source)

        os.rmdir(temp_dir_path)

        df = ds.df
        columns = [dict(name=x) for x in list(df.keys())]
        row_count = len(df)

        df_with_types = cast_df_columns_types(df)
        create_sqlite_db(os.path.join(ds_dir, 'sqlite.db'), df_with_types)

        new_data_source = {
            'name': datasource_name,
            'source_type': datasource_type,
            'source': datasource_source,
            'missed_files': False,
            'created_at': datetime.datetime.now(),
            'updated_at': datetime.datetime.now(),
            'row_count': row_count,
            'columns': columns
        }

        save_datasource_metadata(new_data_source)

        return get_datasource(datasource_name)
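
Note: the for-loop above avoids name collisions by stripping the previous numeric suffix and appending the next one. A standalone sketch of the same scheme (the function name and sample names are illustrative):

def deduplicate_name(datasource_name, existing_names, limit=100):
    # Mirrors the loop above: while the name is taken, drop the previous
    # "(i-1)" suffix and try "(i)" instead, up to `limit` attempts.
    for i in range(1, limit):
        if datasource_name not in existing_names:
            break
        datasource_name = datasource_name.replace(f'({i - 1})', '')
        datasource_name += f'({i})'
    return datasource_name

# deduplicate_name('sales', ['sales', 'sales(1)'])  ->  'sales(2)'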
Code Example #4
    def put(self, name):
        '''add new datasource'''
        data = {}

        def on_field(field):
            name = field.field_name.decode()
            value = field.value.decode()
            data[name] = value

        file_object = None

        def on_file(file):
            nonlocal file_object
            data['file'] = file.file_name.decode()
            file_object = file.file_object

        temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

        if request.headers['Content-Type'].startswith('multipart/form-data'):
            parser = multipart.create_form_parser(
                headers=request.headers,
                on_field=on_field,
                on_file=on_file,
                config={
                    'UPLOAD_DIR': temp_dir_path.encode(),  # bytes required
                    'UPLOAD_KEEP_FILENAME': True,
                    'UPLOAD_KEEP_EXTENSIONS': True,
                    'MAX_MEMORY_FILE_SIZE': 0
                })

            while True:
                chunk = request.stream.read(8192)
                if not chunk:
                    break
                parser.write(chunk)
            parser.finalize()
            parser.close()

            if file_object is not None and not file_object.closed:
                file_object.close()
        else:
            data = request.json

        if 'query' in data:
            integration_id = request.json['integration_id']
            integration = get_db_integration(integration_id,
                                             request.company_id)
            if integration is None:
                abort(400, f"{integration_id} integration doesn't exist")

            if integration['type'] == 'mongodb':
                data['find'] = data['query']

            request.default_store.save_datasource(name, integration_id, data)
            os.rmdir(temp_dir_path)
            return request.default_store.get_datasource(name)

        ds_name = data['name'] if 'name' in data else name
        source = data['source'] if 'source' in data else name
        source_type = data['source_type']

        if source_type == 'file':
            file_path = os.path.join(temp_dir_path, data['file'])
            lp = file_path.lower()
            if lp.endswith(('.zip', '.tar.gz')):
                if lp.endswith('.zip'):
                    with zipfile.ZipFile(file_path) as f:
                        f.extractall(temp_dir_path)
                elif lp.endswith('.tar.gz'):
                    with tarfile.open(file_path) as f:
                        f.extractall(temp_dir_path)
                os.remove(file_path)
                files = os.listdir(temp_dir_path)
                if len(files) != 1:
                    os.rmdir(temp_dir_path)
                    return http_error(
                        400, 'Wrong content.',
                        'Archive must contain only one data file.')
                file_path = os.path.join(temp_dir_path, files[0])
                source = files[0]
                if not os.path.isfile(file_path):
                    os.rmdir(temp_dir_path)
                    return http_error(
                        400, 'Wrong content.',
                        'Archive must contain data file in root.')
        else:
            file_path = None

        request.default_store.save_datasource(ds_name, source_type, source,
                                              file_path)
        os.rmdir(temp_dir_path)

        return request.default_store.get_datasource(ds_name)
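
Note: this variant also accepts .zip and .tar.gz uploads and requires the archive to contain exactly one data file at its root. A standalone sketch of that validation (the helper name is hypothetical; it assumes dest_dir is a fresh temporary directory holding only the archive, as in the handler):

import os
import tarfile
import zipfile

def extract_single_data_file(archive_path, dest_dir):
    # Unpack the archive into dest_dir, as the handler does with the upload
    # directory.
    lower = archive_path.lower()
    if lower.endswith('.zip'):
        with zipfile.ZipFile(archive_path) as f:
            f.extractall(dest_dir)
    elif lower.endswith('.tar.gz'):
        with tarfile.open(archive_path) as f:
            f.extractall(dest_dir)
    else:
        raise ValueError('Unsupported archive type')

    # Remove the archive itself, then enforce "exactly one regular file in
    # the root", mirroring the checks in the handler above.
    os.remove(archive_path)
    files = os.listdir(dest_dir)
    if len(files) != 1:
        raise ValueError('Archive must contain only one data file.')
    data_path = os.path.join(dest_dir, files[0])
    if not os.path.isfile(data_path):
        raise ValueError('Archive must contain the data file in its root.')
    return data_path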