예제 #1
0
    def test_quick_query_iter_records_with_json_serialization_setting(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Iterate query records when input and output are delimited JSON.

        Uploads three newline-separated JSON documents, runs ``SELECT name``
        with ``;`` as the output record delimiter, and checks the items
        yielded by ``resp.records()``.
        """
        self._setUp(
            datalake_storage_account_name, datalake_storage_account_key)

        # Arrange: the middle document has no "name" key, so an empty
        # object is expected for it in the result.
        owner_doc = b'{\"name\": \"owner\", \"id\": 1}'
        other_doc = b'{\"name2\": \"owner2\"}'
        payload = b'\n'.join((owner_doc, other_doc, owner_doc))

        # Upload the JSON file.
        target_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(
            self.filesystem_name, target_name)
        file_client.upload_data(payload, overwrite=True)

        # Everything reported through the on_error callback lands here.
        reported_errors = []

        # Act
        resp = file_client.query_file(
            "SELECT name from BlobStorage",
            on_error=reported_errors.append,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=DelimitedJsonDialect(delimiter=';'))
        records = list(resp.records())

        # Assert: no errors were reported, the reader's length matches the
        # uploaded payload, and the last yielded record is empty.
        self.assertEqual(len(reported_errors), 0)
        self.assertEqual(len(resp), len(payload))
        self.assertEqual(
            records,
            [b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b''])
    def test_quick_query_readall_with_json_serialization_setting(self):
        """Read a whole query result when input and output are delimited JSON.

        Uploads three newline-separated JSON documents, runs ``SELECT name``
        with ``;`` as the output record delimiter, and checks the bytes
        returned by ``resp.readall()``.

        Reads ``self.dsc`` and ``self.filesystem_name``; presumably these are
        prepared by the test class's setup, which is not visible here.
        """
        # Arrange: the middle document has no "name" key, so an empty
        # object is expected for it in the result.
        owner_doc = b'{\"name\": \"owner\", \"id\": 1}'
        other_doc = b'{\"name2\": \"owner2\"}'
        payload = b'\n'.join((owner_doc, other_doc, owner_doc))

        # Upload the JSON file.
        target_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(
            self.filesystem_name, target_name)
        file_client.upload_data(payload, overwrite=True)

        # Everything reported through the on_error callback lands here.
        reported_errors = []

        # Act
        resp = file_client.query_file(
            "SELECT name from BlobStorage",
            on_error=reported_errors.append,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=DelimitedJsonDialect(delimiter=';'))
        result_bytes = resp.readall()

        # Assert: no errors were reported, the reader's length matches the
        # uploaded payload, and every record is terminated by ';'.
        self.assertEqual(len(reported_errors), 0)
        self.assertEqual(len(resp), len(payload))
        self.assertEqual(
            result_bytes, b'{"name":"owner"};{};{"name":"owner"};')
예제 #3
0
    def test_quick_query_iter_records_with_fatal_error_ignore(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Iterate query records over malformed JSON without an error handler.

        The uploaded documents use unquoted keys and values, so they are not
        valid JSON. No ``on_error`` callback is passed to ``query_file``; the
        test makes no assertions and passes as long as iterating
        ``resp.records()`` does not raise.
        """
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = file_client.query_file("SELECT * from BlobStorage",
                                      file_format=input_format,
                                      output_format=output_format)

        # Exhaust the record iterator; reaching the end without an exception
        # is the whole check.
        for record in resp.records():
            print(record)
    def test_quick_query_readall_with_fatal_error_handler_raise(self):
        """Check that an exception raised by ``on_error`` reaches the caller.

        The uploaded documents use unquoted keys and values, so they are not
        valid JSON. The ``on_error`` callback raises an ``Exception`` built
        from ``error.description``, and the test expects ``resp.readall()``
        to raise.

        Reads ``self.dsc`` and ``self.filesystem_name``; presumably these are
        prepared by the test class's setup, which is not visible here.
        """
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        def on_error(error):
            # Turn the reported error into an exception so it surfaces
            # from the read call below.
            raise Exception(error.description)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)
        with pytest.raises(Exception):
            resp.readall()
def main():
    """Run the quick-query sample against a Data Lake Storage account.

    Reads the connection string from the ``AZURE_STORAGE_CONNECTION_STRING``
    environment variable and exits with status 1 if it is not set. Otherwise
    it uploads ``CSV_DATA`` to a file named ``csvfile`` in the
    ``quickqueryfilesystem`` file system, queries the second column, prints
    the query result and deletes the file system.
    """
    try:
        connection_string = os.environ['AZURE_STORAGE_CONNECTION_STRING']

    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    datalake_service_client = DataLakeServiceClient.from_connection_string(
        connection_string)
    filesystem_name = "quickqueryfilesystem"
    filesystem_client = datalake_service_client.get_file_system_client(
        filesystem_name)
    try:
        filesystem_client.create_file_system()
    except Exception:
        # Best effort: creation presumably fails when the file system is left
        # over from an earlier run, in which case the sample can still
        # proceed. Catching Exception (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        pass
    # [START query]
    errors = []

    def on_error(error):
        errors.append(error)

    # upload the csv file
    file_client = datalake_service_client.get_file_client(
        filesystem_name, "csvfile")
    file_client.upload_data(CSV_DATA, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from DataLakeStorage"
    input_format = DelimitedTextDialect(delimiter=',',
                                        quotechar='"',
                                        lineterminator='\n',
                                        escapechar="",
                                        has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = file_client.query_file(query_expression,
                                    on_error=on_error,
                                    file_format=input_format,
                                    output_format=output_format)
    content = reader.readall()
    # [END query]
    print(content)

    filesystem_client.delete_file_system()
예제 #6
0
    def test_quick_query_readall_with_fatal_error_ignore(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Read a whole query result over malformed JSON without a handler.

        The uploaded documents use unquoted keys and values, so they are not
        valid JSON. No ``on_error`` callback is passed to ``query_file``; the
        test makes no assertions and passes as long as ``resp.readall()``
        does not raise.
        """
        self._setUp(
            datalake_storage_account_name, datalake_storage_account_key)

        # Arrange
        first_doc = b'{name: owner}'
        second_doc = b'{name2: owner2}'
        payload = b'\n'.join((first_doc, second_doc, first_doc))

        # Upload the JSON file.
        target_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(
            self.filesystem_name, target_name)
        file_client.upload_data(payload, overwrite=True)

        # Act: JSON in, delimited text out.
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            file_format=DelimitedJsonDialect(),
            output_format=DelimitedTextDialect(
                delimiter=';',
                quotechar="'",
                lineterminator='.',
                escapechar='\\'))

        # The result itself is not inspected; completing the read is the check.
        _ = resp.readall()