Exemple #1
0
def produce_collection_schema(collection):
    """Build a discovery-mode catalog entry for a MongoDB collection.

    Args:
        collection: the collection to describe (presumably a pymongo
            Collection -- confirm against caller).

    Returns:
        dict: catalog entry with table_name/stream/metadata/tap_stream_id/schema.
    """
    collection_name = collection.name
    collection_db_name = collection.database.name
    mdata = {}
    mdata = metadata.write(mdata, (), 'database-name', collection_db_name)
    mdata = metadata.write(mdata, (), 'row-count', collection.estimated_document_count())

    # Get indexes
    coll_indexes = collection.index_information()

    # If _id is in indexes, write `table-key-properties = ['_id']` metadata.
    # If _id isn't present, fall back to the first single-field unique index.
    if coll_indexes.get('_id_'):
        mdata = metadata.write(mdata, (), 'table-key-properties', ['_id'])
    else:
        for index_info in coll_indexes.values():
            index_key = index_info.get('key', [])
            if index_info.get('unique') and len(index_key) == 1:
                mdata = metadata.write(mdata, (), 'table-key-properties',
                                       [index_key[0][0]])
                break

    # Any single-field index other than _id is a candidate replication key.
    # (Compound indexes are not supported as replication keys.)
    valid_replication_keys = [
        index_info['key'][0][0]
        for index_name, index_info in coll_indexes.items()
        if index_name != '_id_' and len(index_info.get('key', [])) == 1
    ]
    if valid_replication_keys:
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               valid_replication_keys)

    return {
        'table_name': collection_name,
        'stream': collection_name,
        'metadata': metadata.to_list(mdata),
        'tap_stream_id': "{}-{}".format(collection_db_name, collection_name),
        'schema': {
            'type': 'object'
        }
    }
Exemple #2
0
    def select_all_fields_except(self, blacklisted_fields, schema, md):
        """Mark the stream and every permitted property as selected.

        Properties listed in *blacklisted_fields* and the 'day' field are
        left untouched.
        """
        md = metadata.write(md, (), 'selected', True)
        skip = set(blacklisted_fields) | {'day'}
        for field_name in schema['properties']:
            if field_name not in skip:
                md = metadata.write(md, ('properties', field_name),
                                    'selected', True)
        return md
Exemple #3
0
def get_schemas(config, config_path):
    """Discover schemas and metadata for every valid MS Dynamics stream.

    Returns a (schemas, schemas_metadata) pair of dicts keyed by stream name.
    """
    schemas = {}
    schemas_metadata = {}

    streams = get_streams(config, config_path)

    LOGGER.info('There are {:d} valid streams in MS Dynamics'.format(
        len(streams)))

    for stream_name, stream_object in streams.items():
        schema = stream_object.schema

        # Start from standard singer metadata, then layer replication details.
        mdata_map = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_object.key_properties,
            replication_method=stream_object.replication_method))

        if stream_object.valid_replication_keys:
            mdata_map = metadata.write(
                mdata_map, (), 'valid-replication-keys',
                stream_object.valid_replication_keys)
        if stream_object.replication_key:
            # The replication key must always be emitted.
            mdata_map = metadata.write(
                mdata_map, ('properties', stream_object.replication_key),
                'inclusion', 'automatic')

        schemas[stream_name] = schema
        schemas_metadata[stream_name] = metadata.to_list(mdata_map)

    return schemas, schemas_metadata
Exemple #4
0
def get_schemas(config):
    """Discover schemas and metadata for every configured S3 table.

    Returns a (schemas, schemas_metadata) pair keyed by tap_stream_id.
    """
    schemas = {}
    schemas_metadata = {}
    client = S3Client(config['aws_access_key_id'], config['aws_secret_access_key'])

    for tap_stream_id, table_spec in config['tables'].items():
        LOGGER.info(f'Starting discovery for {tap_stream_id}')
        stream_object = Stream(client, table_spec, None)
        stream_schema = stream_object.get_schema()

        # Standard singer metadata first, replication details layered on top.
        mdata_map = metadata.to_map(metadata.get_standard_metadata(
            schema=stream_schema,
            key_properties=stream_object.key_properties,
            replication_method=stream_object.replication_method,
        ))

        if stream_object.valid_replication_keys:
            mdata_map = metadata.write(
                mdata_map, (), 'valid-replication-keys',
                stream_object.valid_replication_keys)
        if stream_object.replication_key:
            # Replication key fields are always included in output records.
            mdata_map = metadata.write(
                mdata_map, ('properties', stream_object.replication_key),
                'inclusion', 'automatic')

        schemas[tap_stream_id] = stream_schema
        schemas_metadata[tap_stream_id] = metadata.to_list(mdata_map)

    return schemas, schemas_metadata
Exemple #5
0
def discover(ctx):
    """Build the discovery catalog after validating credentials."""
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for tap_stream_id in schemas.stream_ids:
        schema_dict = schemas.load_schema(tap_stream_id)

        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema_dict, key_properties=schemas.PK_FIELDS[tap_stream_id]))

        # NB: `lists` and `messages` are required for their substreams.
        # This is an approximation of the initial functionality using
        # metadata, which marked them as `selected=True` in the schema.
        if tap_stream_id in ['lists', 'messages']:
            mdata = metadata.write(mdata, (), 'inclusion', 'automatic')

        # Every field in this tap is forced to automatic inclusion.
        for field_name in schema_dict['properties']:
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', 'automatic')

        entry = CatalogEntry(stream=tap_stream_id,
                             tap_stream_id=tap_stream_id,
                             key_properties=schemas.PK_FIELDS[tap_stream_id],
                             schema=Schema.from_dict(schema_dict),
                             metadata=metadata.to_list(mdata))
        catalog.streams.append(entry)
    return catalog
Exemple #6
0
def discover():
    """Build a catalog from the locally loaded schemas.

    NOTE(review): key properties and stream selection are still mocked;
    replace with real per-stream metadata when available.
    """
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        # TODO: populate any metadata and stream's key properties here..
        mdata = metadata.to_map(metadata.get_standard_metadata(schema.to_dict()))
        # Bug fix: the previous code discarded metadata.write()'s result,
        # so the 'selected' flag never reached the emitted catalog entry.
        mdata = metadata.write(mdata, (), "selected", True)
        stream_metadata = metadata.to_list(mdata)
        key_properties = ['id']
        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)
def discover():
    '''
    Run discovery mode
    '''
    streams = []

    for stream_id, stream_object in STREAMS.items():
        raw_schema = load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)

        # Bug fix: only advertise valid-replication-keys when the stream
        # actually has a replication key -- previously full-table streams
        # emitted the bogus list [None].
        valid_replication_keys = (
            [stream_object.replication_key]
            if stream_object.replication_key else None)

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=raw_schema,
                schema_name=stream_id,
                key_properties=stream_object.key_properties,
                valid_replication_keys=valid_replication_keys,
                replication_method=stream_object.replication_method))

        # make sure that the replication key field is mandatory
        if stream_object.replication_key:
            # Capture the returned map instead of relying on in-place
            # mutation of metadata.write().
            mdata = metadata.write(
                mdata, ('properties', stream_object.replication_key),
                'inclusion', 'automatic')

        streams.append(
            CatalogEntry(stream=stream_id,
                         tap_stream_id=stream_id,
                         key_properties=stream_object.key_properties,
                         schema=schema,
                         metadata=metadata.to_list(mdata)))
    return Catalog(streams)
Exemple #8
0
def discover():
    """
    Allow discovery of all streams and metadata
    """
    streams = []

    for schema_name, schema in load_schemas().items():
        mdata = metadata.new()
        # 'id' is the primary key and 'updated_at' the replication
        # bookmark; both are always replicated, everything else is opt-in.
        mdata = metadata.write(mdata, (), 'table-key-properties', ['id'])
        mdata = metadata.write(mdata, ('properties', 'id'), 'inclusion',
                               'automatic')
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               ['updated_at'])
        mdata = metadata.write(mdata, ('properties', 'updated_at'),
                               'inclusion', 'automatic')

        for field_name in schema['properties']:
            if field_name in {'id', 'updated_at'}:
                continue
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', 'available')

        # create and add catalog entry
        streams.append({
            'stream': schema_name,
            'tap_stream_id': schema_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
            'key_properties': ['id']
        })

    return {'streams': streams}
Exemple #9
0
def do_discover():
    """Discover all streams and assemble a singer Catalog."""
    catalog_entries = []

    for stream_name, schema in _load_schemas().items():
        stream = STREAM_OBJECTS[stream_name]
        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            replication_method=stream.replication_method))

        if stream.replication_key:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   [stream.replication_key])

        for field_name in schema['properties']:
            # Key properties and the replication key are mandatory.
            is_automatic = (field_name in stream.key_properties
                            or field_name == stream.replication_key)
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion',
                                   'automatic' if is_automatic else 'available')

        catalog_entries.append({
            'stream': stream_name,
            'tap_stream_id': stream_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
            'key_properties': stream.key_properties})

    return Catalog.from_dict({'streams': catalog_entries})
Exemple #10
0
def get_metadata(schema, key_properties, replication_method, replication_key):
    """Build singer metadata for a stream schema.

    Key properties, the replication key, and the 'updated' field are
    marked automatic; every other property is available.
    Returns the metadata as a list.
    """
    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)
    mdata = metadata.write(mdata, (), 'forced-replication-method',
                           replication_method)

    if replication_key:
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               [replication_key])

    for field_name in schema['properties']:
        automatic = (field_name in key_properties
                     or field_name in [replication_key, "updated"])
        mdata = metadata.write(mdata, ('properties', field_name),
                               'inclusion',
                               'automatic' if automatic else 'available')

    return metadata.to_list(mdata)
Exemple #11
0
def discover_catalog(name, automatic_inclusion, **kwargs):
    """Load the JSON schema for *name* and attach inclusion metadata.

    Keyword args:
        unsupported: fields to mark as unsupported.
        stream_automatic_inclusion: mark the whole stream automatic.
    """
    unsupported = kwargs.get("unsupported", frozenset([]))
    stream_automatic_inclusion = kwargs.get("stream_automatic_inclusion",
                                            False)
    root = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(root, 'schemas/{}.json'.format(name))
    mdata = metadata.new()

    with open(path, "r") as schema_file:
        discovered_schema = json.load(schema_file)

    for field in discovered_schema["schema"]["properties"]:
        if field in automatic_inclusion:
            inclusion = 'automatic'
        elif field in unsupported:
            inclusion = 'unsupported'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, ('properties', field), 'inclusion',
                               inclusion)

    if stream_automatic_inclusion:
        mdata = metadata.write(mdata, (), 'inclusion', 'automatic')

    discovered_schema["metadata"] = metadata.to_list(mdata)
    return discovered_schema
Exemple #12
0
def get_schema_for_type(typ, breadcrumb, mdata, null=False):
    """Translate a Marketo field type into a JSON-schema fragment.

    Records inclusion metadata at *breadcrumb* and returns the
    (schema, mdata) pair. Nullable fields become 'available'; others
    'automatic'.
    """
    # http://developers.marketo.com/rest-api/lead-database/fields/field-types/
    if typ in ('datetime', 'date'):
        prop_schema = {"type": "string", "format": "date-time"}
    elif typ in ('integer', 'percent', 'score'):
        prop_schema = {'type': 'integer'}
    elif typ in ('float', 'currency'):
        prop_schema = {'type': 'number'}
    elif typ == 'boolean':
        prop_schema = {'type': 'boolean'}
    elif typ in STRING_TYPES:
        prop_schema = {'type': 'string'}
    elif typ == 'array':
        prop_schema = {
            'type': 'array',
            'items': {
                'type': ['integer', 'number', 'string', 'null']
            },
        }
    else:
        # Unknown types fall back to plain strings.
        prop_schema = {'type': 'string'}

    if null:
        prop_schema["type"] = [prop_schema["type"], "null"]
    inclusion = "available" if null else "automatic"
    prop_schema["inclusion"] = inclusion
    mdata = metadata.write(mdata, breadcrumb, 'inclusion', inclusion)

    return prop_schema, mdata
Exemple #13
0
def do_discover():
    """Discover catalog entries for every stream in STREAM_OBJECTS.

    Replication key fields are forced to automatic inclusion.
    """
    raw_schemas = _load_schemas()
    catalog_entries = []

    for stream_name, schema in raw_schemas.items():
        # create and add catalog entry
        stream = STREAM_OBJECTS[stream_name]
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            valid_replication_keys=stream.replication_keys,
            replication_method=stream.replication_method,
        )
        mdata = metadata.to_map(mdata)
        for field_name in stream.replication_keys:
            # Fix: capture the returned map -- relying on metadata.write()
            # mutating its argument in place is fragile and inconsistent
            # with the rest of this codebase.
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', 'automatic')

        catalog_entry = {
            "stream": stream_name,
            "tap_stream_id": stream_name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
            "key_properties": stream.key_properties,
        }
        catalog_entries.append(catalog_entry)

    return Catalog.from_dict({"streams": catalog_entries})
Exemple #14
0
def _populate_metadata(schema_name: str, schema: Dict) -> Dict:
    """
    Populates initial metadata for each field in a schema.
    Args:
        schema_name: The schema name to generate metadata for e.g. 'general_ledger_accounts'.
        schema: The corresponding JSON schema.

    Returns: Metadata dictionary for the selected stream. Fields are disabled by default.

    """
    key_properties = KEY_PROPERTIES[schema_name]

    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)
    mdata = metadata.write(mdata, (), 'selected', False)

    for field_name in schema['properties']:
        breadcrumb = ('properties', field_name)
        if field_name in key_properties:
            mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'automatic')
        else:
            mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'available')
            # Non-key fields start out deselected.
            mdata = metadata.write(mdata, breadcrumb, 'selected', False)

    return mdata
Exemple #15
0
def discover(ctx):
    """Build the catalog: PK fields automatic, everything else available."""
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])
    for stream in streams.STREAMS:
        pk_fields = streams.PK_FIELDS[stream.tap_stream_id]
        schema = Schema.from_dict(streams.load_schema(stream.tap_stream_id),
                                  inclusion="available")

        mdata = metadata.new()
        for prop in schema.properties:
            inclusion = 'automatic' if prop in pk_fields else 'available'
            mdata = metadata.write(mdata, ('properties', prop), 'inclusion',
                                   inclusion)

        catalog.streams.append(
            CatalogEntry(
                stream=stream.tap_stream_id,
                tap_stream_id=stream.tap_stream_id,
                key_properties=pk_fields,
                schema=schema,
                metadata=metadata.to_list(mdata)))
    return catalog
Exemple #16
0
    def get_metadata(self):
        """Derive key properties from date-like fields and build metadata.

        Every field whose name contains 'date' becomes a key property and
        is marked automatic; all fields are selected by default. Also
        sets self.key_properties as a side effect.
        """
        field_names = self.schema.get('properties').keys()
        self.key_properties = [name for name in field_names if 'date' in name]

        mdata = metadata.new()
        mdata = metadata.write(mdata, (), 'table-key-properties',
                               self.key_properties)
        mdata = metadata.write(mdata, (), 'forced-replication-method',
                               'INCREMENTAL')

        for field_name in field_names:
            inclusion = ('automatic' if field_name in self.key_properties
                         else 'available')
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', inclusion)
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'selected-by-default', True)

        return metadata.to_list(mdata)
Exemple #17
0
def discover_table_schema(client, table_name):
    """Describe a DynamoDB table and build its discovery catalog entry.

    Returns None when the DescribeTable call is not authorized.
    """
    try:
        table_info = client.describe_table(TableName=table_name).get(
            'Table', {})
    except ClientError:
        LOGGER.critical(
            "Authorization to AWS failed. Please ensure the role and policy are configured correctly on your AWS account."
        )
        return None

    # write stream metadata
    key_props = [key_schema.get('AttributeName')
                 for key_schema in table_info.get('KeySchema', [])]
    mdata = {}
    mdata = metadata.write(mdata, (), 'table-key-properties', key_props)
    item_count = table_info.get('ItemCount')
    if item_count:
        mdata = metadata.write(mdata, (), 'row-count', item_count)

    return {
        'table_name': table_name,
        'stream': table_name,
        'tap_stream_id': table_name,
        'metadata': metadata.to_list(mdata),
        'schema': {
            'type': 'object'
        }
    }
Exemple #18
0
    def load_metadata(self):
        """Build the metadata list for this stream's schema.

        For period streams, the field that does not match the configured
        period ('day' vs 'hour') is removed from the metadata map.
        """
        schema = self.load_schema()

        mdata = metadata.new()
        mdata = metadata.write(mdata, (), 'table-key-properties',
                               self.key_properties)
        mdata = metadata.write(mdata, (), 'forced-replication-method',
                               self.replication_method)

        if self.replication_key:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   [self.replication_key])

        for field_name in schema['properties']:
            automatic = (field_name in self.key_properties
                         or field_name == self.replication_key)
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion',
                                   'automatic' if automatic else 'available')

        # For period stream adjust schema for time period.
        # NOTE(review): pop() raises KeyError if the field is missing --
        # presumably period schemas always carry both 'day' and 'hour';
        # confirm against the schema files.
        if hasattr(self, 'period') and self.period == 'hourRange':
            mdata.pop(('properties', 'day'))
        elif hasattr(self, 'period') and self.period == 'dayRange':
            mdata.pop(('properties', 'hour'))

        return metadata.to_list(mdata)
Exemple #19
0
def get_schemas():
    """Load every stream's JSON schema and build its singer metadata.

    Returns a (schemas, schemas_metadata) pair keyed by stream name.
    """
    schemas = {}
    schemas_metadata = {}

    for stream_name, stream_object in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as schema_file:
            schema = json.load(schema_file)

        # Standard singer metadata first, replication details layered on top.
        mdata_map = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_object.key_properties,
            replication_method=stream_object.replication_method))

        if stream_object.valid_replication_keys:
            mdata_map = metadata.write(
                mdata_map, (), 'valid-replication-keys',
                stream_object.valid_replication_keys)
        if stream_object.replication_key:
            # The replication key is always replicated.
            mdata_map = metadata.write(
                mdata_map, ('properties', stream_object.replication_key),
                'inclusion', 'automatic')

        schemas[stream_name] = schema
        schemas_metadata[stream_name] = metadata.to_list(mdata_map)

    return schemas, schemas_metadata
Exemple #20
0
def produce_collection_schema(collection: Collection) -> Dict:
    """
    Generate a schema/catalog from the collection details for discovery mode
    Args:
        collection: stream Collection

    Returns: collection catalog

    """
    name = collection.name
    db_name = collection.database.name
    is_view = collection.options().get('viewOn') is not None

    mdata = {}
    mdata = metadata.write(mdata, (), 'table-key-properties', ['_id'])
    mdata = metadata.write(mdata, (), 'database-name', db_name)
    mdata = metadata.write(mdata, (), 'row-count',
                           collection.estimated_document_count())
    mdata = metadata.write(mdata, (), 'is-view', is_view)

    # write valid-replication-key metadata by finding fields that have indexes on them.
    # cannot get indexes for views -- NB: This means no key-based incremental for views?
    if not is_view:
        # index_information() returns a map of index_name -> index_information;
        # each index's 'key' is a list of (field_name, sort_direction) tuples.
        # Compound indexes are not supported, so only single-field indexes
        # qualify as replication keys.
        valid_replication_keys = [
            index_info.get('key')[0][0]
            for index_info in collection.index_information().values()
            if len(index_info.get('key')) == 1 and index_info.get('key')[0]
        ]
        if valid_replication_keys:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   valid_replication_keys)

    return {
        'table_name': name,
        'stream': name,
        'metadata': metadata.to_list(mdata),
        'tap_stream_id': "{}-{}".format(db_name, name),
        'schema': {
            'type': 'object',
            'properties': {
                "_id": {
                    "type": ["string", "null"]
                },
                "document": {
                    "type": ["object", "array", "string", "null"]
                },
                "_sdc_deleted_at": {
                    "type": ["string", "null"]
                },
            },
        }
    }
Exemple #21
0
 def test_build_field_list_include_datecreated(self):
     # Selecting 'datecreated' should include it in the generated field
     # list and map its field id to the matching breadcrumb.
     singer_metadata.write(self.metadata, ('properties', 'datecreated'),
                           'selected', True)
     field_list, ids_to_breadcrumbs = tap_quickbase.build_field_lists(
         self.schema, self.metadata, [])
     # NOTE(review): the expected count of 2 depends on the self.schema
     # fixture defined in setUp (outside this view) -- presumably one
     # default field plus the newly selected 'datecreated'.
     self.assertEqual(2, len(field_list))
     self.assertEqual(['properties', 'datecreated'],
                      ids_to_breadcrumbs['1'])
Exemple #22
0
def write_sql_data_type_md(mdata, col_info):
    """Record a column's SQL data type in the metadata map.

    bit columns wider than one bit are written as 'bit(N)'; everything
    else uses the raw sql_data_type.
    """
    column = col_info.column_name
    if col_info.sql_data_type == 'bit' and col_info.character_maximum_length > 1:
        datatype = "bit({})".format(col_info.character_maximum_length)
    else:
        datatype = col_info.sql_data_type
    return metadata.write(mdata, ('properties', column), 'sql-datatype', datatype)
Exemple #23
0
def generate_metadata(schema):
    """Build metadata for *schema*: 'id' is the table key and every
    property is marked for automatic inclusion.

    Returns the singer metadata list.
    """
    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', ['id'])

    # Iterate keys directly -- the unpacked property definitions were unused.
    for field_name in schema['properties']:
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic')

    return metadata.to_list(mdata)
def field_to_property_schema(field, mdata):
    """Map a Salesforce field description to a JSON-schema property.

    Returns the (property_schema, mdata) pair. Binary fields get no
    schema and are marked unsupported in the metadata; loose types get
    an empty schema (meaning: any type).

    Raises:
        TapSalesforceException: for unrecognised Salesforce types.
    """
    property_schema = {}

    name = field['name']
    field_type = field['type']

    if field_type in STRING_TYPES:
        property_schema['type'] = "string"
    elif field_type in DATE_TYPES:
        # Dates may arrive well-formed or as free text, so accept both.
        property_schema["anyOf"] = [
            {"type": "string", "format": "date-time"},
            {"type": ["string", "null"]},
        ]
    elif field_type == "boolean":
        property_schema['type'] = "boolean"
    elif field_type in NUMBER_TYPES:
        property_schema['type'] = "number"
    elif field_type == "address":
        property_schema['type'] = "object"
        property_schema['properties'] = {
            "street": {"type": ["null", "string"]},
            "state": {"type": ["null", "string"]},
            "postalCode": {"type": ["null", "string"]},
            "city": {"type": ["null", "string"]},
            "country": {"type": ["null", "string"]},
            "longitude": {"type": ["null", "number"]},
            "latitude": {"type": ["null", "number"]},
            "geocodeAccuracy": {"type": ["null", "string"]}
        }
    elif field_type == "int":
        property_schema['type'] = "integer"
    elif field_type == "time":
        property_schema['type'] = "string"
    elif field_type in LOOSE_TYPES:
        return property_schema, mdata  # No type = all types
    elif field_type in BINARY_TYPES:
        mdata = metadata.write(mdata, ('properties', name), "inclusion", "unsupported")
        mdata = metadata.write(mdata, ('properties', name),
                               "unsupported-description", "binary data")
        return property_schema, mdata
    elif field_type == 'location':
        # geo coordinates are numbers or objects divided into two fields for lat/long
        property_schema['type'] = ["number", "object", "null"]
        property_schema['properties'] = {
            "longitude": {"type": ["null", "number"]},
            "latitude": {"type": ["null", "number"]}
        }
    elif field_type == 'json':
        property_schema['type'] = "string"
    else:
        raise TapSalesforceException("Found unsupported type: {}".format(field_type))

    # The nillable field cannot be trusted
    if name != 'Id' and field_type != 'location' and field_type not in DATE_TYPES:
        property_schema['type'] = ["null", property_schema['type']]

    return property_schema, mdata
Exemple #25
0
def generate_base_metadata(all_cubes, schema):
    """Build base GA metadata: record-hash key plus report-level fields.

    The six report fields are forced automatic and grouped under
    "Report Fields". Returns the metadata map (not the list form).
    """
    report_fields = ["_sdc_record_hash", "start_date", "end_date",
                     "account_id", "web_property_id", "profile_id"]

    mdata = metadata.to_map(metadata.get_standard_metadata(
        schema=schema, key_properties=["_sdc_record_hash"]))
    mdata = metadata.write(mdata, (), "tap_google_analytics.all_cubes", list(all_cubes))

    for field_name in report_fields:
        mdata = metadata.write(mdata, ("properties", field_name),
                               "inclusion", "automatic")
    for field_name in report_fields:
        mdata = metadata.write(mdata, ("properties", field_name),
                               "tap_google_analytics.group", "Report Fields")
    return mdata
Exemple #26
0
def load_metadata(schema):
    """Build metadata for *schema*: every field is automatic, and
    RECORDNO (when present) becomes the table key.

    Returns the singer metadata list.
    """
    mdata = metadata.new()

    field_names = schema.get('properties', {}).keys()
    for field_name in field_names:
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion',
                               'automatic')

    # Fix: 'table-key-properties' must be a LIST of column names per the
    # Singer spec -- a bare string would be iterated character-by-character
    # by downstream consumers. Also hoisted out of the loop.
    if "RECORDNO" in field_names:
        mdata = metadata.write(mdata, (), 'table-key-properties',
                               ["RECORDNO"])

    return metadata.to_list(mdata)
def create_column_metadata(cols):
    """Produce per-column metadata: sql datatype plus selected-by-default.

    Columns whose schema inclusion is 'unsupported' are not selected by
    default; the stream itself defaults to deselected.
    """
    mdata = metadata.write({}, (), 'selected-by-default', False)
    for col in cols:
        breadcrumb = ('properties', col.column_name)
        col_schema = schema_for_column(col)
        mdata = metadata.write(mdata, breadcrumb, 'selected-by-default',
                               col_schema.inclusion != 'unsupported')
        mdata = metadata.write(mdata, breadcrumb, 'sql-datatype',
                               col.column_type.lower())

    return metadata.to_list(mdata)
Exemple #28
0
def generate_metadata(schema_name, schema):
    """Build metadata for *schema_name*: PK fields automatic, rest available."""
    pk_fields = SCHEMA_PRIMARY_KEYS[schema_name]

    mdata = metadata.write(metadata.new(), (), 'table-key-properties', pk_fields)
    for field_name in schema['properties']:
        inclusion = 'automatic' if field_name in pk_fields else 'available'
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion',
                               inclusion)

    return metadata.to_list(mdata)
def load_metadata(table_spec, schema):
    """Build metadata from a table spec: key fields automatic, rest available.

    Fix: tolerate a table_spec without 'key_properties' -- the stream-level
    write previously used table_spec['key_properties'] and raised KeyError,
    while the per-field loop already defaulted to []. Also simplified the
    redundant truthiness-and-membership condition.
    """
    key_properties = table_spec.get('key_properties', [])

    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

    for field_name in schema.get('properties', {}):
        if field_name in key_properties:
            mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic')
        else:
            mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'available')

    return metadata.to_list(mdata)
Exemple #30
0
def default_streams(config):
    """Build the default catalog entry for the configured topic.

    Returns a single-entry list with the topic's schema and metadata.
    """
    schema = config["schema"]
    primary_keys = config.get("primary_keys", [])

    # Fix: capture metadata.write()'s return value -- relying on it
    # mutating its argument in place is an implementation detail and is
    # inconsistent with the rest of this codebase.
    mdata = metadata.write({}, (), "table-key-properties", primary_keys)

    return [{
        "tap_stream_id": config["topic"],
        "metadata": metadata.to_list(mdata),
        "schema": schema
    }]