def do_discover_report(sf):
    """Describe a Salesforce report and dump its Singer catalog to stdout.

    The report returned by ``sf.describe()`` becomes a single stream: every
    entry of ``reportExtendedMetadata.detailColumnInfo`` is turned into a
    schema property plus metadata, and the resulting ``{"streams": [...]}``
    document is written to ``sys.stdout`` as indented JSON.

    NOTE(review): this function reached review with several lines missing
    (a dangling ``mdata =``, unclosed brackets, log-message strings without
    their call). The gaps were filled from the surviving fragments and the
    Singer metadata conventions; please confirm against the upstream tap.

    Args:
        sf: Salesforce client. This function reads ``describe()``,
            ``source_type``, ``api_type``, ``select_fields_by_default`` and
            ``report_id`` from it.
    """
    # Local import: no module-level logger is visible from this block.
    import logging

    logger = logging.getLogger(__name__)

    # NOTE(review): neither container is populated anywhere in this function,
    # so the Tag filtering near the end is currently a no-op.
    sf_custom_setting_objects = []
    object_to_tag_references = {}

    entries = []

    report_description = sf.describe()

    report_name = report_description['attributes']['reportName']
    fields = report_description['reportExtendedMetadata']['detailColumnInfo']

    unsupported_fields = set()
    properties = {}
    # Restored: presumably an empty compiled-metadata map - TODO confirm.
    mdata = metadata.new()

    # Loop over the report's fields
    for field_name, field in fields.items():
        property_schema, mdata = create_report_property_schema(
            field, mdata, sf.source_type)

        # Compound Address fields and geolocations cannot be queried by the
        # Bulk API, so their metadata entry is dropped
        if field['dataType'] in (
                "address", "location"
        ) and sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE:
            mdata.pop(('properties', field_name), None)

        # we haven't been able to observe any records with a json field, so we
        # are marking it as unavailable until we have an example to work with
        if field['dataType'] == "json":
            unsupported_fields.add(
                (field_name,
                 'do not currently support json fields - please contact support'))

        inclusion = metadata.get(mdata, ('properties', field_name),
                                 'inclusion')

        if sf.select_fields_by_default and inclusion != 'unsupported':
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'selected-by-default', True)

        properties[field_name] = property_schema

    # There are cases where compound fields are referenced by the associated
    # subfields but are not actually present in the field list
    field_name_set = set(fields)
    filtered_unsupported_fields = [
        f for f in unsupported_fields if f[0] in field_name_set
    ]
    missing_unsupported_field_names = [
        f[0] for f in unsupported_fields if f[0] not in field_name_set
    ]

    if missing_unsupported_field_names:
        logger.info(
            "Ignoring the following unsupported fields for report %s as they are missing from the field list: %s",
            sf.report_id, ', '.join(sorted(missing_unsupported_field_names)))

    if filtered_unsupported_fields:
        logger.info(
            "Not syncing the following unsupported fields for report %s: %s",
            sf.report_id,
            ', '.join(sorted(k for k, _ in filtered_unsupported_fields)))

    # Any property added to unsupported_fields loses its default selection and
    # is flagged as unsupported, together with the reason
    for prop, description in filtered_unsupported_fields:
        if metadata.get(mdata, ('properties', prop), 'selected-by-default'):
            metadata.delete(mdata, ('properties', prop), 'selected-by-default')

        mdata = metadata.write(mdata, ('properties', prop),
                               'unsupported-description', description)
        mdata = metadata.write(mdata, ('properties', prop), 'inclusion',
                               'unsupported')

    # This is the entry with the empty breadcrumb; it is required, otherwise
    # the stream won't be picked up. table-key-properties is also required.
    mdata = metadata.write(mdata, (), 'table-key-properties', [])

    schema = {
        'type': 'object',
        'additionalProperties': False,
        'properties': properties
    }

    entry = {
        'stream': report_name,
        'tap_stream_id': sf.report_id,
        'schema': schema,
        'metadata': metadata.to_list(mdata),
        'column_order': [str(column) for column in properties]
    }

    # Restored: without this the emitted catalog would always be empty.
    entries.append(entry)

    # For each custom setting object, remove its associated Tag stream from
    # the entries
    unsupported_tag_objects = [
        object_to_tag_references[f] for f in sf_custom_setting_objects
        if f in object_to_tag_references
    ]
    if unsupported_tag_objects:
        logger.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        # The message above ends with a colon, so the names are logged next.
        logger.info("%s", unsupported_tag_objects)
        entries = [
            e for e in entries if e['stream'] not in unsupported_tag_objects
        ]

    result = {'streams': entries}
    json.dump(result, sys.stdout, indent=4)
def do_discover(sf):
    """Describe a Salesforce instance's objects and dump a Singer catalog.

    Every sobject listed by the global ``sf.describe()`` call is described
    individually; its fields become schema properties plus metadata, and the
    resulting ``{"streams": [...]}`` document is written to ``sys.stdout`` as
    indented JSON. Blacklisted objects, ``*ChangeEvent`` objects, objects
    without an ``Id`` field and Tag objects of custom settings are left out.

    NOTE(review): this function reached review with several lines missing
    (empty ``if`` bodies, a dangling ``mdata =``, unclosed brackets,
    log-message strings without their call). The gaps were filled from the
    surviving fragments and the Singer metadata conventions; please confirm
    against the upstream tap.

    Args:
        sf: Salesforce client. This function reads ``describe()``,
            ``get_blacklisted_objects()``, ``get_blacklisted_fields()``,
            ``api_type`` and ``select_fields_by_default`` from it.
    """
    # Local import: no module-level logger is visible from this block.
    import logging

    logger = logging.getLogger(__name__)

    global_description = sf.describe()

    objects_to_discover = {o['name'] for o in global_description['sobjects']}
    key_properties = ['Id']

    sf_custom_setting_objects = []
    object_to_tag_references = {}

    # For each SF Object describe it, loop its fields and build a schema
    entries = []
    for sobject_name in objects_to_discover:

        # Skip blacklisted SF objects depending on the api_type in use
        # ChangeEvent objects are not queryable via Bulk or REST (undocumented)
        if sobject_name in sf.get_blacklisted_objects() \
           or sobject_name.endswith("ChangeEvent"):
            continue

        sobject_description = sf.describe(sobject_name)

        # Cache customSetting and Tag objects to check for blacklisting after
        # all objects have been described
        if sobject_description.get("customSetting"):
            sf_custom_setting_objects.append(sobject_name)
        elif sobject_name.endswith("__Tag"):
            relationship_field = next(
                (f for f in sobject_description["fields"] if f.get("relationshipName") == "Item"),
                None)
            if relationship_field:
                # Map {"Object":"Object__Tag"}
                # NOTE(review): the key expression was lost; "referenceTo" is
                # presumably the list of parent object names - TODO confirm.
                object_to_tag_references[relationship_field["referenceTo"]
                                         [0]] = sobject_name

        fields = sobject_description['fields']
        replication_key = get_replication_key(sobject_name, fields)

        unsupported_fields = set()
        properties = {}
        # Restored: presumably an empty compiled-metadata map - TODO confirm.
        mdata = metadata.new()

        found_id_field = False

        # Loop over the object's fields
        for f in fields:
            field_name = f['name']

            if field_name == "Id":
                found_id_field = True

            property_schema, mdata = create_property_schema(
                f, mdata)

            # Compound Address fields cannot be queried by the Bulk API
            if f['type'] == "address" and sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE:
                unsupported_fields.add(
                    (field_name, 'cannot query compound address fields with bulk API'))

            # we haven't been able to observe any records with a json field, so we
            # are marking it as unavailable until we have an example to work with
            if f['type'] == "json":
                unsupported_fields.add(
                    (field_name, 'do not currently support json fields - please contact support'))

            # Blacklisted fields are dependent on the api_type being used
            field_pair = (sobject_name, field_name)
            blacklisted_fields = sf.get_blacklisted_fields()
            if field_pair in blacklisted_fields:
                unsupported_fields.add(
                    (field_name, blacklisted_fields[field_pair]))

            inclusion = metadata.get(
                mdata, ('properties', field_name), 'inclusion')

            if sf.select_fields_by_default and inclusion != 'unsupported':
                mdata = metadata.write(
                    mdata, ('properties', field_name), 'selected-by-default', True)

            properties[field_name] = property_schema

        if replication_key:
            mdata = metadata.write(
                mdata, ('properties', replication_key), 'inclusion', 'automatic')

        # There are cases where compound fields are referenced by the associated
        # subfields but are not actually present in the field list
        field_name_set = {f['name'] for f in fields}
        filtered_unsupported_fields = [f for f in unsupported_fields if f[0] in field_name_set]
        missing_unsupported_field_names = [f[0] for f in unsupported_fields if f[0] not in field_name_set]

        if missing_unsupported_field_names:
            logger.info("Ignoring the following unsupported fields for object %s as they are missing from the field list: %s",
                        sobject_name,
                        ', '.join(sorted(missing_unsupported_field_names)))

        if filtered_unsupported_fields:
            logger.info("Not syncing the following unsupported fields for object %s: %s",
                        sobject_name,
                        ', '.join(sorted(k for k, _ in filtered_unsupported_fields)))

        # Salesforce Objects are skipped when they do not have an Id field
        if not found_id_field:
            logger.info(
                "Skipping Salesforce Object %s, as it has no Id field",
                sobject_name)
            continue

        # Any property added to unsupported_fields loses its default selection
        # and is flagged as unsupported, together with the reason
        for prop, description in filtered_unsupported_fields:
            if metadata.get(mdata, ('properties', prop),
                            'selected-by-default'):
                metadata.delete(
                    mdata, ('properties', prop), 'selected-by-default')

            mdata = metadata.write(
                mdata, ('properties', prop), 'unsupported-description', description)
            mdata = metadata.write(
                mdata, ('properties', prop), 'inclusion', 'unsupported')

        if replication_key:
            mdata = metadata.write(
                mdata, (), 'replication-key', replication_key)
        else:
            # NOTE(review): the metadata key was lost; the Singer convention
            # for this payload is 'forced-replication-method' - TODO confirm.
            mdata = metadata.write(
                mdata,
                (),
                'forced-replication-method',
                {
                    'replication-method': 'FULL_TABLE',
                    'reason': 'No replication keys found from the Salesforce API'})

        mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

        schema = {
            'type': 'object',
            'additionalProperties': False,
            'properties': properties
        }

        entry = {
            'stream': sobject_name,
            'tap_stream_id': sobject_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata)
        }

        # Restored: without this the emitted catalog would always be empty.
        entries.append(entry)

    # For each custom setting object, remove its associated Tag stream from
    # the entries
    unsupported_tag_objects = [object_to_tag_references[f]
                               for f in sf_custom_setting_objects if f in object_to_tag_references]
    if unsupported_tag_objects:
        logger.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        # The message above ends with a colon, so the names are logged next.
        logger.info("%s", unsupported_tag_objects)
        entries = [e for e in entries if e['stream']
                   not in unsupported_tag_objects]

    result = {'streams': entries}
    json.dump(result, sys.stdout, indent=4)
# Example #3
def do_discover(sf):
    """Describe a fixed set of Salesforce objects and return a Singer catalog.

    Only the object names hard-coded in ``objects_to_discover`` are
    considered, and only when they also appear in the global ``sf.describe()``
    result. Each remaining object is described; its fields become schema
    properties plus metadata, and every stream is marked as selected.

    NOTE(review): this function reached review with several lines missing
    (empty ``if`` bodies, a dangling ``mdata =``, unclosed brackets,
    log-message strings without their call). The gaps were filled from the
    surviving fragments and the Singer metadata conventions; please confirm
    against the upstream tap.

    Args:
        sf: Salesforce client. This function reads ``describe()``,
            ``get_blacklisted_objects()``, ``get_blacklisted_fields()``,
            ``api_type`` and ``select_fields_by_default`` from it.

    Returns:
        dict: ``{"streams": [...]}`` with one entry per discovered object.

    Raises:
        TapSalesforceBulkAPIDisabledException: if ``sf.api_type`` is
            ``"BULK"`` and ``Bulk(sf).has_permissions()`` is falsy.
    """
    # Local import: no module-level logger is visible from this block.
    import logging

    logger = logging.getLogger(__name__)

    global_description = sf.describe()

    objects_set = {o["name"] for o in global_description["sobjects"]}
    objects_to_discover = [
        "Account", "Contact", "Lead", "Opportunity", "Campaign",
        "AccountContactRelation", "AccountContactRole",
        "OpportunityContactRole", "CampaignMember", "Task", "Invoice__c",
        "OpportunityHistory", "AccountHistory", "LeadHistory", "User"
    ]
    key_properties = ["Id"]

    sf_custom_setting_objects = []
    object_to_tag_references = {}

    # For each SF Object describe it, loop its fields and build a schema
    entries = []

    # Check if the user has BULK API enabled
    if sf.api_type == "BULK" and not Bulk(sf).has_permissions():
        raise TapSalesforceBulkAPIDisabledException(
            'This client does not have Bulk API permissions, received "API_DISABLED_FOR_ORG" error code'
        )

    for sobject_name in objects_to_discover:

        # Skip blacklisted SF objects depending on the api_type in use
        # ChangeEvent objects are not queryable via Bulk or REST (undocumented)
        if sobject_name in sf.get_blacklisted_objects(
        ) or sobject_name.endswith("ChangeEvent"):
            continue
        # Skip requested objects that the global describe did not list
        if sobject_name not in objects_set:
            continue

        sobject_description = sf.describe(sobject_name)

        # Cache customSetting and Tag objects to check for blacklisting after
        # all objects have been described
        if sobject_description.get("customSetting"):
            sf_custom_setting_objects.append(sobject_name)
        elif sobject_name.endswith("__Tag"):
            relationship_field = next(
                (f for f in sobject_description["fields"]
                 if f.get("relationshipName") == "Item"),
                None)
            if relationship_field:
                # Map {"Object":"Object__Tag"}
                # NOTE(review): the key expression was lost; "referenceTo" is
                # presumably the list of parent object names - TODO confirm.
                object_to_tag_references[relationship_field["referenceTo"]
                                         [0]] = sobject_name

        fields = sobject_description["fields"]
        replication_key = get_replication_key(sobject_name, fields)

        unsupported_fields = set()
        properties = {}
        # Restored: presumably an empty compiled-metadata map - TODO confirm.
        mdata = metadata.new()

        found_id_field = False

        # Loop over the object's fields
        for f in fields:
            field_name = f["name"]

            if field_name == "Id":
                found_id_field = True

            property_schema, mdata = create_property_schema(f, mdata)

            # Compound Address fields cannot be queried by the Bulk API
            if (f["type"] == "address" and sf.api_type
                    == tap_salesforce.salesforce.BULK_API_TYPE):
                unsupported_fields.add(
                    (field_name,
                     "cannot query compound address fields with bulk API"))

            # we haven't been able to observe any records with a json field, so we
            # are marking it as unavailable until we have an example to work with
            if f["type"] == "json":
                unsupported_fields.add((
                    field_name,
                    "do not currently support json fields - please contact support",
                ))

            # Blacklisted fields are dependent on the api_type being used
            field_pair = (sobject_name, field_name)
            blacklisted_fields = sf.get_blacklisted_fields()
            if field_pair in blacklisted_fields:
                unsupported_fields.add(
                    (field_name, blacklisted_fields[field_pair]))

            inclusion = metadata.get(mdata, ("properties", field_name),
                                     "inclusion")

            if sf.select_fields_by_default and inclusion != "unsupported":
                mdata = metadata.write(mdata, ("properties", field_name),
                                       "selected-by-default", True)

            properties[field_name] = property_schema

        if replication_key:
            mdata = metadata.write(mdata, ("properties", replication_key),
                                   "inclusion", "automatic")

        # There are cases where compound fields are referenced by the associated
        # subfields but are not actually present in the field list
        field_name_set = {f["name"] for f in fields}
        filtered_unsupported_fields = [
            f for f in unsupported_fields if f[0] in field_name_set
        ]
        missing_unsupported_field_names = [
            f[0] for f in unsupported_fields if f[0] not in field_name_set
        ]

        if missing_unsupported_field_names:
            logger.info(
                "Ignoring the following unsupported fields for object %s as they are missing from the field list: %s",
                sobject_name,
                ", ".join(sorted(missing_unsupported_field_names)),
            )

        if filtered_unsupported_fields:
            logger.info(
                "Not syncing the following unsupported fields for object %s: %s",
                sobject_name,
                ", ".join(sorted(k for k, _ in filtered_unsupported_fields)),
            )

        # Salesforce Objects are skipped when they do not have an Id field
        if not found_id_field:
            logger.info(
                "Skipping Salesforce Object %s, as it has no Id field",
                sobject_name)
            continue

        # Any property added to unsupported_fields loses its default selection
        # and is flagged as unsupported, together with the reason
        for prop, description in filtered_unsupported_fields:
            if metadata.get(mdata, ("properties", prop),
                            "selected-by-default"):
                metadata.delete(mdata, ("properties", prop),
                                "selected-by-default")

            mdata = metadata.write(mdata, ("properties", prop),
                                   "unsupported-description", description)
            mdata = metadata.write(mdata, ("properties", prop), "inclusion",
                                   "unsupported")

        if replication_key:
            # NOTE(review): the value was truncated; a one-element list is
            # the usual shape for this key - TODO confirm.
            mdata = metadata.write(mdata, (), "valid-replication-keys",
                                   [replication_key])
        else:
            # NOTE(review): the metadata key and the "reason" key were lost;
            # both follow the Singer convention - TODO confirm.
            mdata = metadata.write(
                mdata,
                (),
                "forced-replication-method",
                {
                    "replication-method": "FULL_TABLE",
                    "reason":
                    "No replication keys found from the Salesforce API",
                },
            )

        mdata = metadata.write(mdata, (), "table-key-properties",
                               key_properties)
        mdata = metadata.write(mdata, (), "selected", True)

        schema = {
            "type": "object",
            "additionalProperties": False,
            "properties": properties,
        }

        entry = {
            "stream": sobject_name,
            "tap_stream_id": sobject_name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
        }

        # Restored: without this the returned catalog would always be empty.
        entries.append(entry)

    # For each custom setting object, remove its associated Tag stream from
    # the entries
    unsupported_tag_objects = [
        object_to_tag_references[f] for f in sf_custom_setting_objects
        if f in object_to_tag_references
    ]
    if unsupported_tag_objects:
        logger.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        # The message above ends with a colon, so the names are logged next.
        logger.info("%s", unsupported_tag_objects)
        entries = [
            e for e in entries if e["stream"] not in unsupported_tag_objects
        ]

    result = {"streams": entries}
    return result