def do_discover_report(sf):
    """Describe a Salesforce report and write its Singer catalog to stdout.

    Calls ``sf.describe()`` once, builds a JSON-schema property plus metadata
    for every column listed under
    ``reportExtendedMetadata.detailColumnInfo``, and dumps a catalog holding
    that single stream to ``sys.stdout`` as indented JSON.

    Args:
        sf: Salesforce client. This function reads ``describe()``,
            ``source_type``, ``api_type``, ``select_fields_by_default`` and
            ``report_id`` from it.

    Returns:
        None. The catalog is written to ``sys.stdout``.
    """
    report_description = sf.describe()
    report_name = report_description['attributes']['reportName']
    fields = report_description['reportExtendedMetadata']['detailColumnInfo']

    unsupported_fields = set()
    properties = {}
    mdata = metadata.new()

    # Loop over the report's fields
    for field_name, field in fields.items():
        property_schema, mdata = create_report_property_schema(
            field, mdata, sf.source_type)

        # Compound Address fields and geolocations cannot be queried by the
        # Bulk API, so they are left out of the schema and metadata entirely.
        if (field['dataType'] in ("address", "location")
                and sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE):
            mdata.pop(('properties', field_name), None)
            continue

        # we haven't been able to observe any records with a json field, so we
        # are marking it as unavailable until we have an example to work with
        if field['dataType'] == "json":
            unsupported_fields.add((
                field_name,
                'do not currently support json fields - please contact support'
            ))

        inclusion = metadata.get(
            mdata, ('properties', field_name), 'inclusion')

        if sf.select_fields_by_default and inclusion != 'unsupported':
            mdata = metadata.write(
                mdata, ('properties', field_name), 'selected-by-default', True)

        properties[field_name] = property_schema

    # There are cases where compound fields are referenced by the associated
    # subfields but are not actually present in the field list
    field_name_set = set(fields)
    filtered_unsupported_fields = [
        pair for pair in unsupported_fields if pair[0] in field_name_set
    ]
    missing_unsupported_field_names = [
        pair[0] for pair in unsupported_fields
        if pair[0] not in field_name_set
    ]

    if missing_unsupported_field_names:
        LOGGER.info(
            "Ignoring the following unsupported fields for report %s as they are missing from the field list: %s",
            sf.report_id,
            ', '.join(sorted(missing_unsupported_field_names)))

    if filtered_unsupported_fields:
        LOGGER.info(
            "Not syncing the following unsupported fields for report %s: %s",
            sf.report_id,
            ', '.join(sorted(k for k, _ in filtered_unsupported_fields)))

    # Any property added to unsupported_fields has its selected-by-default
    # flag removed and is marked as unsupported, with the reason attached.
    for prop, description in filtered_unsupported_fields:
        if metadata.get(mdata, ('properties', prop), 'selected-by-default'):
            metadata.delete(
                mdata, ('properties', prop), 'selected-by-default')

        mdata = metadata.write(
            mdata, ('properties', prop), 'unsupported-description', description)
        mdata = metadata.write(
            mdata, ('properties', prop), 'inclusion', 'unsupported')

    # This is the entry with the empty breadcrumb, which is required;
    # otherwise the stream won't be picked up.
    # table-key-properties is also required.
    mdata = metadata.write(mdata, (), 'table-key-properties', [])

    schema = {
        'type': 'object',
        'additionalProperties': False,
        'properties': properties
    }

    entry = {
        'stream': report_name,
        'tap_stream_id': sf.report_id,
        'schema': schema,
        'metadata': metadata.to_list(mdata),
        'column_order': [str(column) for column in properties]
    }

    # A report yields exactly one stream. The custom-setting / Tag filtering
    # used for object discovery never applied here (its lookup tables were
    # never populated in this function), so it has been dropped.
    result = {'streams': [entry]}
    json.dump(result, sys.stdout, indent=4)
def do_discover(sf):
    """Describe a Salesforce instance's objects and write a catalog to stdout.

    Every object listed by the global ``sf.describe()`` call is described in
    turn, except blacklisted objects and objects whose name ends in
    ``ChangeEvent``. For each remaining object a JSON schema and Singer
    metadata are generated per field; objects without an ``Id`` field are
    skipped. Tag objects that reference a custom-setting object are removed
    at the end, and the resulting catalog is dumped to ``sys.stdout``.

    Args:
        sf: Salesforce client. This function reads ``describe()``,
            ``get_blacklisted_objects()``, ``get_blacklisted_fields()``,
            ``api_type`` and ``select_fields_by_default`` from it.

    Returns:
        None. The catalog is written to ``sys.stdout``.
    """
    # NOTE(review): another `do_discover` is defined later in this file and
    # appears to replace this one at import time - confirm which is intended.
    global_description = sf.describe()

    # Names are de-duplicated, then sorted so that the catalog lists streams
    # in the same order on every run (plain set iteration order is not stable).
    objects_to_discover = sorted(
        {o['name'] for o in global_description['sobjects']})
    key_properties = ['Id']

    sf_custom_setting_objects = []
    object_to_tag_references = {}

    # Both blacklists depend only on the api_type in use, so fetch them once
    # instead of once per object / twice per field.
    blacklisted_objects = sf.get_blacklisted_objects()
    blacklisted_fields = sf.get_blacklisted_fields()

    # For each SF Object describe it, loop its fields and build a schema
    entries = []
    for sobject_name in objects_to_discover:

        # Skip blacklisted SF objects depending on the api_type in use
        # ChangeEvent objects are not queryable via Bulk or REST (undocumented)
        if sobject_name in blacklisted_objects \
                or sobject_name.endswith("ChangeEvent"):
            continue

        sobject_description = sf.describe(sobject_name)

        # Cache customSetting and Tag objects to check for blacklisting after
        # all objects have been described
        if sobject_description.get("customSetting"):
            sf_custom_setting_objects.append(sobject_name)
        elif sobject_name.endswith("__Tag"):
            relationship_field = next(
                (f for f in sobject_description["fields"]
                 if f.get("relationshipName") == "Item"),
                None)
            if relationship_field:
                # Map {"Object":"Object__Tag"}
                tagged_object = relationship_field["referenceTo"][0]
                object_to_tag_references[tagged_object] = sobject_name

        fields = sobject_description['fields']
        replication_key = get_replication_key(sobject_name, fields)

        unsupported_fields = set()
        properties = {}
        mdata = metadata.new()

        found_id_field = False

        # Loop over the object's fields
        for f in fields:
            field_name = f['name']
            field_type = f['type']

            if field_name == "Id":
                found_id_field = True

            property_schema, mdata = create_property_schema(
                f, mdata)

            # Compound Address fields cannot be queried by the Bulk API
            if field_type == "address" and sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE:
                unsupported_fields.add(
                    (field_name, 'cannot query compound address fields with bulk API'))

            # we haven't been able to observe any records with a json field, so we
            # are marking it as unavailable until we have an example to work with
            if field_type == "json":
                unsupported_fields.add(
                    (field_name, 'do not currently support json fields - please contact support'))

            # Blacklisted fields are dependent on the api_type being used
            field_pair = (sobject_name, field_name)
            if field_pair in blacklisted_fields:
                unsupported_fields.add(
                    (field_name, blacklisted_fields[field_pair]))

            inclusion = metadata.get(
                mdata, ('properties', field_name), 'inclusion')

            if sf.select_fields_by_default and inclusion != 'unsupported':
                mdata = metadata.write(
                    mdata, ('properties', field_name), 'selected-by-default', True)

            properties[field_name] = property_schema

        if replication_key:
            mdata = metadata.write(
                mdata, ('properties', replication_key), 'inclusion', 'automatic')

        # There are cases where compound fields are referenced by the associated
        # subfields but are not actually present in the field list
        field_name_set = {f['name'] for f in fields}
        filtered_unsupported_fields = [
            pair for pair in unsupported_fields if pair[0] in field_name_set]
        missing_unsupported_field_names = [
            pair[0] for pair in unsupported_fields
            if pair[0] not in field_name_set]

        if missing_unsupported_field_names:
            LOGGER.info("Ignoring the following unsupported fields for object %s as they are missing from the field list: %s",
                        sobject_name,
                        ', '.join(sorted(missing_unsupported_field_names)))

        if filtered_unsupported_fields:
            LOGGER.info("Not syncing the following unsupported fields for object %s: %s",
                        sobject_name,
                        ', '.join(sorted(k for k, _ in filtered_unsupported_fields)))

        # Salesforce Objects are skipped when they do not have an Id field
        if not found_id_field:
            LOGGER.info(
                "Skipping Salesforce Object %s, as it has no Id field",
                sobject_name)
            continue

        # Any property added to unsupported_fields has its selected-by-default
        # flag removed and is marked as unsupported, with the reason attached.
        for prop, description in filtered_unsupported_fields:
            if metadata.get(mdata, ('properties', prop),
                            'selected-by-default'):
                metadata.delete(
                    mdata, ('properties', prop), 'selected-by-default')

            mdata = metadata.write(
                mdata, ('properties', prop), 'unsupported-description', description)
            mdata = metadata.write(
                mdata, ('properties', prop), 'inclusion', 'unsupported')

        if replication_key:
            mdata = metadata.write(
                mdata, (), 'replication-key', replication_key)
        else:
            mdata = metadata.write(
                mdata,
                (),
                'forced-replication-method',
                {
                    'replication-method': 'FULL_TABLE',
                    'reason': 'No replication keys found from the Salesforce API'})

        mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

        schema = {
            'type': 'object',
            'additionalProperties': False,
            'properties': properties
        }

        entry = {
            'stream': sobject_name,
            'tap_stream_id': sobject_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata)
        }

        entries.append(entry)

    # For each custom setting field, remove its associated tag from entries
    # See Blacklisting.md for more information
    unsupported_tag_objects = [object_to_tag_references[f]
                               for f in sf_custom_setting_objects
                               if f in object_to_tag_references]
    if unsupported_tag_objects:
        # Adjacent literals form the same message without runtime concatenation.
        LOGGER.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        LOGGER.info(unsupported_tag_objects)
        entries = [e for e in entries
                   if e['stream'] not in unsupported_tag_objects]

    result = {'streams': entries}
    json.dump(result, sys.stdout, indent=4)
def do_discover(sf):
    """Describe a fixed set of Salesforce objects and return their catalog.

    Only the object names hard-coded below are considered, and only when they
    also appear in the global ``sf.describe()`` listing and are not
    blacklisted. For each such object a JSON schema and Singer metadata are
    generated per field; objects without an ``Id`` field are skipped. Every
    emitted stream is marked ``selected`` in its top-level metadata.

    Args:
        sf: Salesforce client. This function reads ``describe()``,
            ``get_blacklisted_objects()``, ``get_blacklisted_fields()``,
            ``api_type`` and ``select_fields_by_default`` from it.

    Returns:
        dict: ``{"streams": [...]}`` with one entry per discovered object.

    Raises:
        TapSalesforceBulkAPIDisabledException: if ``sf.api_type`` is
            ``"BULK"`` and ``Bulk(sf).has_permissions()`` is falsy.
    """
    global_description = sf.describe()

    objects_set = {o["name"] for o in global_description["sobjects"]}
    objects_to_discover = [
        "Account", "Contact", "Lead", "Opportunity", "Campaign",
        "AccountContactRelation", "AccountContactRole",
        "OpportunityContactRole", "CampaignMember", "Task", "Invoice__c",
        "OpportunityHistory", "AccountHistory", "LeadHistory", "User"
    ]
    key_properties = ["Id"]

    sf_custom_setting_objects = []
    object_to_tag_references = {}

    # For each SF Object describe it, loop its fields and build a schema
    entries = []

    # Check if the user has BULK API enabled
    if sf.api_type == "BULK" and not Bulk(sf).has_permissions():
        raise TapSalesforceBulkAPIDisabledException(
            'This client does not have Bulk API permissions, received "API_DISABLED_FOR_ORG" error code'
        )

    # Both blacklists depend only on the api_type in use, so fetch them once
    # instead of once per object / twice per field.
    blacklisted_objects = sf.get_blacklisted_objects()
    blacklisted_fields = sf.get_blacklisted_fields()

    for sobject_name in objects_to_discover:

        # Skip blacklisted SF objects depending on the api_type in use
        # ChangeEvent objects are not queryable via Bulk or REST (undocumented)
        if sobject_name in blacklisted_objects or sobject_name.endswith(
                "ChangeEvent"):
            continue

        # Requested objects that this instance does not expose are ignored.
        if sobject_name not in objects_set:
            continue

        sobject_description = sf.describe(sobject_name)

        # Cache customSetting and Tag objects to check for blacklisting after
        # all objects have been described
        if sobject_description.get("customSetting"):
            sf_custom_setting_objects.append(sobject_name)
        elif sobject_name.endswith("__Tag"):
            relationship_field = next(
                (f for f in sobject_description["fields"]
                 if f.get("relationshipName") == "Item"),
                None,
            )
            if relationship_field:
                # Map {"Object":"Object__Tag"}
                tagged_object = relationship_field["referenceTo"][0]
                object_to_tag_references[tagged_object] = sobject_name

        fields = sobject_description["fields"]
        replication_key = get_replication_key(sobject_name, fields)

        unsupported_fields = set()
        properties = {}
        mdata = metadata.new()

        found_id_field = False

        # Loop over the object's fields
        for f in fields:
            field_name = f["name"]
            field_type = f["type"]

            if field_name == "Id":
                found_id_field = True

            property_schema, mdata = create_property_schema(f, mdata)

            # Compound Address fields cannot be queried by the Bulk API
            if (field_type == "address" and
                    sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE):
                unsupported_fields.add(
                    (field_name,
                     "cannot query compound address fields with bulk API"))

            # we haven't been able to observe any records with a json field, so we
            # are marking it as unavailable until we have an example to work with
            if field_type == "json":
                unsupported_fields.add((
                    field_name,
                    "do not currently support json fields - please contact support",
                ))

            # Blacklisted fields are dependent on the api_type being used
            field_pair = (sobject_name, field_name)
            if field_pair in blacklisted_fields:
                unsupported_fields.add(
                    (field_name, blacklisted_fields[field_pair]))

            inclusion = metadata.get(mdata, ("properties", field_name),
                                     "inclusion")

            if sf.select_fields_by_default and inclusion != "unsupported":
                mdata = metadata.write(mdata, ("properties", field_name),
                                       "selected-by-default", True)

            properties[field_name] = property_schema

        if replication_key:
            mdata = metadata.write(mdata, ("properties", replication_key),
                                   "inclusion", "automatic")

        # There are cases where compound fields are referenced by the associated
        # subfields but are not actually present in the field list
        field_name_set = {f["name"] for f in fields}
        filtered_unsupported_fields = [
            pair for pair in unsupported_fields if pair[0] in field_name_set
        ]
        missing_unsupported_field_names = [
            pair[0] for pair in unsupported_fields
            if pair[0] not in field_name_set
        ]

        if missing_unsupported_field_names:
            LOGGER.info(
                "Ignoring the following unsupported fields for object %s as they are missing from the field list: %s",
                sobject_name,
                ", ".join(sorted(missing_unsupported_field_names)),
            )

        if filtered_unsupported_fields:
            LOGGER.info(
                "Not syncing the following unsupported fields for object %s: %s",
                sobject_name,
                ", ".join(sorted(k for k, _ in filtered_unsupported_fields)),
            )

        # Salesforce Objects are skipped when they do not have an Id field
        if not found_id_field:
            LOGGER.info("Skipping Salesforce Object %s, as it has no Id field",
                        sobject_name)
            continue

        # Any property added to unsupported_fields has its selected-by-default
        # flag removed and is marked as unsupported, with the reason attached.
        for prop, description in filtered_unsupported_fields:
            if metadata.get(mdata, ("properties", prop),
                            "selected-by-default"):
                metadata.delete(mdata, ("properties", prop),
                                "selected-by-default")

            mdata = metadata.write(mdata, ("properties", prop),
                                   "unsupported-description", description)
            mdata = metadata.write(mdata, ("properties", prop), "inclusion",
                                   "unsupported")

        if replication_key:
            mdata = metadata.write(mdata, (), "valid-replication-keys",
                                   [replication_key])
        else:
            mdata = metadata.write(
                mdata,
                (),
                "forced-replication-method",
                {
                    "replication-method": "FULL_TABLE",
                    "reason":
                    "No replication keys found from the Salesforce API",
                },
            )

        mdata = metadata.write(mdata, (), "table-key-properties",
                               key_properties)
        # Streams in this catalog are selected without further user input.
        mdata = metadata.write(mdata, (), "selected", True)

        schema = {
            "type": "object",
            "additionalProperties": False,
            "properties": properties,
        }

        entry = {
            "stream": sobject_name,
            "tap_stream_id": sobject_name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
        }

        entries.append(entry)

    # For each custom setting field, remove its associated tag from entries
    # See Blacklisting.md for more information
    unsupported_tag_objects = [
        object_to_tag_references[f] for f in sf_custom_setting_objects
        if f in object_to_tag_references
    ]
    if unsupported_tag_objects:
        # Adjacent literals form the same message without runtime concatenation.
        LOGGER.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        LOGGER.info(unsupported_tag_objects)
        entries = [
            e for e in entries if e["stream"] not in unsupported_tag_objects
        ]

    result = {"streams": entries}
    return result