def ingest_schema(communityName,
                  domainName,
                  schemaList,
                  session,
                  parentCommunityName=None):
    """Build a Collibra import payload for a list of schemas and POST it.

    Appends to the module-level accumulators (community_list, domain_list,
    asset_set, json_obj), writes the combined payload to
    ``schema_template.json``, and submits that file to the Collibra
    ``import/json-job`` REST endpoint through *session*.

    Args:
        communityName: Community the schema assets belong to.
        domainName: Domain name; registered with type
            'Physical Data Dictionary'.
        schemaList: Iterable of schema names, each registered as a
            'Schema' asset.
        session: Authenticated requests.Session used for the upload.
        parentCommunityName: Optional parent community (default None).
    """
    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))

    # Register every schema as a 'Schema' asset; the set de-duplicates
    # repeats across calls.
    for schemaName in schemaList:
        asset_set.add((communityName, domainName, schemaName, 'Schema'))

    # Convert the asset set to a list for deterministic iteration below.
    asset_list = list(asset_set)

    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))

    for communityName, domainName, domainType in domain_list:
        json_obj.append(
            parser.getDomainObj(communityName, domainName, domainType))

    # Schema assets use the schema name for both the asset name and the
    # display name (last argument).
    for communityName, domainName, assetName, assetType in asset_list:
        json_obj.append(
            parser.getAssetObj(communityName, domainName, assetName, assetType,
                               assetName))

    with open("schema_template.json", "w") as write_file:
        json.dump(json_obj, write_file)

    url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
    # url = 'https://asu.collibra.com/rest/2.0/import/json-job'

    payload = {'sendNotification': 'true'}

    # Open the template inside a context manager so the handle is closed
    # even if the POST raises (the original open() leaked the descriptor).
    with open('schema_template.json', 'rb') as template:
        response = session.post(url,
                                files={'file': template},
                                data=payload)

    # requests.Response is truthy for status codes < 400.
    if response:
        print(response.json())
    else:
        print(response.text)
def ingest_table(communityName,
                 domainName,
                 schemaList,
                 tableList,
                 attrList,
                 session,
                 parentCommunityName=None):
    """Build a Collibra import payload for a list of tables and POST it.

    Appends to the module-level accumulators (community_list, domain_list,
    asset_set, json_obj), writes the combined payload to
    ``table_template.json``, and submits that file to the Collibra
    ``import/json-job`` REST endpoint through *session*.

    Args:
        communityName: Community the table assets belong to.
        domainName: Domain name; registered with type
            'Physical Data Dictionary'.
        schemaList: Optional schema names aligned with *tableList*. When
            given, each asset is named '<schema>__<table>' and related to
            its schema; when None, the bare table name is used and no
            relation is attached.
        tableList: Table names to register as 'Table' assets.
        attrList: Optional per-table attribute payloads aligned with
            *tableList*; may be None.
        session: Authenticated requests.Session used for the upload.
        parentCommunityName: Optional parent community (default None).
    """
    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))

    if schemaList is not None:
        print("Schema List is not null")
        # Adds each table, related to its schema, to the asset set.
        if attrList is not None:
            print("Table attributes is not null")
            for schemaName, tableName, tableAttr in zip(
                    schemaList, tableList, attrList):
                # (relation_type_id:relation_direction, relation_asset_name)
                # — relation to the owning schema, SOURCE direction.
                relations = ('00000000-0000-0000-0000-000000007043:SOURCE',
                             schemaName)
                asset_set.add(
                    (communityName, domainName, schemaName + '__' + tableName,
                     'Table', tableName, tableAttr, relations))
        else:
            print("Table Attributes list is null")
            for schemaName, tableName in zip(schemaList, tableList):
                # (relation_type_id:relation_direction, relation_asset_name)
                relations = ('00000000-0000-0000-0000-000000007043:SOURCE',
                             schemaName)
                asset_set.add(
                    (communityName, domainName, schemaName + '__' + tableName,
                     'Table', tableName, None, relations))
    else:
        print("Schema List is null")
        # No schema context: register tables without a schema relation.
        if attrList is not None:
            print("Table attributes is not null")
            for tableName, tableAttr in zip(tableList, attrList):
                asset_set.add((communityName, domainName, tableName, 'Table',
                               tableName, tableAttr, None))
        else:
            print("Table attributes is null")
            for tableName in tableList:
                asset_set.add((communityName, domainName, tableName, 'Table',
                               tableName, None, None))

    asset_list = list(asset_set)

    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))

    for communityName, domainName, domainType in domain_list:
        json_obj.append(
            parser.getDomainObj(communityName, domainName, domainType))

    for communityName, domainName, assetName, assetType, tableName, attrList, relation in asset_list:
        json_obj.append(
            parser.getAssetObj(communityName, domainName, assetName, assetType,
                               tableName, attrList, relation))

    with open("table_template.json", "w") as write_file:
        json.dump(json_obj, write_file)

    url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
    # url = 'https://asu.collibra.com/rest/2.0/import/json-job'

    payload = {'sendNotification': 'true'}

    # Context manager closes the upload handle even if the POST raises
    # (the original open() leaked the descriptor).
    with open('table_template.json', 'rb') as template:
        response = session.post(url,
                                files={'file': template},
                                data=payload)

    # requests.Response is truthy for status codes < 400.
    if response:
        print(response.json())
    else:
        print(response.text)
def ingest_column(communityName,
                  domainName,
                  schemaList,
                  tableList,
                  colList,
                  attrList,
                  session,
                  parentCommunityName=None):
    """Build Collibra import payload file(s) for columns and POST each one.

    Appends to the module-level accumulators (community_list, domain_list,
    asset_set, json_obj, list_json_obj). When the accumulated json_obj
    grows past a size threshold it is flushed into list_json_obj and a new
    list is started, so the upload is split across multiple
    ``column_template_<n>.json`` files, each POSTed separately to the
    Collibra ``import/json-job`` REST endpoint through *session*.

    Args:
        communityName: Community the column assets belong to.
        domainName: Domain name; registered with type
            'Physical Data Dictionary'.
        schemaList: Optional schema names aligned with *tableList*. When
            given, columns are named '<schema>.<table>.<column>' and
            related to '<schema>.<table>'; when None, '<table>.<column>'
            and related to the bare table name.
        tableList: Table names aligned with *colList*.
        colList: Column names to register as 'Column' assets.
        attrList: Per-column attribute payloads aligned with *colList*.
        session: Authenticated requests.Session used for the uploads.
        parentCommunityName: Optional parent community (default None).
    """
    global json_obj
    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))

    if schemaList is not None:
        for schemaName, tableName, columnName, columnAttr in zip(
                schemaList, tableList, colList, attrList):
            # (relation_type_id:relation_direction, relation_asset_name)
            # — relation to the owning table, TARGET direction.
            relations = ('00000000-0000-0000-0000-000000007042:TARGET',
                         schemaName + '.' + tableName)
            asset_set.add((communityName, domainName,
                           schemaName + '.' + tableName + '.' + columnName,
                           'Column', columnName, columnAttr, relations))
    else:
        for tableName, columnName, columnAttr in zip(tableList, colList,
                                                     attrList):
            # (relation_type_id:relation_direction, relation_asset_name)
            relations = ('00000000-0000-0000-0000-000000007042:TARGET',
                         tableName)
            asset_set.add(
                (communityName, domainName, tableName + '.' + columnName,
                 'Column', columnName, columnAttr, relations))

    asset_list = list(asset_set)

    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))

    for communityName, domainName, domainType in domain_list:
        json_obj.append(
            parser.getDomainObj(communityName, domainName, domainType))

    for communityName, domainName, assetName, assetType, columnName, attrList, relation in asset_list:
        # NOTE(review): sys.getsizeof on a list is shallow (it measures the
        # pointer array only, not the contained objects), so this threshold
        # caps the entry count rather than directly measuring the claimed
        # ~22MB serialized size — confirm against the API's upload limit.
        if sys.getsizeof(json_obj) > 400000:
            list_json_obj.append(json_obj)
            json_obj = []

        json_obj.append(
            parser.getAssetObj(communityName, domainName, assetName, assetType,
                               columnName, attrList, relation))

    list_json_obj.append(json_obj)
    # Use a distinct loop variable ('chunk') instead of rebinding the
    # global json_obj as the original did; the final global state is the
    # same (the last chunk IS json_obj) but the shadowing was confusing.
    for index, chunk in enumerate(list_json_obj, 1):
        with open(f"column_template_{index}.json", "w") as write_file:
            json.dump(chunk, write_file)

        print(
            f"Ingesting data from template file: column_template_{index}.json")
        url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
        # url = 'https://asu.collibra.com/rest/2.0/import/json-job'

        payload = {'sendNotification': 'true'}

        # Context manager closes the upload handle even if the POST raises
        # (the original open() leaked one descriptor per chunk).
        with open(f'column_template_{index}.json', 'rb') as template:
            response = session.post(url,
                                    files={'file': template},
                                    data=payload)

        # requests.Response is truthy for status codes < 400.
        if response:
            print(response.json())
        else:
            print(response.text)