Esempio n. 1
0
    def save_resource(self, populated_tiles, resourceinstanceid, legacyid,
                      resources, target_resource_model, bulk, save_count):
        """
        Build a Resource from ``populated_tiles`` and save it, or queue it for a bulk save.

        Arguments:
        populated_tiles -- tiles attached to the new resource; when empty no
            resource is created and a WARNING is added to ``self.errors``
        resourceinstanceid -- id given to the new resource instance
        legacyid -- legacy identifier stored on the resource and quoted in the warning
        resources -- caller-owned list used as the bulk buffer; mutated in place
        target_resource_model -- value passed to Resource as ``graph_id``
        bulk -- when truthy the resource is buffered and the buffer is flushed
            with ``Resource.bulk_save`` once it holds at least
            ``settings.BULK_IMPORT_BATCH_SIZE`` items; otherwise it is saved
            immediately
        save_count -- running count of processed resources; only used for the
            progress message
        """
        errors = []
        if len(populated_tiles) > 0:
            newresourceinstance = Resource(
                resourceinstanceid=resourceinstanceid,
                graph_id=target_resource_model,
                legacyid=legacyid,
                createdtime=datetime.datetime.now())
            newresourceinstance.tiles = populated_tiles
            if bulk:
                resources.append(newresourceinstance)
                if len(resources) >= settings.BULK_IMPORT_BATCH_SIZE:
                    Resource.bulk_save(resources=resources)
                    # Empty the caller's list in place so the same buffer is reused.
                    del resources[:]
            else:
                newresourceinstance.save()
        else:
            errors.append({
                'type':
                'WARNING',
                'message':
                'No resource created for legacyid: {0}. Make sure there is data to be imported for this resource and it is mapped properly in your mapping file.'
                .format(legacyid)
            })

        if len(errors) > 0:
            self.errors += errors

        # Report progress four times per batch. Floor division is explicit and
        # the interval is clamped to 1 so a batch size below 4 cannot cause a
        # modulo-by-zero.
        progress_interval = max(1, settings.BULK_IMPORT_BATCH_SIZE // 4)
        if save_count % progress_interval == 0:
            print('%s resources processed' % str(save_count))
Esempio n. 2
0
    def save_resource(self, populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count):
        """
        Build a Resource from ``populated_tiles`` and save it, or queue it for a bulk save.

        Arguments:
        populated_tiles -- tiles attached to the new resource; when empty no
            resource is created and a WARNING is added to ``self.errors``
        resourceinstanceid -- id given to the new resource instance
        legacyid -- legacy identifier stored on the resource and quoted in the warning
        resources -- caller-owned list used as the bulk buffer; mutated in place
        target_resource_model -- value passed to Resource as ``graph_id``
        bulk -- when truthy the resource is buffered and the buffer is flushed
            with ``Resource.bulk_save`` once it holds at least
            ``settings.BULK_IMPORT_BATCH_SIZE`` items; otherwise it is saved
            immediately
        save_count -- running count of processed resources; only used for the
            progress message
        """
        errors = []
        if len(populated_tiles) > 0:
            newresourceinstance = Resource(
                resourceinstanceid=resourceinstanceid,
                graph_id=target_resource_model,
                legacyid=legacyid,
                createdtime=datetime.datetime.now()
            )
            newresourceinstance.tiles = populated_tiles
            if bulk:
                resources.append(newresourceinstance)
                # ">=" rather than "==": if the buffer ever holds more than one
                # batch (e.g. the caller pre-filled it) it must still be flushed.
                if len(resources) >= settings.BULK_IMPORT_BATCH_SIZE:
                    Resource.bulk_save(resources=resources)
                    # Empty the caller's list in place so the same buffer is reused.
                    del resources[:]
            else:
                newresourceinstance.save()
        else:
            errors.append({'type': 'WARNING', 'message': 'No resource created for legacyid: {0}. Make sure there is data to be imported for this resource and it is mapped properly in your mapping file.'.format(legacyid)})

        if len(errors) > 0:
            self.errors += errors

        # Report progress four times per batch. Floor division is explicit and
        # the interval is clamped to 1 so a batch size below 4 cannot cause a
        # modulo-by-zero.
        progress_interval = max(1, settings.BULK_IMPORT_BATCH_SIZE // 4)
        if save_count % progress_interval == 0:
            print('%s resources processed' % str(save_count))
Esempio n. 3
0
    def save_resource(self, populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count):
        """
        Build a Resource from ``populated_tiles`` and save it, or queue it for a bulk save.

        A failed non-bulk save is recorded as a WARNING in ``self.errors`` and
        the resource instance is deleted again; the exception is not re-raised.

        Arguments:
        populated_tiles -- tiles attached to the new resource; when empty no
            resource is created and a WARNING is added to ``self.errors``
        resourceinstanceid -- id given to the new resource instance
        legacyid -- legacy identifier stored on the resource and quoted in the warning
        resources -- caller-owned list used as the bulk buffer; mutated in place
        target_resource_model -- value passed to Resource as ``graph_id``
        bulk -- when truthy the resource is buffered and the buffer is flushed
            with ``Resource.bulk_save`` once it holds at least
            ``settings.BULK_IMPORT_BATCH_SIZE`` items; otherwise it is saved
            immediately
        save_count -- running count of processed resources; only used for the
            progress message
        """
        errors = []
        if len(populated_tiles) > 0:
            newresourceinstance = Resource(
                resourceinstanceid=resourceinstanceid,
                graph_id=target_resource_model,
                legacyid=legacyid,
                createdtime=datetime.datetime.now()
            )
            newresourceinstance.tiles = populated_tiles
            if bulk:
                resources.append(newresourceinstance)
                if len(resources) >= settings.BULK_IMPORT_BATCH_SIZE:
                    Resource.bulk_save(resources=resources)
                    # Empty the caller's list in place so the same buffer is reused.
                    del resources[:]
            else:
                try:
                    newresourceinstance.save()
                except TransportError as e:
                    # Building the detail must not raise from inside this
                    # handler (it would escape both except clauses), so fall
                    # back to the exception text when the payload does not
                    # have the ['error']['caused_by'] structure.
                    try:
                        cause = json.dumps(e.info['error']['caused_by'], indent=1)
                    except (KeyError, IndexError, TypeError, AttributeError):
                        cause = str(e)
                    msg = '%s: WARNING: failed to index document in resource: %s. Exception detail:\n%s\n' % (datetime.datetime.now(), resourceinstanceid, cause)
                    errors.append({'type': 'WARNING', 'message': msg})
                    newresourceinstance.delete()
                    # Rebinds the local only: this changes the progress message
                    # below, not the counter held by the caller.
                    save_count = save_count - 1
                except Exception as e:
                    msg = '%s: WARNING: failed to index document in resource: %s. Exception detail:\n%s\n' % (datetime.datetime.now(), resourceinstanceid, e)
                    errors.append({'type': 'WARNING', 'message': msg})
                    newresourceinstance.delete()
                    save_count = save_count - 1

        else:
            errors.append({'type': 'WARNING', 'message': 'No resource created for legacyid: {0}. Make sure there is data to be imported for this resource and it is mapped properly in your mapping file.'.format(legacyid)})

        if len(errors) > 0:
            self.errors += errors

        # Report progress four times per batch. Floor division is explicit and
        # the interval is clamped to 1 so a batch size below 4 cannot cause a
        # modulo-by-zero.
        progress_interval = max(1, settings.BULK_IMPORT_BATCH_SIZE // 4)
        if save_count % progress_interval == 0:
            print('%s resources processed' % str(save_count))
Esempio n. 4
0
    def save_resource(self, populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count, row_number):
        """
        Build a Resource from ``populated_tiles`` and save it, or queue it for a bulk save.

        A failed non-bulk save is recorded as a WARNING in ``self.errors`` and
        the resource instance is deleted again; the exception is not re-raised.

        Arguments:
        populated_tiles -- tiles attached to the new resource; when empty no
            resource is created and a WARNING is added to ``self.errors``
        resourceinstanceid -- id given to the new resource instance
        legacyid -- legacy identifier stored on the resource and quoted in the warning
        resources -- caller-owned list used as the bulk buffer; mutated in place
        target_resource_model -- value passed to Resource as ``graph_id``
        bulk -- when truthy the resource is buffered and the buffer is flushed
            with ``Resource.bulk_save`` once it holds at least
            ``settings.BULK_IMPORT_BATCH_SIZE`` items; otherwise it is saved
            immediately
        save_count -- running count of processed resources; only used for the
            progress message
        row_number -- row reference interpolated into the save-failure warnings
        """
        errors = []
        if len(populated_tiles) > 0:
            newresourceinstance = Resource(
                resourceinstanceid=resourceinstanceid,
                graph_id=target_resource_model,
                legacyid=legacyid,
                createdtime=datetime.datetime.now()
            )
            newresourceinstance.tiles = populated_tiles
            if bulk:
                resources.append(newresourceinstance)
                if len(resources) >= settings.BULK_IMPORT_BATCH_SIZE:
                    Resource.bulk_save(resources=resources)
                    # Empty the caller's list in place so the same buffer is reused.
                    del resources[:]
            else:
                try:
                    newresourceinstance.save()
                except TransportError as e:
                    # Building the detail must not raise from inside this
                    # handler (it would escape both except clauses), so fall
                    # back to the exception text when the payload does not
                    # have the ['error']['caused_by'] structure.
                    try:
                        cause = json.dumps(e.info['error']['caused_by'], indent=1)
                    except (KeyError, IndexError, TypeError, AttributeError):
                        cause = str(e)
                    msg = '%s: WARNING: failed to index document in resource: %s %s. Exception detail:\n%s\n' % (datetime.datetime.now(), resourceinstanceid, row_number, cause)
                    errors.append({'type': 'WARNING', 'message': msg})
                    newresourceinstance.delete()
                    # Rebinds the local only: this changes the progress message
                    # below, not the counter held by the caller.
                    save_count = save_count - 1
                except Exception as e:
                    msg = '%s: WARNING: failed to index document in resource: %s %s. Exception detail:\n%s\n' % (datetime.datetime.now(), resourceinstanceid, row_number, e)
                    errors.append({'type': 'WARNING', 'message': msg})
                    newresourceinstance.delete()
                    save_count = save_count - 1

        else:
            errors.append({'type': 'WARNING', 'message': 'No resource created for legacyid: {0}. Make sure there is data to be imported for this resource and it is mapped properly in your mapping file.'.format(legacyid)})

        if len(errors) > 0:
            self.errors += errors

        # Report progress four times per batch. Floor division is explicit and
        # the interval is clamped to 1 so a batch size below 4 cannot cause a
        # modulo-by-zero.
        progress_interval = max(1, settings.BULK_IMPORT_BATCH_SIZE // 4)
        if save_count % progress_interval == 0:
            print('%s resources processed' % str(save_count))
Esempio n. 5
0
    def import_business_data(self, business_data=None, mapping=None, overwrite='append', bulk=False, create_concepts=False, create_collections=False):
        # errors = businessDataValidator(self.business_data)

        def get_display_nodes(graphid):
            '''
            Collect the ids (as strings) of the nodes named in the string
            templates of the display function attached to ``graphid``.

            Side effects: each function's in-memory config is modified
            ('triggering_nodegroups' is removed, 'node_ids' is reset and
            'string_template' is replaced by a list of names) and one summary
            line per config entry is printed.
            '''
            node_ids = []
            graph_functions = FunctionXGraph.objects.filter(function_id='60000000-0000-0000-0000-000000000001', graph_id=graphid)
            for graph_function in graph_functions:
                config = graph_function.config
                del config['triggering_nodegroups']

                # First pass: turn every template into a list of node names and
                # gather the ids of the nodes in the entry's nodegroup that it names.
                for entry in config.itervalues():
                    entry['node_ids'] = []
                    template_names = entry['string_template'].replace('<', '').replace('>', '').split(', ')
                    entry['string_template'] = template_names
                    if entry['nodegroup_id'] == '':
                        continue
                    node_ids.extend(
                        str(candidate.nodeid)
                        for candidate in Node.objects.filter(nodegroup_id=entry['nodegroup_id'])
                        if candidate.name in template_names
                    )

                # Second pass: report which nodes take part in each entry.
                for entry_name, entry in config.iteritems():
                    template_names = entry['string_template']
                    if template_names == ['']:
                        print('No nodes participate in the {0} display function.'.format(entry_name))
                        continue
                    if len(template_names) > 1:
                        verb = 'nodes participate'
                    else:
                        verb = 'node participates'
                    print('The {0} {1} in the {2} display function.'.format(', '.join(template_names), verb, entry_name))

            return node_ids

        def process_resourceid(resourceid, overwrite):
            '''
            Resolve a ResourceID value from the business data to the resource
            instance id the imported data should be saved under.

            resourceid -- a UUID string, or any other value which is then
                treated as a legacyid
            overwrite -- when equal to 'overwrite', an existing resource matching
                ``resourceid`` is deleted before its id is returned

            Returns a uuid.UUID when ``resourceid`` parses as one, the existing
            resource's id when exactly one resource has that legacyid, a new
            uuid4 when none has, and None when more than one has.
            '''
            try:
                resourceinstanceid = uuid.UUID(resourceid)
            except (ValueError, TypeError, AttributeError):
                # Not a UUID: look the value up as a legacyid.
                ret = Resource.objects.filter(legacyid=resourceid)
                # If more than one resource is returned then make resource = None. This should never actually happen.
                if len(ret) > 1:
                    resourceinstanceid = None
                # If no resource is returned with the given legacyid then create an archesid for the resource.
                elif len(ret) == 0:
                    resourceinstanceid = uuid.uuid4()
                # If a resource is returned with the given legacyid then return its archesid.
                else:
                    if overwrite == 'overwrite':
                        Resource.objects.get(pk=str(ret[0].resourceinstanceid)).delete()
                    resourceinstanceid = ret[0].resourceinstanceid
            else:
                # A UUID: in overwrite mode remove the existing arches resource,
                # if there is one. The existence check replaces a bare except
                # that also hid failures of the delete itself; such failures
                # now propagate, as they already do on the legacyid path.
                if overwrite == 'overwrite':
                    existing = Resource.objects.filter(resourceinstanceid=resourceid)
                    if len(existing) > 0:
                        Resource.objects.get(pk=str(existing[0].resourceinstanceid)).delete()

            return resourceinstanceid

        try:
            with transaction.atomic():
                save_count = 0
                try:
                    resourceinstanceid = process_resourceid(business_data[0]['ResourceID'], overwrite)
                except KeyError:
                    print '*'*80
                    print 'ERROR: No column \'ResourceID\' found in business data file. Please add a \'ResourceID\' column with a unique resource identifier.'
                    print '*'*80
                    sys.exit()
                blanktilecache = {}
                populated_nodegroups = {}
                populated_nodegroups[resourceinstanceid] = []
                previous_row_resourceid = None
                populated_tiles = []
                target_resource_model = None
                single_cardinality_nodegroups = [str(nodegroupid) for nodegroupid in NodeGroup.objects.values_list('nodegroupid', flat=True).filter(cardinality = '1')]
                node_datatypes = {str(nodeid): datatype for nodeid, datatype in  Node.objects.values_list('nodeid', 'datatype').filter(~Q(datatype='semantic'), graph__isresource=True)}
                display_nodes = get_display_nodes(mapping['resource_model_id'])
                all_nodes = Node.objects.all()
                datatype_factory = DataTypeFactory()
                concepts_to_create = {}
                new_concepts = {}
                required_nodes = {}
                for node in Node.objects.filter(isrequired=True, graph_id=mapping['resource_model_id']).values_list('nodeid', 'name'):
                    required_nodes[str(node[0])] = node[1]

                # This code can probably be moved into it's own module.
                resourceids = []
                non_contiguous_resource_ids = []
                previous_row_for_validation = None

                for row_number, row in enumerate(business_data):
                    # Check contiguousness of csv file.
                    if row['ResourceID'] != previous_row_for_validation and row['ResourceID'] in resourceids:
                        non_contiguous_resource_ids.append(row['ResourceID'])
                    else:
                        resourceids.append(row['ResourceID'])
                    previous_row_for_validation = row['ResourceID']

                    if create_concepts == True:
                        for node in mapping['nodes']:
                            if node['data_type'] in ['concept', 'concept-list', 'domain-value', 'domain-value-list'] and node['file_field_name'] in row.keys():

                                # print row[node['file_field_name']]
                                concept = []
                                for val in csv.reader([row[node['file_field_name']]], delimiter=',', quotechar='"'):
                                    concept.append(val)
                                concept = concept[0]

                                # check if collection is in concepts_to_create, add collection to concepts_to_create if it's not and add first child concept
                                if node['arches_nodeid'] not in concepts_to_create:
                                    concepts_to_create[node['arches_nodeid']] = {}
                                    for concept_value in concept:
                                        concepts_to_create[node['arches_nodeid']][str(uuid.uuid4())] = concept_value
                                # if collection in concepts to create then add child concept to collection
                                elif row[node['file_field_name']] not in concepts_to_create[node['arches_nodeid']].values():
                                    for concept_value in concept:
                                        concepts_to_create[node['arches_nodeid']][str(uuid.uuid4())] = concept_value

                if len(non_contiguous_resource_ids) > 0:
                    print '*'*80
                    for non_contiguous_resource_id in non_contiguous_resource_ids:
                        print 'ResourceID: ' + non_contiguous_resource_id
                    print 'ERROR: The preceding ResourceIDs are non-contiguous in your csv file. Please sort your csv file by ResourceID and try import again.'
                    print '*'*80
                    sys.exit()

                def create_reference_data(new_concepts, create_collections):
                    '''
                    Create the concepts/collections or domain options that the
                    values in the business data refer to.

                    new_concepts -- dict keyed by arches node id; each value is a
                        dict of {generated id: label}
                    create_collections -- when True a new collection is created
                        for each concept node (with a warning if the node already
                        has one); otherwise concepts are added to the collection
                        referenced by node.config['rdmCollection'], which is
                        created if that lookup fails

                    Warnings are added to self.errors one at a time; nothing is
                    returned.
                    '''
                    candidates = Concept().get(id='00000000-0000-0000-0000-000000000006')
                    for arches_nodeid, concepts in new_concepts.iteritems():
                        collectionid = str(uuid.uuid4())
                        topconceptid = str(uuid.uuid4())
                        node = Node.objects.get(nodeid=arches_nodeid)

                        # if node.datatype is concept or concept-list create concepts and collections
                        if node.datatype in ['concept', 'concept-list']:
                            # create collection if create_collections = create, otherwise append to collection already assigned to node
                            if create_collections == True:
                                collection_legacyoid = node.name + '_' + str(node.graph_id) + '_import'
                                # check to see that there is not already a collection for this node
                                if node.config['rdmCollection'] is not None:
                                    # Add only this warning: re-adding a shared, growing
                                    # list would duplicate every earlier warning.
                                    self.errors += [{'type': 'WARNING', 'message': 'A collection already exists for the {0} node. Use the add option to add concepts to this collection.'.format(node.name)}]
                                    collection = None
                                else:
                                    # if there is no collection assigned to this node, create one and assign it to the node
                                    try:
                                        # check to see that a collection with this legacyid does not already exist
                                        collection = Concept().get(legacyoid=collection_legacyoid)
                                    except Exception:
                                        collection = Concept({
                                            'id': collectionid,
                                            'legacyoid': collection_legacyoid,
                                            'nodetype': 'Collection'
                                        })
                                        collection.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                        node.config['rdmCollection'] = collectionid
                                        node.save()
                                        collection.save()
                                    else:
                                        self.errors += [{'type': 'WARNING', 'message': 'A collection with the legacyid {0} already exists.'.format(collection_legacyoid)}]
                            else:
                                # if create collection = add check that there is a collection associated with node, if no collection associated with node create a collection and associated with the node
                                try:
                                    collection = Concept().get(id=node.config['rdmCollection'])
                                except Exception:
                                    collection = Concept({
                                        'id': collectionid,
                                        'legacyoid': node.name + '_' + str(node.graph_id) + '_import',
                                        'nodetype': 'Collection'
                                    })
                                    collection.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                    node.config['rdmCollection'] = collectionid
                                    node.save()
                                    collection.save()

                            if collection is not None:
                                topconcept_legacyoid = node.name + '_' + str(node.graph_id)
                                # Check if top concept already exists, if not create it and add to candidates scheme
                                try:
                                    topconcept = Concept().get(legacyoid=topconcept_legacyoid)
                                except Exception:
                                    topconcept = Concept({
                                        'id': topconceptid,
                                        'legacyoid': topconcept_legacyoid,
                                        'nodetype': 'Concept'
                                    })
                                    topconcept.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                    topconcept.save()
                                candidates.add_relation(topconcept, 'narrower')

                                # create child concepts and relate to top concept and collection accordingly
                                for conceptid, value in concepts.iteritems():
                                    concept_legacyoid = value + '_' + node.name + '_' + str(node.graph_id)
                                    # check if concept already exists, if not create and add to topconcept and collection
                                    try:
                                        # The [0][0] lookup raises when no child has this
                                        # label; conceptid then keeps the generated id
                                        # that is used for the new concept below.
                                        conceptid = [concept for concept in topconcept.get_child_concepts(topconcept.id) if concept[1] == value][0][0]
                                        concept = Concept().get(id=conceptid)
                                    except Exception:
                                        concept = Concept({
                                            'id': conceptid,
                                            'legacyoid': concept_legacyoid,
                                            'nodetype': 'Concept'
                                        })
                                        concept.addvalue({'id': str(uuid.uuid4()), 'value': value, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                        concept.save()
                                    collection.add_relation(concept, 'member')
                                    topconcept.add_relation(concept, 'narrower')

                        # if node.datatype is domain or domain-list create options array in node.config
                        elif node.datatype in ['domain-value', 'domain-value-list']:
                            for domainid, value in concepts.iteritems():
                                # check if value already exists in domain
                                if value not in [t['text'] for t in node.config['options']]:
                                    domainvalue = {
                                        "text": value,
                                        "selected": False,
                                        "id": domainid
                                    }
                                    node.config['options'].append(domainvalue)
                                    node.save()

                if create_concepts == True:
                    create_reference_data(concepts_to_create, create_collections)
                # if concepts are created on import concept_lookup must be instatiated afterward
                concept_lookup = ConceptLookup()

                def cache(blank_tile):
                    '''
                    Register ``blank_tile`` in blanktilecache under the string
                    form of every node id it holds.

                    When the tile has data of its own, its data keys are used;
                    otherwise the data keys of the first tile in each entry of
                    blank_tile.tiles are used. Existing cache entries are kept.
                    '''
                    if blank_tile.data != {}:
                        for key in blank_tile.data.keys():
                            # Test membership with the same string form the entry
                            # is stored under; testing the raw key would never
                            # match for non-string keys.
                            cache_key = str(key)
                            if cache_key not in blanktilecache:
                                blanktilecache[cache_key] = blank_tile
                    else:
                        for nodegroup, tile in blank_tile.tiles.iteritems():
                            for key in tile[0].data.keys():
                                cache_key = str(key)
                                if cache_key not in blanktilecache:
                                    blanktilecache[cache_key] = blank_tile

                def column_names_to_targetids(row, mapping, row_number):
                    '''
                    Convert a business data row into a list of single-entry
                    {arches_nodeid: value} dicts.

                    Every non-empty column whose name matches (case-insensitively)
                    the 'file_field_name' of an entry in mapping['nodes'] yields
                    one dict. A row containing an 'ADDITIONAL' or 'MISSING' key
                    also adds a WARNING to self.errors; ``row_number`` must then
                    be of the form 'on line <n>'.
                    '''
                    if 'ADDITIONAL' in row or 'MISSING' in row:
                        self.errors += [{'type': 'WARNING', 'message': 'No resource created for ResourceID {0}. Line {1} has additional or missing columns.'.format(row['ResourceID'], str(int(row_number.split('on line ')[1])))}]

                    targets = []
                    for column_name, value in row.iteritems():
                        if value == '':
                            continue
                        targets.extend(
                            {node_mapping['arches_nodeid']: value}
                            for node_mapping in mapping['nodes']
                            if column_name.upper() == node_mapping['file_field_name'].upper()
                        )
                    return targets

                def transform_value(datatype, value, source, nodeid):
                    '''
                    Transforms values from probably string/wkt representation to specified datatype in arches.
                    This code could probably move to somewhere where it can be accessed by other importers.

                    datatype -- name of the datatype to transform to; '' means
                        no datatype is known and the value is returned untouched
                    value -- raw value from the business data
                    source -- description of where the value came from, used in
                        error messages and passed to the datatype's validate()
                    nodeid -- id of the node the value belongs to

                    Returns {'value': <transformed value, or None if an ERROR
                    was recorded>, 'request': ''}. Errors are added to self.errors.

                    NOTE(review): ``row_number`` is not a parameter; it is read
                    from the enclosing scope at call time.
                    '''
                    request = ''
                    if datatype != '':
                        errors = []
                        datatype_instance = datatype_factory.get_instance(datatype)
                        if datatype in ['concept', 'domain-value', 'concept-list', 'domain-value-list']:
                            try:
                                uuid.UUID(value)
                            except (ValueError, TypeError, AttributeError):
                                # The value is a label rather than an id: look its id up.
                                if datatype in ['domain-value', 'domain-value-list']:
                                    collection_id = nodeid
                                else:
                                    collection_id = Node.objects.get(nodeid=nodeid).config['rdmCollection']
                                if collection_id is not None:
                                    value = concept_lookup.lookup_labelid_from_label(value, collection_id)
                        try:
                            value = datatype_instance.transform_import_values(value, nodeid)
                            errors = datatype_instance.validate(value, row_number, source)
                        except Exception as e:
                            errors.append({'type': 'ERROR', 'message': 'datatype: {0} value: {1} {2} - {3}'.format(datatype_instance.datatype_model.classname, value, source, str(e) + ' or is not a prefLabel in the given collection.')})
                        if len(errors) > 0:
                            # Any ERROR (as opposed to a WARNING) discards the value.
                            if any(error['type'] == 'ERROR' for error in errors):
                                value = None
                            self.errors += errors
                    else:
                        # Translate the template first, then interpolate; translating
                        # the already formatted string can never match a catalogue entry.
                        print(_('No datatype detected for {0}').format(value))

                    return {'value': value, 'request': request}

                def get_blank_tile(source_data):
                    '''
                    Return a fresh copy of the blank tile for the first node id in
                    ``source_data``, or None when ``source_data`` is empty or its
                    first entry is an empty dict.

                    Blank tiles are fetched once with Tile.get_blank_tile and
                    then served from blanktilecache; the pickle round trip gives
                    each caller an independent copy.
                    '''
                    blank_tile = None
                    if len(source_data) > 0 and source_data[0] != {}:
                        key = str(source_data[0].keys()[0])
                        if key in blanktilecache:
                            blank_tile = blanktilecache[key]
                        else:
                            blank_tile = Tile.get_blank_tile(key)
                            cache(blank_tile)
                    return cPickle.loads(cPickle.dumps(blank_tile, -1))

                def check_required_nodes(tile, parent_tile, required_nodes, all_nodes):
                    '''
                    Check that each required node in a tile is populated.

                    tile -- tile to inspect; when it has no data of its own its
                        child tiles are checked recursively
                    parent_tile -- tile removed from populated_tiles when a
                        required node is empty
                    required_nodes -- dict of {nodeid: node name} of required nodes
                    all_nodes -- Node queryset used to name the affected nodes

                    A WARNING is added to self.errors for every required node
                    whose value is None.

                    NOTE(review): the warning names the nodes of ``target_tile``
                    from the enclosing scope, not of ``tile`` - confirm intended.
                    '''
                    errors = []
                    if len(required_nodes) > 0:
                        if bool(tile.data):
                            for target_k, target_v in tile.data.iteritems():
                                if target_k in required_nodes and target_v is None:
                                    # The parent tile may already have been removed
                                    # for an earlier empty required node; removing it
                                    # again would raise ValueError.
                                    if parent_tile in populated_tiles:
                                        populated_tiles.remove(parent_tile)
                                    errors.append({'type': 'WARNING', 'message': 'The {0} node is required and must be populated in order to populate the {1} nodes. This data was not imported.'.format(required_nodes[target_k],  ', '.join(all_nodes.filter(nodegroup_id=str(target_tile.nodegroup_id)).values_list('name', flat=True)))})
                        elif bool(tile.tiles):
                            for tile_k, tile_v in tile.tiles.iteritems():
                                for t in tile_v:
                                    check_required_nodes(t, parent_tile, required_nodes, all_nodes)
                    if len(errors) > 0:
                        self.errors += errors

                resources = []

                for row_number, row in enumerate(business_data):
                    row_number = 'on line ' + unicode(row_number + 2) #to represent the row in a csv accounting for the header and 0 index
                    if row['ResourceID'] != previous_row_resourceid and previous_row_resourceid is not None:

                        save_count = save_count + 1
                        self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count)

                        # reset values for next resource instance
                        populated_tiles = []
                        resourceinstanceid = process_resourceid(row['ResourceID'], overwrite)
                        populated_nodegroups[resourceinstanceid] = []

                    source_data = column_names_to_targetids(row, mapping, row_number)

                    missing_display_nodes = [n for n in display_nodes if n not in [list(b) for b in zip(*[a.keys() for a in source_data])][0]]
                    if len(missing_display_nodes) > 0:
                        errors = []
                        for mdn in missing_display_nodes:
                            mdn_name = all_nodes.filter(nodeid=mdn).values_list('name', flat=True)[0]
                            errors.append({'type': 'WARNING', 'message': '{0} {1} is null or not mapped and participates in a {2} display value function.'.format(mdn_name, row_number, mapping['resource_model_name'])})
                        if len(errors) > 0:
                            self.errors += errors

                    if len(source_data) > 0:
                        if source_data[0].keys():
                            try:
                                target_resource_model = all_nodes.get(nodeid=source_data[0].keys()[0]).graph_id
                            except:
                                print '*'*80
                                print 'ERROR: No resource model found. Please make sure the resource model this business data is mapped to has been imported into Arches.'
                                print '*'*80
                                sys.exit()

                        target_tile = get_blank_tile(source_data)
                        if 'TileID' in row and row['TileID'] is not None:
                            target_tile.tileid = row['TileID']
                        if 'NodeGroupID' in row and row['NodeGroupID'] is not None:
                            target_tile.nodegroupid = row['NodeGroupID']

                        def populate_tile(source_data, target_tile):
                            '''
                            source_data = [{nodeid:value},{nodeid:value},{nodeid:value} . . .]
                            All nodes in source_data belong to the same resource.
                            A dictionary of nodeids would not allow for multiple values for the same nodeid.
                            Grouping is enforced by having all grouped attributes in the same row.
                            '''
                            need_new_tile = False
                            # Set target tileid to None because this will be a new tile, a new tileid will be created on save.
                            target_tile.tileid = uuid.uuid4()
                            if 'TileID' in row and row['TileID'] is not None:
                                target_tile.tileid = row['TileID']
                            target_tile.resourceinstance_id = resourceinstanceid
                            # Check the cardinality of the tile and check if it has been populated.
                            # If cardinality is one and the tile is populated the tile should not be populated again.
                            if str(target_tile.nodegroup_id) in single_cardinality_nodegroups and 'TileiD' not in row:
                                target_tile_cardinality = '1'
                            else:
                                target_tile_cardinality = 'n'

                            if str(target_tile.nodegroup_id) not in populated_nodegroups[resourceinstanceid]:
                                # Check if we are populating a parent tile by inspecting the target_tile.data array.
                                if target_tile.data != {}:
                                    # Iterate through the target_tile nodes and begin populating by iterating througth source_data array.
                                    # The idea is to populate as much of the target_tile as possible, before moving on to the next target_tile.
                                    for target_key in target_tile.data.keys():
                                        for source_tile in source_data:
                                            for source_key in source_tile.keys():
                                                # Check for source and target key match.
                                                if source_key == target_key:
                                                    if target_tile.data[source_key] == None:
                                                        # If match populate target_tile node with transformed value.
                                                        value = transform_value(node_datatypes[source_key], source_tile[source_key], row_number, source_key)
                                                        target_tile.data[source_key] = value['value']
                                                        # target_tile.request = value['request']
                                                        # Delete key from source_tile so we do not populate another tile based on the same data.
                                                        del source_tile[source_key]
                                    # Cleanup source_data array to remove source_tiles that are now '{}' from the code above.
                                    source_data[:] = [item for item in source_data if item != {}]

                                # Check if we are populating a child tile(s) by inspecting the target_tiles.tiles array.
                                elif target_tile.tiles != None:
                                    populated_child_nodegroups = []
                                    for nodegroupid, childtile in target_tile.tiles.iteritems():
                                        prototype_tile = childtile.pop()
                                        if str(prototype_tile.nodegroup_id) in single_cardinality_nodegroups:
                                            child_tile_cardinality = '1'
                                        else:
                                            child_tile_cardinality = 'n'

                                        def populate_child_tiles(source_data):
                                            prototype_tile_copy = cPickle.loads(cPickle.dumps(prototype_tile, -1))
                                            tileid = row['TileID'] if 'TileID' in row else uuid.uuid4()
                                            prototype_tile_copy.tileid = tileid
                                            prototype_tile_copy.parenttile = target_tile
                                            parenttileid = row['ParentTileID'] if 'ParentTileID' in row and row['ParentTileID'] is not None else None
                                            if parenttileid is not None:
                                                prototype_tile_copy.parenttile.tileid = parenttileid
                                            prototype_tile_copy.resourceinstance_id = resourceinstanceid
                                            if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                for target_key in prototype_tile_copy.data.keys():
                                                    for source_column in source_data:
                                                        for source_key in source_column.keys():
                                                            if source_key == target_key:
                                                                if prototype_tile_copy.data[source_key] == None:
                                                                    value = transform_value(node_datatypes[source_key], source_column[source_key], row_number, source_key)
                                                                    prototype_tile_copy.data[source_key] = value['value']
                                                                    # print prototype_tile_copy.data[source_key]
                                                                    # print '&'*80
                                                                    # target_tile.request = value['request']
                                                                    del source_column[source_key]
                                                                else:
                                                                    populate_child_tiles(source_data)


                                            if prototype_tile_copy.data != {}:
                                                if len([item for item in prototype_tile_copy.data.values() if item != None]) > 0:
                                                    if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                        childtile.append(prototype_tile_copy)

                                            if prototype_tile_copy != None:
                                                if child_tile_cardinality == '1' and 'NodeGroupID' not in row:
                                                    populated_child_nodegroups.append(str(prototype_tile_copy.nodegroup_id))

                                            source_data[:] = [item for item in source_data if item != {}]

                                        populate_child_tiles(source_data)

                                if not target_tile.is_blank():
                                    populated_tiles.append(target_tile)

                                if len(source_data)>0:
                                    need_new_tile = True

                                if target_tile_cardinality == '1' and 'NodeGroupID' not in row:
                                    populated_nodegroups[resourceinstanceid].append(str(target_tile.nodegroup_id))

                                if need_new_tile:
                                    new_tile = get_blank_tile(source_data)
                                    if new_tile != None:
                                        populate_tile(source_data, new_tile)

                        # mock_request_object = HttpRequest()

                        if target_tile != None and len(source_data) > 0:
                            populate_tile(source_data, target_tile)
                            # Check that required nodes are populated. If not remove tile from populated_tiles array.
                            check_required_nodes(target_tile, target_tile, required_nodes, all_nodes)

                    previous_row_resourceid = row['ResourceID']
                    legacyid = row['ResourceID']

                if 'legacyid' in locals():
                    self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count)

                if bulk:
                    Resource.bulk_save(resources=resources)

                print _('%s total resource saved' % (save_count + 1))

        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            formatted = traceback.format_exception(exc_type, exc_value, exc_traceback)
            if len(formatted):
                for message in formatted:
                    print message

        finally:
            pass
Esempio n. 6
0
    def import_business_data(self,
                             business_data=None,
                             mapping=None,
                             overwrite='append',
                             bulk=False):
        '''
        Build resource instances from mapped, tabular business data and save them.

        business_data -- sequence of row dicts. Every row is read through its 'ResourceID' key and
            all rows that belong to one resource must be contiguous.
        mapping -- dict whose 'nodes' list pairs a 'file_field_name' (column name) with the
            'arches_nodeid' the column is imported into.
        overwrite -- when equal to 'overwrite', an existing resource matching a row's ResourceID is
            deleted before it is re-created. Any other value leaves existing resources alone.
        bulk -- passed through to self.save_resource; when true the remaining batch is flushed with
            Resource.bulk_save once all rows have been processed.

        Data problems are appended to self.errors. A missing 'ResourceID' column, non-contiguous
        ResourceIDs or an unknown resource model print a message and call sys.exit().
        Any other exception is printed as a traceback and is not re-raised.
        '''

        def process_resourceid(resourceid, overwrite):
            '''
            Resolve a ResourceID taken from the business data to a resource instance id.
            A ResourceID that parses as a UUID is used as is, anything else is treated as a legacyid.
            Returns None when more than one resource shares the legacyid.
            '''
            try:
                resourceinstanceid = uuid.UUID(resourceid)
            except Exception:
                # Not a UUID: get resources with the given legacyid.
                ret = Resource.objects.filter(legacyid=resourceid)
                if len(ret) > 1:
                    # More than one resource for a legacyid should never actually happen.
                    resourceinstanceid = None
                elif len(ret) == 0:
                    # No resource with the given legacyid, create an archesid for the resource.
                    resourceinstanceid = uuid.uuid4()
                else:
                    # A resource exists for the legacyid, reuse its archesid.
                    if overwrite == 'overwrite':
                        Resource.objects.get(pk=str(ret[0].resourceinstanceid)).delete()
                    resourceinstanceid = ret[0].resourceinstanceid
            else:
                if overwrite == 'overwrite':
                    try:
                        ret = Resource.objects.filter(resourceinstanceid=resourceid)
                        Resource.objects.get(pk=str(ret[0].resourceinstanceid)).delete()
                    except Exception:
                        # Best effort, as before: typically there is simply no existing resource
                        # (IndexError on ret[0]), in which case there is nothing to delete.
                        pass

            return resourceinstanceid

        try:
            with transaction.atomic():
                save_count = 0
                try:
                    resourceinstanceid = process_resourceid(business_data[0]['ResourceID'], overwrite)
                except KeyError:
                    print('*' * 80)
                    print('ERROR: No column \'ResourceID\' found in business data file. Please add a \'ResourceID\' column with a unique resource identifier.')
                    print('*' * 80)
                    sys.exit()
                blanktilecache = {}
                populated_nodegroups = {}
                populated_nodegroups[resourceinstanceid] = []
                previous_row_resourceid = None
                populated_tiles = []
                target_resource_model = None
                # Only used for membership tests, so a set keeps the lookups cheap.
                single_cardinality_nodegroups = {
                    str(nodegroupid)
                    for nodegroupid in NodeGroup.objects.values_list('nodegroupid', flat=True).filter(cardinality='1')
                }
                node_datatypes = {
                    str(nodeid): datatype
                    for nodeid, datatype in Node.objects.values_list('nodeid', 'datatype').filter(
                        ~Q(datatype='semantic'), graph__isresource=True)
                }
                all_nodes = Node.objects.all()
                datatype_factory = DataTypeFactory()
                concept_lookup = ConceptLookup()
                required_nodes = {
                    str(nodeid): name
                    for nodeid, name in Node.objects.filter(isrequired=True).values_list('nodeid', 'name')
                }

                # Check contiguousness of the csv file: once the rows of a resource have ended,
                # its ResourceID must not show up again further down.
                seen_resourceids = set()
                non_contiguous_resource_ids = []
                previous_row_for_validation = None

                for row in business_data:
                    if row['ResourceID'] != previous_row_for_validation and row['ResourceID'] in seen_resourceids:
                        non_contiguous_resource_ids.append(row['ResourceID'])
                    else:
                        seen_resourceids.add(row['ResourceID'])
                    previous_row_for_validation = row['ResourceID']

                if len(non_contiguous_resource_ids) > 0:
                    print('*' * 80)
                    for non_contiguous_resource_id in non_contiguous_resource_ids:
                        print('ResourceID: ' + non_contiguous_resource_id)
                    print('ERROR: The preceding ResourceIDs are non-contiguous in your csv file. Please sort your csv file by ResourceID and try import again.')
                    print('*' * 80)
                    sys.exit()

                def cache(blank_tile):
                    '''
                    Register blank_tile in blanktilecache under every nodeid it can hold, either
                    directly in its data or in the data of the first tile of each child nodegroup.
                    Existing cache entries are kept.
                    '''
                    if blank_tile.data != {}:
                        for key in blank_tile.data.keys():
                            if str(key) not in blanktilecache:
                                blanktilecache[str(key)] = blank_tile
                    else:
                        for child_tiles in blank_tile.tiles.itervalues():
                            for key in child_tiles[0].data.keys():
                                if str(key) not in blanktilecache:
                                    blanktilecache[str(key)] = blank_tile

                def column_names_to_targetids(row, mapping, row_number):
                    '''
                    Translate one row of {column name: value} into a list of single item
                    {nodeid: value} dicts using mapping['nodes']. Empty string values are skipped
                    and column names are matched case insensitively.
                    '''
                    if 'ADDITIONAL' in row or 'MISSING' in row:
                        self.errors += [{
                            'type': 'WARNING',
                            'message': 'No resource created for ResourceID {0}. Line {1} has additional or missing columns.'
                            .format(row['ResourceID'], str(int(row_number.split('on line ')[1])))
                        }]
                    new_row = []
                    for key, value in row.iteritems():
                        if value != '':
                            for mapped_node in mapping['nodes']:
                                if key.upper() == mapped_node['file_field_name'].upper():
                                    new_row.append({mapped_node['arches_nodeid']: value})
                    return new_row

                def transform_value(datatype, value, source, nodeid):
                    '''
                    Transforms values from probably string/wkt representation to specified datatype in arches.
                    Returns {'value': ..., 'request': ''}; the value is None when transforming or
                    validating produced errors, which are appended to self.errors.
                    This code could probably move to somewhere where it can be accessed by other importers.
                    '''
                    request = ''
                    if datatype != '':
                        errors = []
                        datatype_instance = datatype_factory.get_instance(datatype)
                        if datatype in ['concept', 'domain-value', 'concept-list', 'domain-value-list']:
                            try:
                                uuid.UUID(value)
                            except Exception:
                                # The value is not a UUID, so treat it as a label and look up its id.
                                if datatype in ['domain-value', 'domain-value-list']:
                                    collection_id = nodeid
                                else:
                                    collection_id = Node.objects.get(nodeid=nodeid).config['rdmCollection']
                                if collection_id is not None:
                                    value = concept_lookup.lookup_labelid_from_label(value, collection_id)
                        try:
                            value = datatype_instance.transform_import_values(value, nodeid)
                            errors = datatype_instance.validate(value, source)
                        except Exception as e:
                            errors.append({
                                'type': 'ERROR',
                                'message': 'datatype: {0} value: {1} {2} - {3}'.format(
                                    datatype_instance.datatype_model.classname, value, source, e)
                            })
                        if len(errors) > 0:
                            value = None
                            self.errors += errors
                    else:
                        print(_('No datatype detected for {0}'.format(value)))

                    return {'value': value, 'request': request}

                def get_blank_tile(source_data):
                    '''
                    Return a fresh copy of the blank tile that can hold the first nodeid found in
                    source_data, or None when source_data has nothing left to place.
                    '''
                    blank_tile = None
                    if len(source_data) > 0 and source_data[0] != {}:
                        key = str(source_data[0].keys()[0])
                        if key not in blanktilecache:
                            blank_tile = Tile.get_blank_tile(key)
                            cache(blank_tile)
                        else:
                            blank_tile = blanktilecache[key]
                    # The pickle round trip hands every caller its own copy of the cached tile.
                    return cPickle.loads(cPickle.dumps(blank_tile, -1))

                def check_required_nodes(tile, required_nodes, all_nodes, parent_tile=None):
                    '''
                    Check that each required node in a tile is populated.
                    If a required node is empty, parent_tile (the top level tile, which defaults to
                    tile itself) is removed from populated_tiles and a warning is recorded.
                    Child tiles are checked recursively against the same parent_tile.
                    '''
                    if parent_tile is None:
                        parent_tile = tile
                    errors = []
                    if len(required_nodes) > 0:
                        if tile.data != {}:
                            for target_k, target_v in tile.data.iteritems():
                                if target_k in required_nodes and target_v is None:
                                    # The tile may never have been added (it was blank) or may already
                                    # have been dropped because of another empty required node.
                                    if parent_tile in populated_tiles:
                                        populated_tiles.remove(parent_tile)
                                    errors.append({
                                        'type': 'WARNING',
                                        'message': 'The {0} node is required and must be populated in order to populate the {1} nodes. This data was not imported.'
                                        .format(
                                            required_nodes[target_k],
                                            ', '.join(all_nodes.filter(
                                                nodegroup_id=str(tile.nodegroup_id)).values_list('name', flat=True)))
                                    })
                        elif tile.tiles is not None:
                            for child_tiles in tile.tiles.itervalues():
                                for child_tile in child_tiles:
                                    check_required_nodes(child_tile, required_nodes, all_nodes, parent_tile)
                    if len(errors) > 0:
                        self.errors += errors

                def populate_tile(source_data, target_tile):
                    '''
                    source_data = [{nodeid:value},{nodeid:value},{nodeid:value} . . .]
                    All nodes in source_data belong to the same resource.
                    A dictionary of nodeids would not allow for multiple values for the same nodeid.
                    Grouping is enforced by having all grouped attributes in the same row.

                    Reads row_number and resourceinstanceid of the row currently being processed
                    from the enclosing scope and consumes source_data in place.
                    '''
                    need_new_tile = False
                    # This will be a new tile, so give it a fresh tileid.
                    target_tile.tileid = uuid.uuid4()
                    target_tile.resourceinstance_id = resourceinstanceid
                    # Check the cardinality of the tile and check if it has been populated.
                    # If cardinality is one and the tile is populated the tile should not be populated again.
                    if str(target_tile.nodegroup_id) in single_cardinality_nodegroups:
                        target_tile_cardinality = '1'
                    else:
                        target_tile_cardinality = 'n'

                    if str(target_tile.nodegroup_id) not in populated_nodegroups[resourceinstanceid]:
                        # Check if we are populating a parent tile by inspecting the target_tile.data array.
                        if target_tile.data != {}:
                            # Iterate through the target_tile nodes and begin populating by iterating through source_data array.
                            # The idea is to populate as much of the target_tile as possible, before moving on to the next target_tile.
                            for target_key in target_tile.data.keys():
                                for source_tile in source_data:
                                    for source_key in source_tile.keys():
                                        # Check for source and target key match.
                                        if source_key == target_key:
                                            if target_tile.data[source_key] is None:
                                                # If match populate target_tile node with transformed value.
                                                value = transform_value(
                                                    node_datatypes[source_key], source_tile[source_key], row_number, source_key)
                                                target_tile.data[source_key] = value['value']
                                                # Delete key from source_tile so we do not populate another tile based on the same data.
                                                del source_tile[source_key]
                            # Cleanup source_data array to remove source_tiles that are now '{}' from the code above.
                            source_data[:] = [item for item in source_data if item != {}]

                        # Check if we are populating a child tile(s) by inspecting the target_tiles.tiles array.
                        elif target_tile.tiles is not None:
                            populated_child_nodegroups = []
                            for childtile in target_tile.tiles.itervalues():
                                prototype_tile = childtile.pop()
                                if str(prototype_tile.nodegroup_id) in single_cardinality_nodegroups:
                                    child_tile_cardinality = '1'
                                else:
                                    child_tile_cardinality = 'n'

                                def populate_child_tiles(source_data):
                                    # Every call fills its own copy of the prototype child tile.
                                    prototype_tile_copy = cPickle.loads(cPickle.dumps(prototype_tile, -1))
                                    prototype_tile_copy.tileid = uuid.uuid4()
                                    prototype_tile_copy.parenttile = target_tile
                                    prototype_tile_copy.resourceinstance_id = resourceinstanceid
                                    if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                        for target_key in prototype_tile_copy.data.keys():
                                            for source_column in source_data:
                                                for source_key in source_column.keys():
                                                    if source_key == target_key:
                                                        if prototype_tile_copy.data[source_key] is None:
                                                            value = transform_value(
                                                                node_datatypes[source_key], source_column[source_key],
                                                                row_number, source_key)
                                                            prototype_tile_copy.data[source_key] = value['value']
                                                            del source_column[source_key]
                                                        else:
                                                            # The node already holds a value, so the extra
                                                            # value goes into another child tile.
                                                            populate_child_tiles(source_data)

                                    if prototype_tile_copy.data != {}:
                                        if any(item is not None for item in prototype_tile_copy.data.values()):
                                            if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                childtile.append(prototype_tile_copy)

                                    if prototype_tile_copy is not None:
                                        if child_tile_cardinality == '1':
                                            populated_child_nodegroups.append(str(prototype_tile_copy.nodegroup_id))

                                    source_data[:] = [item for item in source_data if item != {}]

                                populate_child_tiles(source_data)

                        if not target_tile.is_blank():
                            populated_tiles.append(target_tile)

                        if len(source_data) > 0:
                            need_new_tile = True

                        if target_tile_cardinality == '1':
                            populated_nodegroups[resourceinstanceid].append(str(target_tile.nodegroup_id))

                        if need_new_tile:
                            new_tile = get_blank_tile(source_data)
                            if new_tile is not None:
                                populate_tile(source_data, new_tile)

                resources = []

                for row_index, row in enumerate(business_data):
                    # Represents the row in a csv, accounting for the header and the 0 based index.
                    row_number = 'on line ' + unicode(row_index + 2)
                    if row['ResourceID'] != previous_row_resourceid and previous_row_resourceid is not None:

                        save_count = save_count + 1
                        self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources,
                                           target_resource_model, bulk, save_count)

                        # reset values for next resource instance
                        populated_tiles = []
                        resourceinstanceid = process_resourceid(row['ResourceID'], overwrite)
                        populated_nodegroups[resourceinstanceid] = []

                    source_data = column_names_to_targetids(row, mapping, row_number)

                    if len(source_data) > 0:
                        if source_data[0].keys():
                            try:
                                target_resource_model = all_nodes.get(nodeid=source_data[0].keys()[0]).graph_id
                            except Exception:
                                print('*' * 80)
                                print('ERROR: No resource model found. Please make sure the resource model this business data is mapped to has been imported into Arches.')
                                print('*' * 80)
                                sys.exit()

                        target_tile = get_blank_tile(source_data)

                        if target_tile is not None and len(source_data) > 0:
                            populate_tile(source_data, target_tile)
                            # Check that required nodes are populated. If not remove tile from populated_tiles array.
                            check_required_nodes(target_tile, required_nodes, all_nodes)

                    previous_row_resourceid = row['ResourceID']
                    legacyid = row['ResourceID']

                # legacyid is only bound once at least one row has been processed.
                if 'legacyid' in locals():
                    self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources,
                                       target_resource_model, bulk, save_count)

                if bulk:
                    Resource.bulk_save(resources=resources)

                print(_('%s total resource saved' % (save_count + 1)))

        except Exception:
            # Report the failure without re-raising; sys.exit() above is not caught here
            # because SystemExit does not derive from Exception.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            formatted = traceback.format_exception(exc_type, exc_value, exc_traceback)
            for message in formatted:
                print(message)
Esempio n. 7
0
    def import_business_data(self, business_data=None, mapping=None, overwrite='append', bulk=False):
        # errors = businessDataValidator(self.business_data)

        def process_resourceid(resourceid, overwrite):
            # Test if resourceid is a UUID.
            try:
                resourceinstanceid = uuid.UUID(resourceid)
                # If resourceid is a UUID check if it is already an arches resource.
                try:
                    ret = Resource.objects.filter(resourceinstanceid=resourceid)
                    # If resourceid is an arches resource and overwrite is true, delete the existing arches resource.
                    if overwrite == 'overwrite':
                        Resource(str(ret[0].resourceinstanceid)).delete()
                    resourceinstanceid = resourceinstanceid
                # If resourceid is not a UUID create one.
                except:
                    resourceinstanceid = resourceinstanceid
            except:
                # Get resources with the given legacyid
                ret = Resource.objects.filter(legacyid=resourceid)
                # If more than one resource is returned than make resource = None. This should never actually happen.
                if len(ret) > 1:
                    resourceinstanceid = None
                # If no resource is returned with the given legacyid then create an archesid for the resource.
                elif len(ret) == 0:
                    resourceinstanceid = uuid.uuid4()
                # If a resource is returned with the give legacyid then return its archesid
                else:
                    if overwrite == 'overwrite':
                        Resource(str(ret[0].resourceinstanceid)).delete()
                    resourceinstanceid = ret[0].resourceinstanceid

            return resourceinstanceid

        try:
            with transaction.atomic():
                save_count = 0
                try:
                    resourceinstanceid = process_resourceid(business_data[0]['ResourceID'], overwrite)
                except KeyError:
                    print '*'*80
                    print 'ERROR: No column \'ResourceID\' found in business data file. Please add a \'ResourceID\' column with a unique resource identifier.'
                    print '*'*80
                    sys.exit()
                blanktilecache = {}
                populated_nodegroups = {}
                populated_nodegroups[resourceinstanceid] = []
                previous_row_resourceid = None
                populated_tiles = []
                single_cardinality_nodegroups = [str(nodegroupid) for nodegroupid in NodeGroup.objects.values_list('nodegroupid', flat=True).filter(cardinality = '1')]
                node_datatypes = {str(nodeid): datatype for nodeid, datatype in  Node.objects.values_list('nodeid', 'datatype').filter(~Q(datatype='semantic'), graph__isresource=True)}
                all_nodes = Node.objects.all()
                datatype_factory = DataTypeFactory()

                # This code can probably be moved into it's own module.
                resourceids = []
                non_contiguous_resource_ids = []
                previous_row_for_validation = None

                for row_number, row in enumerate(business_data):
                    # Check contiguousness of csv file.
                    if row['ResourceID'] != previous_row_for_validation and row['ResourceID'] in resourceids:
                        non_contiguous_resource_ids.append(row['ResourceID'])
                    else:
                        resourceids.append(row['ResourceID'])
                    previous_row_for_validation = row['ResourceID']

                if len(non_contiguous_resource_ids) > 0:
                    print '*'*80
                    print 'ERROR: Resources in your csv file are non-contiguous. Please sort your csv file by ResourceID and try import again.'
                    print '*'*80
                    sys.exit()

                def cache(blank_tile):
                    if blank_tile.data != {}:
                        for key in blank_tile.data.keys():
                            if key not in blanktilecache:
                                blanktilecache[str(key)] = blank_tile
                    else:
                        for nodegroup, tile in blank_tile.tiles.iteritems():
                            for key in tile[0].data.keys():
                                if key not in blanktilecache:
                                    blanktilecache[str(key)] = blank_tile

                def column_names_to_targetids(row, mapping, row_number):
                    errors = []
                    new_row = []
                    if 'ADDITIONAL' in row or 'MISSING' in row:
                        errors.append({'type': 'WARNING', 'message': 'No resource created for ResourceID {0}. Line {1} has additional or missing columns.'.format(row['ResourceID'], str(int(row_number.split('on line ')[1])))})
                        if len(errors) > 0:
                            self.errors += errors
                    for key, value in row.iteritems():
                        if value != '':
                            for row in mapping['nodes']:
                                if key.upper() == row['file_field_name'].upper():
                                    new_row.append({row['arches_nodeid']: value})
                    return new_row

                def transform_value(datatype, value, source):
                    '''
                    Transforms values from probably string/wkt representation to specified datatype in arches.
                    This code could probably move to somehwere where it can be accessed by other importers.
                    '''
                    request = ''
                    if datatype != '':
                        errors = []
                        datatype_instance = datatype_factory.get_instance(datatype)
                        try:
                            value = datatype_instance.transform_import_values(value)
                            errors = datatype_instance.validate(value, source)
                        except Exception as e:
                            errors.append({'type': 'ERROR', 'message': 'datatype: {0} value: {1} {2} - {3}'.format(datatype_instance.datatype_model.classname, value, source, e)})
                            if len(errors) > 0:
                                self.errors += errors
                    else:
                        print _('No datatype detected for {0}'.format(value))

                    return {'value': value, 'request': request}

                def get_blank_tile(source_data):
                    if len(source_data) > 0:
                        if source_data[0] != {}:
                            key = str(source_data[0].keys()[0])
                            if key not in blanktilecache:
                                blank_tile = Tile.get_blank_tile(key)
                                cache(blank_tile)
                            else:
                                blank_tile = blanktilecache[key]
                        else:
                            blank_tile = None
                    else:
                        blank_tile = None
                    # return deepcopy(blank_tile)
                    return cPickle.loads(cPickle.dumps(blank_tile, -1))
                resources = []

                for row_number, row in enumerate(business_data):
                    row_number = 'on line ' + unicode(row_number + 2) #to represent the row in a csv accounting for the header and 0 index
                    if row['ResourceID'] != previous_row_resourceid and previous_row_resourceid is not None:

                        save_count = save_count + 1
                        self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count)

                        # reset values for next resource instance
                        populated_tiles = []
                        resourceinstanceid = process_resourceid(row['ResourceID'], overwrite)
                        populated_nodegroups[resourceinstanceid] = []

                    source_data = column_names_to_targetids(row, mapping, row_number)

                    if len(source_data) > 0:
                        if source_data[0].keys():
                            try:
                                target_resource_model = all_nodes.get(nodeid=source_data[0].keys()[0]).graph_id
                            except:
                                print '*'*80
                                print 'ERROR: No resource model found. Please make sure the resource model this business data is mapped to has been imported into Arches.'
                                print '*'*80
                                sys.exit()

                        target_tile = get_blank_tile(source_data)

                        def populate_tile(source_data, target_tile):
                            '''
                            source_data = [{nodeid:value},{nodeid:value},{nodeid:value} . . .]
                            All nodes in source_data belong to the same resource.
                            A dictionary of nodeids would not allow for multiple values for the same nodeid.
                            Grouping is enforced by having all grouped attributes in the same row.
                            '''
                            need_new_tile = False
                            # Set target tileid to None because this will be a new tile, a new tileid will be created on save.
                            target_tile.tileid = uuid.uuid4()
                            target_tile.resourceinstance_id = resourceinstanceid
                            # Check the cardinality of the tile and check if it has been populated.
                            # If cardinality is one and the tile is populated the tile should not be populated again.
                            if str(target_tile.nodegroup_id) in single_cardinality_nodegroups:
                                target_tile_cardinality = '1'
                            else:
                                target_tile_cardinality = 'n'

                            if str(target_tile.nodegroup_id) not in populated_nodegroups[resourceinstanceid]:
                                # Check if we are populating a parent tile by inspecting the target_tile.data array.
                                if target_tile.data != {}:
                                    # Iterate through the target_tile nodes and begin populating by iterating througth source_data array.
                                    # The idea is to populate as much of the target_tile as possible, before moving on to the next target_tile.
                                    for target_key in target_tile.data.keys():
                                        for source_tile in source_data:
                                            for source_key in source_tile.keys():
                                                # Check for source and target key match.
                                                if source_key == target_key:
                                                    if target_tile.data[source_key] == None:
                                                        # If match populate target_tile node with transformed value.
                                                        value = transform_value(node_datatypes[source_key], source_tile[source_key], row_number)
                                                        target_tile.data[source_key] = value['value']
                                                        # target_tile.request = value['request']
                                                        # Delete key from source_tile so we do not populate another tile based on the same data.
                                                        del source_tile[source_key]
                                    # Cleanup source_data array to remove source_tiles that are now '{}' from the code above.
                                    source_data[:] = [item for item in source_data if item != {}]

                                # Check if we are populating a child tile(s) by inspecting the target_tiles.tiles array.
                                elif target_tile.tiles != None:
                                    populated_child_nodegroups = []
                                    for nodegroupid, childtile in target_tile.tiles.iteritems():
                                        prototype_tile = childtile.pop()
                                        if str(prototype_tile.nodegroup_id) in single_cardinality_nodegroups:
                                            child_tile_cardinality = '1'
                                        else:
                                            child_tile_cardinality = 'n'

                                        def populate_child_tiles(source_data):
                                            prototype_tile_copy = cPickle.loads(cPickle.dumps(prototype_tile, -1))
                                            prototype_tile_copy.tileid = uuid.uuid4()
                                            prototype_tile_copy.parenttile = target_tile
                                            prototype_tile_copy.resourceinstance_id = resourceinstanceid
                                            if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                for target_key in prototype_tile_copy.data.keys():
                                                    for source_column in source_data:
                                                        for source_key in source_column.keys():
                                                            if source_key == target_key:
                                                                if prototype_tile_copy.data[source_key] == None:
                                                                    value = transform_value(node_datatypes[source_key], source_column[source_key], row_number)
                                                                    prototype_tile_copy.data[source_key] = value['value']
                                                                    # target_tile.request = value['request']
                                                                    del source_column[source_key]
                                                                else:
                                                                    populate_child_tiles(source_data)


                                            if prototype_tile_copy.data != {}:
                                                if len([item for item in prototype_tile_copy.data.values() if item != None]) > 0:
                                                    if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                        childtile.append(prototype_tile_copy)

                                            if prototype_tile_copy != None:
                                                if child_tile_cardinality == '1':
                                                    populated_child_nodegroups.append(str(prototype_tile_copy.nodegroup_id))

                                            source_data[:] = [item for item in source_data if item != {}]

                                        populate_child_tiles(source_data)

                                populated_tiles.append(target_tile)

                                if len(source_data)>0:
                                    need_new_tile = True

                                if target_tile_cardinality == '1':
                                    populated_nodegroups[resourceinstanceid].append(str(target_tile.nodegroup_id))

                                if need_new_tile:
                                    new_tile = get_blank_tile(source_data)
                                    if new_tile != None:
                                        populate_tile(source_data, new_tile)

                        # mock_request_object = HttpRequest()

                        if target_tile != None and len(source_data) > 0:
                            populate_tile(source_data, target_tile)

                    previous_row_resourceid = row['ResourceID']
                    legacyid = row['ResourceID']

                if 'legacyid' in locals():
                    self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count)

                if bulk:
                    Resource.bulk_save(resources=resources)
                    print _('%s total resource saved' % (save_count + 1))

        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            formatted = traceback.format_exception(exc_type, exc_value, exc_traceback)
            if len(formatted):
                for message in formatted:
                    print message

        finally:
            pass
Esempio n. 8
0
    def import_business_data(self, business_data=None, mapping=None, overwrite='append', bulk=False, create_concepts=False, create_collections=False):
        # errors = businessDataValidator(self.business_data)

        def get_display_nodes(graphid):
            display_nodeids = []
            functions = FunctionXGraph.objects.filter(function_id='60000000-0000-0000-0000-000000000001', graph_id=graphid)
            for function in functions:
                f = function.config
                del f['triggering_nodegroups']

                for k,v in f.iteritems():
                    v['node_ids'] = []
                    v['string_template'] = v['string_template'].replace('<', '').replace('>', '').split(', ')
                    if 'nodegroup_id' in v and v['nodegroup_id'] != '':
                        nodes = Node.objects.filter(nodegroup_id=v['nodegroup_id'])
                        for node in nodes:
                            if node.name in v['string_template']:
                                display_nodeids.append(str(node.nodeid))

                for k,v in f.iteritems():
                    if 'string_template' in v and v['string_template'] != ['']:
                        print 'The {0} {1} in the {2} display function.'.format(', '.join(v['string_template']), 'nodes participate' if len(v['string_template']) > 1 else 'node participates', k)
                    else:
                        print 'No nodes participate in the {0} display function.'.format(k)

            return display_nodeids

        def process_resourceid(resourceid, overwrite):
            # Test if resourceid is a UUID.
            try:
                resourceinstanceid = uuid.UUID(resourceid)
                # If resourceid is a UUID check if it is already an arches resource.
                try:
                    ret = Resource.objects.filter(resourceinstanceid=resourceid)
                    # If resourceid is an arches resource and overwrite is true, delete the existing arches resource.
                    if overwrite == 'overwrite':
                        Resource.objects.get(pk=str(ret[0].resourceinstanceid)).delete()
                    resourceinstanceid = resourceinstanceid
                # If resourceid is not a UUID create one.
                except:
                    resourceinstanceid = resourceinstanceid
            except:
                # Get resources with the given legacyid
                ret = Resource.objects.filter(legacyid=resourceid)
                # If more than one resource is returned than make resource = None. This should never actually happen.
                if len(ret) > 1:
                    resourceinstanceid = None
                # If no resource is returned with the given legacyid then create an archesid for the resource.
                elif len(ret) == 0:
                    resourceinstanceid = uuid.uuid4()
                # If a resource is returned with the give legacyid then return its archesid
                else:
                    if overwrite == 'overwrite':
                        Resource.objects.get(pk=str(ret[0].resourceinstanceid)).delete()
                    resourceinstanceid = ret[0].resourceinstanceid

            return resourceinstanceid

        try:
            with transaction.atomic():
                save_count = 0
                try:
                    resourceinstanceid = process_resourceid(business_data[0]['ResourceID'], overwrite)
                except KeyError:
                    print '*'*80
                    print 'ERROR: No column \'ResourceID\' found in business data file. Please add a \'ResourceID\' column with a unique resource identifier.'
                    print '*'*80
                    sys.exit()
                blanktilecache = {}
                populated_nodegroups = {}
                populated_nodegroups[resourceinstanceid] = []
                previous_row_resourceid = None
                populated_tiles = []
                target_resource_model = None
                single_cardinality_nodegroups = [str(nodegroupid) for nodegroupid in NodeGroup.objects.values_list('nodegroupid', flat=True).filter(cardinality = '1')]
                node_datatypes = {str(nodeid): datatype for nodeid, datatype in  Node.objects.values_list('nodeid', 'datatype').filter(~Q(datatype='semantic'), graph__isresource=True)}
                display_nodes = get_display_nodes(mapping['resource_model_id'])
                all_nodes = Node.objects.all()
                datatype_factory = DataTypeFactory()
                concepts_to_create = {}
                new_concepts = {}
                required_nodes = {}
                for node in Node.objects.filter(~Q(datatype='semantic'), isrequired=True, graph_id=mapping['resource_model_id']).values_list('nodeid', 'name'):
                    required_nodes[str(node[0])] = node[1]

                # This code can probably be moved into it's own module.
                resourceids = []
                non_contiguous_resource_ids = []
                previous_row_for_validation = None

                for row_number, row in enumerate(business_data):
                    # Check contiguousness of csv file.
                    if row['ResourceID'] != previous_row_for_validation and row['ResourceID'] in resourceids:
                        non_contiguous_resource_ids.append(row['ResourceID'])
                    else:
                        resourceids.append(row['ResourceID'])
                    previous_row_for_validation = row['ResourceID']

                    if create_concepts == True:
                        for node in mapping['nodes']:
                            if node['data_type'] in ['concept', 'concept-list', 'domain-value', 'domain-value-list'] and node['file_field_name'] in row.keys():

                                # print row[node['file_field_name']]
                                concept = []
                                for val in csv.reader([row[node['file_field_name']]], delimiter=',', quotechar='"'):
                                    concept.append(val)
                                concept = concept[0]

                                # check if collection is in concepts_to_create, add collection to concepts_to_create if it's not and add first child concept
                                if node['arches_nodeid'] not in concepts_to_create:
                                    concepts_to_create[node['arches_nodeid']] = {}
                                    for concept_value in concept:
                                        concepts_to_create[node['arches_nodeid']][str(uuid.uuid4())] = concept_value
                                # if collection in concepts to create then add child concept to collection
                                elif row[node['file_field_name']] not in concepts_to_create[node['arches_nodeid']].values():
                                    for concept_value in concept:
                                        concepts_to_create[node['arches_nodeid']][str(uuid.uuid4())] = concept_value

                if len(non_contiguous_resource_ids) > 0:
                    print '*'*80
                    for non_contiguous_resource_id in non_contiguous_resource_ids:
                        print 'ResourceID: ' + non_contiguous_resource_id
                    print 'ERROR: The preceding ResourceIDs are non-contiguous in your csv file. Please sort your csv file by ResourceID and try import again.'
                    print '*'*80
                    sys.exit()

                def create_reference_data(new_concepts, create_collections):
                    errors = []
                    candidates = Concept().get(id='00000000-0000-0000-0000-000000000006')
                    for arches_nodeid, concepts in new_concepts.iteritems():
                        collectionid = str(uuid.uuid4())
                        topconceptid = str(uuid.uuid4())
                        node = Node.objects.get(nodeid=arches_nodeid)

                        # if node.datatype is concept or concept-list create concepts and collections
                        if node.datatype in ['concept', 'concept-list']:
                            # create collection if create_collections = create, otherwise append to collection already assigned to node
                            if create_collections == True:
                                collection_legacyoid = node.name + '_' + str(node.graph_id) + '_import'
                                # check to see that there is not already a collection for this node
                                if node.config['rdmCollection'] != None:
                                    errors.append({'type': 'WARNING', 'message': 'A collection already exists for the {0} node. Use the add option to add concepts to this collection.'.format(node.name)})
                                    if len(errors) > 0:
                                        self.errors += errors
                                    collection = None
                                else:
                                    # if there is no collection assigned to this node, create one and assign it to the node
                                    try:
                                        # check to see that a collection with this legacyid does not already exist
                                        collection = Concept().get(legacyoid=collection_legacyoid)
                                        errors.append({'type': 'WARNING', 'message': 'A collection with the legacyid {0} already exists.'.format(node.name + '_' + str(node.graph_id) + '_import')})
                                        if len(errors) > 0:
                                            self.errors += errors
                                    except:
                                        collection = Concept({
                                            'id': collectionid,
                                            'legacyoid': collection_legacyoid,
                                            'nodetype': 'Collection'
                                        })
                                        collection.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                        node.config['rdmCollection'] = collectionid
                                        node.save()
                                        collection.save()
                            else:
                                # if create collection = add check that there is a collection associated with node, if no collection associated with node create a collection and associated with the node
                                try:
                                    collection = Concept().get(id=node.config['rdmCollection'])
                                except:
                                    collection = Concept({
                                        'id': collectionid,
                                        'legacyoid': node.name + '_' + str(node.graph_id) + '_import',
                                        'nodetype': 'Collection'
                                    })
                                    collection.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                    node.config['rdmCollection'] = collectionid
                                    node.save()
                                    collection.save()

                            if collection != None:
                                topconcept_legacyoid = node.name + '_' + str(node.graph_id)
                                # Check if top concept already exists, if not create it and add to candidates scheme
                                try:
                                    topconcept = Concept().get(legacyoid=topconcept_legacyoid)
                                except:
                                    topconcept = Concept({
                                        'id': topconceptid,
                                        'legacyoid': topconcept_legacyoid,
                                        'nodetype': 'Concept'
                                    })
                                    topconcept.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                    topconcept.save()
                                candidates.add_relation(topconcept, 'narrower')

                                # create child concepts and relate to top concept and collection accordingly
                                for conceptid, value in concepts.iteritems():
                                    concept_legacyoid = value + '_' + node.name + '_' + str(node.graph_id)
                                    # check if concept already exists, if not create and add to topconcept and collection
                                    try:
                                        conceptid = [concept for concept in topconcept.get_child_concepts(topconcept.id) if concept[1] == value][0][0]
                                        concept = Concept().get(id=conceptid)
                                    except:
                                        concept = Concept({
                                            'id': conceptid,
                                            'legacyoid': concept_legacyoid,
                                            'nodetype': 'Concept'
                                        })
                                        concept.addvalue({'id': str(uuid.uuid4()), 'value': value, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                        concept.save()
                                    collection.add_relation(concept, 'member')
                                    topconcept.add_relation(concept, 'narrower')

                        #if node.datatype is domain or domain-list create options array in node.config
                        elif node.datatype in ['domain-value', 'domain-value-list']:
                            for domainid, value in new_concepts[arches_nodeid].iteritems():
                                # check if value already exists in domain
                                if value not in [t['text'] for t in node.config['options']]:
                                    domainvalue = {
                                        "text": value,
                                        "selected": False,
                                        "id": domainid
                                    }
                                    node.config['options'].append(domainvalue)
                                    node.save()

                if create_concepts == True:
                    create_reference_data(concepts_to_create, create_collections)
                # If concepts are created on import, concept_lookup must be instantiated afterwards.
                concept_lookup = ConceptLookup()

                def cache(blank_tile):
                    """
                    Register blank_tile in blanktilecache under the node ids it can hold.

                    When the tile carries data of its own, the keys of that data are
                    used; otherwise the data keys of each of its child tiles are used.
                    A key that is already present in blanktilecache is left untouched.
                    """
                    if blank_tile.data != {}:
                        data_dicts = [blank_tile.data]
                    else:
                        # A tile without data of its own is indexed by its children's keys.
                        data_dicts = (child.data for child in blank_tile.tiles)

                    for node_values in data_dicts:
                        for nodeid in node_values.keys():
                            if nodeid in blanktilecache:
                                continue
                            blanktilecache[str(nodeid)] = blank_tile

                def column_names_to_targetids(row, mapping, row_number):
                    """
                    Convert a csv row keyed by column name into a list of
                    single-entry {arches_nodeid: value} dictionaries.

                    A column is matched to the entries of mapping['nodes'] by a
                    case-insensitive comparison with their 'file_field_name'.
                    Columns holding an empty string are skipped. When the row has
                    an 'ADDITIONAL' or 'MISSING' key a warning is added to
                    self.errors; the row is still converted.
                    """
                    if 'ADDITIONAL' in row or 'MISSING' in row:
                        resource_id = row['ResourceID']
                        # row_number looks like 'on line <n>'; only the number is reported.
                        line_number = str(int(row_number.split('on line ')[1]))
                        errors = [{'type': 'WARNING', 'message': 'No resource created for ResourceID {0}. Line {1} has additional or missing columns.'.format(resource_id, line_number)}]
                        self.errors += errors

                    targetid_values = []
                    for column_name, column_value in row.iteritems():
                        if column_value != '':
                            targetid_values.extend(
                                {node_mapping['arches_nodeid']: column_value}
                                for node_mapping in mapping['nodes']
                                if column_name.upper() == node_mapping['file_field_name'].upper()
                            )
                    return targetid_values

                def transform_value(datatype, value, source, nodeid):
                    '''
                    Transforms values from probably string/wkt representation to specified datatype in arches.
                    This code could probably move to somehwere where it can be accessed by other importers.
                    '''
                    request = ''
                    if datatype != '':
                        errors = []
                        datatype_instance = datatype_factory.get_instance(datatype)
                        if datatype in ['concept', 'domain-value', 'concept-list', 'domain-value-list']:
                            try:
                                uuid.UUID(value)
                            except:
                                if datatype in ['domain-value', 'domain-value-list']:
                                    collection_id = nodeid
                                else:
                                    collection_id = Node.objects.get(nodeid=nodeid).config['rdmCollection']
                                if collection_id != None:
                                    value = concept_lookup.lookup_labelid_from_label(value, collection_id)
                        try:
                            value = datatype_instance.transform_import_values(value, nodeid)
                            errors = datatype_instance.validate(value, row_number, source)
                        except Exception as e:
                            errors.append({'type': 'ERROR', 'message': 'datatype: {0} value: {1} {2} - {3}'.format(datatype_instance.datatype_model.classname, value, source, str(e) + ' or is not a prefLabel in the given collection.')})
                        if len(errors) > 0:
                            error_types = [error['type'] for error in errors]
                            if 'ERROR' in error_types:
                                value = None
                            self.errors += errors
                    else:
                        print _('No datatype detected for {0}'.format(value))

                    return {'value': value, 'request': request}

                def get_blank_tile(source_data):
                    """
                    Return a copy of the blank tile for the first node id found in
                    source_data, or None when source_data is empty or its first
                    entry is an empty dictionary.

                    A blank tile that is not yet in blanktilecache is fetched with
                    Tile.get_blank_tile and registered through cache().
                    """
                    blank_tile = None
                    if len(source_data) > 0 and source_data[0] != {}:
                        key = str(source_data[0].keys()[0])
                        if key in blanktilecache:
                            blank_tile = blanktilecache[key]
                        else:
                            blank_tile = Tile.get_blank_tile(key)
                            cache(blank_tile)
                    # The pickle round trip is used in place of deepcopy to hand
                    # back a copy rather than the tile that was looked up.
                    return cPickle.loads(cPickle.dumps(blank_tile, -1))

                def check_required_nodes(tile, parent_tile, required_nodes, all_nodes):
                    """
                    Check that each required node in a tile is populated.

                    When the tile holds data, every key that appears in
                    required_nodes and whose value is None causes parent_tile to
                    be removed from populated_tiles (if it is still in there) and
                    a warning to be recorded. When the tile holds no data but has
                    child tiles, each child is checked in turn against the same
                    parent_tile.

                    tile           -- the tile whose data is inspected
                    parent_tile    -- the tile dropped from populated_tiles on failure
                    required_nodes -- mapping of required node id to node name
                    all_nodes      -- node queryset used to look up the names of the
                                      nodes in the tile's nodegroup for the message

                    Returns nothing; warnings are added to self.errors.
                    """
                    errors = []
                    if len(required_nodes) > 0:
                        if bool(tile.data):
                            for target_k, target_v in tile.data.iteritems():
                                if target_k in required_nodes and target_v is None:
                                    if parent_tile in populated_tiles:
                                        populated_tiles.remove(parent_tile)
                                    # Name the nodes of the nodegroup of the tile being
                                    # checked, which for a child tile differs from the
                                    # nodegroup of the top-level tile of the row.
                                    nodegroup_node_names = all_nodes.filter(nodegroup_id=str(tile.nodegroup_id)).values_list('name', flat=True)
                                    errors.append({'type': 'WARNING', 'message': 'The {0} node is required and must be populated in order to populate the {1} nodes. This data was not imported.'.format(required_nodes[target_k], ', '.join(nodegroup_node_names))})
                        elif bool(tile.tiles):
                            for child_tile in tile.tiles:
                                check_required_nodes(child_tile, parent_tile, required_nodes, all_nodes)
                    if len(errors) > 0:
                        self.errors += errors

                resources = []
                missing_display_values = {}

                for row_number, row in enumerate(business_data):
                    row_number = 'on line ' + unicode(row_number + 2) #to represent the row in a csv accounting for the header and 0 index
                    if row['ResourceID'] != previous_row_resourceid and previous_row_resourceid is not None:

                        save_count = save_count + 1
                        self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count, row_number)

                        # reset values for next resource instance
                        populated_tiles = []
                        resourceinstanceid = process_resourceid(row['ResourceID'], overwrite)
                        populated_nodegroups[resourceinstanceid] = []

                    source_data = column_names_to_targetids(row, mapping, row_number)

                    row_keys = [list(b) for b in zip(*[a.keys() for a in source_data])]
                    if len(row_keys) > 0:
                        missing_display_nodes = [n for n in display_nodes if n not in row_keys]
                    if len(missing_display_nodes) > 0:
                        errors = []
                        for mdn in missing_display_nodes:
                            mdn_name = all_nodes.filter(nodeid=mdn).values_list('name', flat=True)[0]
                            try:
                                missing_display_values[mdn_name].append(row_number.split('on line ')[-1])
                            except:
                                missing_display_values[mdn_name] = [row_number.split('on line ')[-1]]

                    if len(source_data) > 0:
                        if source_data[0].keys():
                            try:
                                target_resource_model = all_nodes.get(nodeid=source_data[0].keys()[0]).graph_id
                            except:
                                print '*'*80
                                print 'ERROR: No resource model found. Please make sure the resource model this business data is mapped to has been imported into Arches.'
                                print '*'*80
                                sys.exit()

                        target_tile = get_blank_tile(source_data)
                        if 'TileID' in row and row['TileID'] is not None:
                            target_tile.tileid = row['TileID']
                        if 'NodeGroupID' in row and row['NodeGroupID'] is not None:
                            target_tile.nodegroupid = row['NodeGroupID']

                        def populate_tile(source_data, target_tile):
                            '''
                            Populate target_tile (or its child tiles) from source_data and
                            append it to populated_tiles when it is not blank.

                            source_data = [{nodeid:value},{nodeid:value},{nodeid:value} . . .]
                            All nodes in source_data belong to the same resource.
                            A dictionary of nodeids would not allow for multiple values for the same nodeid.
                            Grouping is enforced by having all grouped attributes in the same row.

                            Entries are deleted from source_data as they are consumed. When
                            entries remain after this tile has been handled, a new blank
                            tile is requested for them and populate_tile calls itself again.
                            Nothing is populated when the tile's nodegroup is already listed
                            in populated_nodegroups for the current resource instance.
                            '''
                            need_new_tile = False
                            # Give the tile a freshly generated id; a TileID supplied in the row takes precedence.
                            target_tile.tileid = uuid.uuid4()
                            if 'TileID' in row and row['TileID'] is not None:
                                target_tile.tileid = row['TileID']
                            target_tile.resourceinstance_id = resourceinstanceid
                            # Check the cardinality of the tile and check if it has been populated.
                            # If cardinality is one and the tile is populated the tile should not be populated again.
                            # NOTE(review): the key tested here is 'TileiD', which differs in case from the
                            # 'TileID' key used everywhere else in this function - confirm whether that is intended.
                            if str(target_tile.nodegroup_id) in single_cardinality_nodegroups and 'TileiD' not in row:
                                target_tile_cardinality = '1'
                            else:
                                target_tile_cardinality = 'n'

                            if str(target_tile.nodegroup_id) not in populated_nodegroups[resourceinstanceid]:
                                target_tile.nodegroup_id = str(target_tile.nodegroup_id)
                                # Check if we are populating a parent tile by inspecting the target_tile.data array.
                                if target_tile.data != {}:
                                    # Iterate through the target_tile nodes and begin populating by iterating through source_data array.
                                    # The idea is to populate as much of the target_tile as possible, before moving on to the next target_tile.
                                    for target_key in target_tile.data.keys():
                                        for source_tile in source_data:
                                            for source_key in source_tile.keys():
                                                # Check for source and target key match.
                                                if source_key == target_key:
                                                    # Only fill a node that has no value yet; a second value for the
                                                    # same node stays in source_data for a later tile.
                                                    if target_tile.data[source_key] == None:
                                                        # If match populate target_tile node with transformed value.
                                                        value = transform_value(node_datatypes[source_key], source_tile[source_key], row_number, source_key)
                                                        target_tile.data[source_key] = value['value']
                                                        # target_tile.request = value['request']
                                                        # Delete key from source_tile so we do not populate another tile based on the same data.
                                                        del source_tile[source_key]
                                    # Cleanup source_data array to remove source_tiles that are now '{}' from the code above.
                                    source_data[:] = [item for item in source_data if item != {}]

                                # Check if we are populating a child tile(s) by inspecting the target_tiles.tiles array.
                                elif target_tile.tiles != None:
                                    # Both lists are shared by every populate_child_tiles call made for this target_tile.
                                    populated_child_tiles = []
                                    populated_child_nodegroups = []
                                    for childtile in target_tile.tiles:
                                        if str(childtile.nodegroup_id) in single_cardinality_nodegroups:
                                            child_tile_cardinality = '1'
                                        else:
                                            child_tile_cardinality = 'n'

                                        def populate_child_tiles(source_data):
                                            # Fill a copy of the current childtile from source_data and collect it in
                                            # populated_child_tiles when at least one of its nodes received a value.
                                            # The pickle round trip copies childtile so the prototype itself is not filled in.
                                            prototype_tile_copy = cPickle.loads(cPickle.dumps(childtile, -1))
                                            # NOTE(review): unlike the checks above, a 'TileID' column holding None is
                                            # used as-is here - confirm that a None tileid is acceptable.
                                            tileid = row['TileID'] if 'TileID' in row else uuid.uuid4()
                                            prototype_tile_copy.tileid = tileid
                                            prototype_tile_copy.parenttile = target_tile
                                            parenttileid = row['ParentTileID'] if 'ParentTileID' in row and row['ParentTileID'] is not None else None
                                            if parenttileid is not None:
                                                # parenttile is target_tile, so this also changes target_tile.tileid.
                                                prototype_tile_copy.parenttile.tileid = parenttileid
                                            prototype_tile_copy.resourceinstance_id = resourceinstanceid
                                            if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                prototype_tile_copy.nodegroup_id = str(prototype_tile_copy.nodegroup_id)
                                                for target_key in prototype_tile_copy.data.keys():
                                                    for source_column in source_data:
                                                        for source_key in source_column.keys():
                                                            if source_key == target_key:
                                                                if prototype_tile_copy.data[source_key] == None:
                                                                    value = transform_value(node_datatypes[source_key], source_column[source_key], row_number, source_key)
                                                                    prototype_tile_copy.data[source_key] = value['value']
                                                                    # print prototype_tile_copy.data[source_key]
                                                                    # print '&'*80
                                                                    # target_tile.request = value['request']
                                                                    del source_column[source_key]
                                                                else:
                                                                    # The node already holds a value, so the remaining
                                                                    # source data is handed to a further copy of childtile.
                                                                    populate_child_tiles(source_data)


                                            if prototype_tile_copy.data != {}:
                                                if len([item for item in prototype_tile_copy.data.values() if item != None]) > 0:
                                                    if str(prototype_tile_copy.nodegroup_id) not in populated_child_nodegroups:
                                                        populated_child_tiles.append(prototype_tile_copy)

                                            if prototype_tile_copy != None:
                                                # Record a cardinality-1 child nodegroup so it is not populated again,
                                                # unless the row carries a NodeGroupID column.
                                                if child_tile_cardinality == '1' and 'NodeGroupID' not in row:
                                                    populated_child_nodegroups.append(str(prototype_tile_copy.nodegroup_id))

                                            # Drop the source entries that were emptied above.
                                            source_data[:] = [item for item in source_data if item != {}]

                                        populate_child_tiles(source_data)

                                    # Replace the blank child prototypes with the populated copies.
                                    target_tile.tiles = populated_child_tiles

                                if not target_tile.is_blank():
                                    populated_tiles.append(target_tile)

                                # Anything left in source_data did not fit this tile and needs another one.
                                if len(source_data)>0:
                                    need_new_tile = True

                                # Record a cardinality-1 nodegroup so later calls for this resource skip it,
                                # unless the row carries a NodeGroupID column.
                                if target_tile_cardinality == '1' and 'NodeGroupID' not in row:
                                    populated_nodegroups[resourceinstanceid].append(str(target_tile.nodegroup_id))

                                if need_new_tile:
                                    new_tile = get_blank_tile(source_data)
                                    if new_tile != None:
                                        populate_tile(source_data, new_tile)

                        # mock_request_object = HttpRequest()

                        if target_tile != None and len(source_data) > 0:
                            populate_tile(source_data, target_tile)
                            # Check that required nodes are populated. If not remove tile from populated_tiles array.
                            check_required_nodes(target_tile, target_tile, required_nodes, all_nodes)

                    previous_row_resourceid = row['ResourceID']
                    legacyid = row['ResourceID']

                # check for missing display value nodes.
                errors = []
                for k,v in missing_display_values.iteritems():
                    if len(v) > 0:
                        errors.append({'type': 'WARNING', 'message': '{0} is null or not mapped on rows {1} and participates in a display value function.'.format(k, ','.join(v))})
                if len(errors) > 0:
                    self.errors += errors

                if 'legacyid' in locals():
                    self.save_resource(populated_tiles, resourceinstanceid, legacyid, resources, target_resource_model, bulk, save_count, row_number)

                if bulk:
                    Resource.bulk_save(resources=resources)

                print _('%s total resource saved' % (save_count + 1))

        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            formatted = traceback.format_exception(exc_type, exc_value, exc_traceback)
            if len(formatted):
                for message in formatted:
                    print message

        finally:
            pass