Exemple #1
0
 def process_single_media_label_field(self):
     """Reconcile the records of a lone media field with existing media.

     Nothing is done unless the import source has exactly one field of
     type 'media'. New media entities are never minted here; every
     distinct record is only matched against already imported media,
     and each match is added to self.reconciled_entities.

     Returns True when the source has exactly one media field,
     otherwise False.
     """
     media_fields = ImportField.objects.filter(source_id=self.source_id,
                                               field_type='media')
     if len(media_fields) != 1:
         # none or several media fields, so this shortcut does not apply
         return False
     print('yes we have 1 media field')
     field_obj = media_fields[0]
     cell_processor = ProcessCells(self.source_id, self.start_row)
     distinct_records = cell_processor.get_field_records(field_obj.field_num,
                                                         False)
     if distinct_records is False:
         return True
     print('Found Media Records: ' + str(len(distinct_records)))
     for dist_rec in distinct_records.values():
         candidate = CandidateMedia()
         candidate.mint_new_entity_ok = False  # reconcile only, never create
         candidate.project_uuid = self.project_uuid
         candidate.source_id = self.source_id
         candidate.class_uri = field_obj.field_value_cat
         # rows in which this distinct record value occurs
         candidate.import_rows = dist_rec['rows']
         candidate.reconcile_manifest_item(dist_rec['imp_cell_obj'])
         if candidate.uuid is False:
             continue
         matched = {'id': str(candidate.uuid), 'label': candidate.label}
         self.reconciled_entities.append(matched)
     return True
 def process_persons_batch(self):
     """Reconcile person records for the current batch of import rows.

     Clears the prior import for this source, sets self.end_row from
     self.start_row and self.batch_size, then reconciles each distinct
     record of every field in self.persons_fields. Each record ends up
     in self.new_entities, self.reconciled_entities or, when it could
     not be reconciled, self.not_reconciled_entities (identified by
     '<field_num>-<row_num>' of its import cell).
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     # Every persons field is processed, including the case where the
     # source has only one of them (a "> 1" guard used to skip it).
     for field_obj in self.persons_fields:
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(field_obj.field_num,
                                                 False)
         if distinct_records is False:
             continue
         for dist_rec in distinct_records.values():
             imp_cell = dist_rec['imp_cell_obj']
             cp = CandidatePerson()
             cp.project_uuid = self.project_uuid
             cp.source_id = self.source_id
             cp.foaf_type = field_obj.field_value_cat
             # list of rows where this record value is found
             cp.import_rows = dist_rec['rows']
             cp.reconcile_item(imp_cell)
             if cp.uuid is False:
                 # could not reconcile; report the cell by field and row
                 bad_id = str(imp_cell.field_num)
                 bad_id += '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': bad_id,
                                                      'label': imp_cell.record})
             elif cp.new_entity:
                 self.new_entities.append({'id': cp.uuid,
                                           'label': cp.label})
             else:
                 self.reconciled_entities.append({'id': cp.uuid,
                                                  'label': cp.label})
 def look_up_predicate(self, field_num, row_num):
     """Look up the predicate that applies to a field_num and row_num.

     The lookup uses self.reconciled_predicates. A predicate stored
     for the whole field wins; otherwise the per-row predicates are
     consulted, and if the row is not among them the predicate is
     reconciled from the import cell record of that row.

     Returns the predicate, or False when none can be determined.
     """
     if field_num not in self.reconciled_predicates:
         return False
     act_field = self.reconciled_predicates[field_num]
     predicate = act_field['predicate']
     if predicate is not False:
         # the import field itself provides the predicate
         return predicate
     row_predicates = act_field['rows']
     if row_predicates is False:
         # No per-row predicates were recorded for this field, so
         # there is nothing to look up (a membership test on False
         # would raise a TypeError).
         return False
     if row_num in row_predicates:
         return row_predicates[row_num]
     # look up the predicate the hard way
     # we don't have a predicate for this row, so
     # look it up through reconciliation
     des_field_obj = act_field['field_obj']
     pc = ProcessCells(self.source_id,
                       row_num)
     distinct_records = pc.get_field_records(field_num,
                                             [row_num])
     if distinct_records is False:
         # other callers of get_field_records check for a False
         # result; treat it as "no cell, no predicate" here as well
         return False
     for var_dist_rec in distinct_records.values():
         imp_cell = var_dist_rec['imp_cell_obj']
         if len(imp_cell.record) > 0:
             cdp = CandidateDescriptivePredicate()
             cdp.label = imp_cell.record
             cdp.des_import_cell = imp_cell
             cdp.data_type = des_field_obj.field_data_type
             cdp.reconcile_predicate_var(des_field_obj)
             predicate = cdp.predicate
     return predicate
 def reconcile_descriptive_predicates(self, des_by_fields):
     """Reconcile the predicates of descriptive import fields.

     For every field in des_by_fields that is not yet present in
     self.reconciled_predicates, an entry is stored under its
     field_num with the keys 'predicate', 'field_obj',
     'valueof_fields' and 'rows':

     * 'description' fields get one predicate for the whole field,
       and the field is recorded in self.field_valueofs as the source
       of its own values.
     * 'variable' fields get a 'rows' dict that maps each row number
       to the predicate reconciled from that row's cell record.
     * any other field type keeps 'predicate' and 'rows' as False.
     """
     for des_field_obj in des_by_fields:
         field_num = des_field_obj.field_num
         if field_num in self.reconciled_predicates:
             continue  # already reconciled
         recon_predicate = {'predicate': False,
                            'field_obj': des_field_obj,
                            'valueof_fields': [],
                            'rows': False}
         if des_field_obj.field_type == 'description':
             # straight forward. Predicate label from the Import Field label
             cdp = CandidateDescriptivePredicate()
             cdp.reconcile_predicate_var(des_field_obj)
             # store information about where to get values
             self.field_valueofs[field_num] = [field_num]
             recon_predicate['predicate'] = cdp.predicate
         elif des_field_obj.field_type == 'variable':
             # Predicate label in Records of Import cells
             pc = ProcessCells(self.source_id,
                               self.start_row)
             distinct_records = pc.get_field_records(field_num,
                                                     False)
             if distinct_records is not False:
                 # one mapping shared by all distinct records, so the
                 # rows of earlier records are not thrown away
                 pred_rows = {}
                 for dist_rec in distinct_records.values():
                     cdp = CandidateDescriptivePredicate()
                     # checks to see if we need to use even a blank label
                     # because of dependencies with value-of fields
                     cdp.label = self.make_var_label_evenif_blank(des_field_obj,
                                                                  dist_rec)
                     cdp.des_import_cell = dist_rec['imp_cell_obj']
                     cdp.reconcile_predicate_var(des_field_obj)
                     for imp_cell_row in dist_rec['rows']:
                         pred_rows[imp_cell_row] = cdp.predicate
                     recon_predicate['rows'] = pred_rows
         self.reconciled_predicates[field_num] = recon_predicate
 def reconcile_types_strings(self):
     """Reconcile strings and types for the reconciled predicate fields.

     Walks self.reconciled_predicates and only handles fields whose
     data type is 'id' or 'xsd:string'. Every non-blank value cell is
     reconciled as a string; for 'id' fields a type is additionally
     reconciled from the predicate and the string content.

     The value cells come from the value-of fields when the predicate
     is defined row by row, or from the field itself when the import
     field provides a single predicate.
     """
     for field_num, recon_predicate in self.reconciled_predicates.items():
         field_obj = recon_predicate['field_obj']
         data_type = field_obj.field_data_type
         if data_type not in ('id', 'xsd:string'):
             continue
         predicate_by_row = recon_predicate['rows'] is not False
         if predicate_by_row:
             valueof_fields = []
             for valueof_field in self.get_variable_valueof(field_obj):
                 if isinstance(valueof_field, ImportField):
                     valueof_fields.append(valueof_field.field_num)
                 elif isinstance(valueof_field, int):
                     valueof_fields.append(valueof_field)
         elif recon_predicate['predicate'] is not False:
             valueof_fields = [field_num]
         else:
             valueof_fields = []
         for valueof_field in valueof_fields:
             pc = ProcessCells(self.source_id,
                               self.start_row)
             distinct_records = pc.get_field_records(valueof_field,
                                                     False)
             if distinct_records is False:
                 continue
             for val_dist_rec in distinct_records.values():
                 imp_cell = val_dist_rec['imp_cell_obj']
                 if len(imp_cell.record) < 1:
                     continue  # blank cells make no string or type
                 cs = CandidateString()
                 cs.source_id = self.source_id
                 cs.project_uuid = self.project_uuid
                 cs.reconcile_string_cell(imp_cell)
                 content_uuid = cs.uuid  # string content uuid
                 if data_type != 'id':
                     continue  # only 'id' fields become types
                 if predicate_by_row:
                     # need to create types row by row, because the predicate
                     # comes from import cell records, not the import field
                     for row_num in val_dist_rec['rows']:
                         predicate = self.look_up_predicate(field_num,
                                                            row_num)
                         if predicate is False:
                             continue
                         ct = CandidateType()
                         # same context as the field-level branch below
                         ct.source_id = self.source_id
                         ct.project_uuid = self.project_uuid
                         ct.reconcile_type_cell(predicate.uuid,
                                                content_uuid,
                                                imp_cell,
                                                row_num)
                 elif recon_predicate['predicate'] is not False:
                     # predicate comes from the import field
                     # no need to worry about individual rows
                     predicate = recon_predicate['predicate']
                     ct = CandidateType()
                     ct.source_id = self.source_id
                     ct.project_uuid = self.project_uuid
                     ct.reconcile_type_cell(predicate.uuid,
                                            content_uuid,
                                            imp_cell,
                                            False)
Exemple #6
0
 def make_type_ld_annotations(self,
                              sub_type_pred_uuid,
                              sub_type_f_num,
                              rel_pred,
                              obj_le_f_num):
     """Make linked data annotations for the types in an import.

     The cells of field sub_type_f_num provide the type labels, and
     the cells of field obj_le_f_num in the same rows provide the
     URIs to link to. For every non-blank type label paired with an
     http(s) URI, a LinkAnnotation with predicate rel_pred is saved
     and the URI is passed to WebLinkEntity.check_add_link_entity().
     """
     sub_type_cells = ImportCell.objects\
                                .filter(source_id=self.source_id,
                                        field_num=sub_type_f_num)
     # group the type cells by record hash, collecting their rows
     distinct_records = {}
     for cell in sub_type_cells:
         if cell.rec_hash not in distinct_records:
             distinct_records[cell.rec_hash] = {'rows': [],
                                                'imp_cell_obj': cell}
         distinct_records[cell.rec_hash]['rows'].append(cell.row_num)
     rels = []
     for distinct_type in distinct_records.values():
         # iterate through the distinct types and get associated linked data
         type_label = distinct_type['imp_cell_obj'].record
         if len(type_label) < 1:
             continue  # a blank type cannot be annotated
         pc = ProcessCells(self.source_id, 0)
         ld_entities = pc.get_field_records(obj_le_f_num,
                                            distinct_type['rows'])
         if ld_entities is False:
             # other callers of get_field_records check for a False
             # result; without this guard .values() would fail
             continue
         for distinct_ld in ld_entities.values():
             obj_uri = distinct_ld['imp_cell_obj'].record
             if len(obj_uri) <= 8 \
                or not obj_uri.startswith(('http://', 'https://')):
                 continue  # not a usable linked data URI
             # now get the UUID for the type
             tm = TypeManagement()
             tm.project_uuid = self.project_uuid
             tm.source_id = self.source_id
             sub_type = tm.get_make_type_within_pred_uuid(sub_type_pred_uuid,
                                                          type_label)
             rels.append({'subject_label': type_label,
                          'subject': sub_type.uuid,
                          'object_uri': obj_uri})
     # save the annotations only after all relations are collected
     for rel in rels:
         new_la = LinkAnnotation()
         new_la.subject = rel['subject']
         new_la.subject_type = 'types'
         new_la.project_uuid = self.project_uuid
         new_la.source_id = self.source_id
         new_la.predicate_uri = rel_pred
         new_la.object_uri = rel['object_uri']
         new_la.creator_uuid = ''
         new_la.save()
         web_le = WebLinkEntity()
         web_le.check_add_link_entity(rel['object_uri'])
Exemple #7
0
 def make_type_ld_annotations(self,
                              sub_type_pred_uuid,
                              sub_type_f_num,
                              rel_pred,
                              obj_le_f_num):
     """Make linked data annotations for the types in an import.

     The cells of field sub_type_f_num provide the type labels, and
     the cells of field obj_le_f_num in the same rows provide the
     URIs to link to. For every non-blank type label paired with an
     http(s) URI, a LinkAnnotation with predicate rel_pred is saved
     and the URI is passed to WebLinkEntity.check_add_link_entity().
     """
     sub_type_cells = ImportCell.objects\
                                .filter(source_id=self.source_id,
                                        field_num=sub_type_f_num)
     # group the type cells by record hash, collecting their rows
     distinct_records = {}
     for cell in sub_type_cells:
         if cell.rec_hash not in distinct_records:
             distinct_records[cell.rec_hash] = {'rows': [],
                                                'imp_cell_obj': cell}
         distinct_records[cell.rec_hash]['rows'].append(cell.row_num)
     rels = []
     for distinct_type in distinct_records.values():
         # iterate through the distinct types and get associated linked data
         type_label = distinct_type['imp_cell_obj'].record
         if len(type_label) < 1:
             continue  # a blank type cannot be annotated
         pc = ProcessCells(self.source_id, 0)
         ld_entities = pc.get_field_records(obj_le_f_num,
                                            distinct_type['rows'])
         if ld_entities is False:
             # other callers of get_field_records check for a False
             # result; without this guard .values() would fail
             continue
         for distinct_ld in ld_entities.values():
             obj_uri = distinct_ld['imp_cell_obj'].record
             if len(obj_uri) <= 8 \
                or not obj_uri.startswith(('http://', 'https://')):
                 continue  # not a usable linked data URI
             # now get the UUID for the type
             tm = TypeManagement()
             tm.project_uuid = self.project_uuid
             tm.source_id = self.source_id
             sub_type = tm.get_make_type_within_pred_uuid(sub_type_pred_uuid,
                                                          type_label)
             rels.append({'subject_label': type_label,
                          'subject': sub_type.uuid,
                          'object_uri': obj_uri})
     # save the annotations only after all relations are collected
     for rel in rels:
         new_la = LinkAnnotation()
         new_la.subject = rel['subject']
         new_la.subject_type = 'types'
         new_la.project_uuid = self.project_uuid
         new_la.source_id = self.source_id
         new_la.predicate_uri = rel_pred
         new_la.object_uri = rel['object_uri']
         new_la.creator_uuid = ''
         new_la.save()
         web_le = WebLinkEntity()
         web_le.check_add_link_entity(rel['object_uri'])
Exemple #8
0
 def process_multiple_media_fields(self):
     """Reconcile media items, and their files, for each media field.

     Loads the media and metadata fields first. Every distinct record
     of a media field is reconciled as a media item and sorted into
     self.new_entities, self.reconciled_entities or
     self.not_reconciled_entities. For reconciled items, the records
     of the field's part fields that look like http(s) URIs are
     reconciled as media files of that item.
     """
     self.get_media_fields()
     self.get_metadata_fields()
     if len(self.media_fields) < 1:
         return
     print('yes we have media')
     for field_obj in self.media_fields:
         cell_processor = ProcessCells(self.source_id, self.start_row)
         distinct_records = cell_processor.get_field_records(field_obj.field_num,
                                                             False)
         if distinct_records is False:
             continue
         print('Found Media Records: ' + str(len(distinct_records)))
         for dist_rec in distinct_records.values():
             imp_cell = dist_rec['imp_cell_obj']
             media = CandidateMedia()
             media.project_uuid = self.project_uuid
             media.source_id = self.source_id
             media.class_uri = field_obj.field_value_cat
             # rows in which this distinct record value occurs
             media.import_rows = dist_rec['rows']
             media.metadata_obj = self.metadata_obj
             media.reconcile_manifest_item(imp_cell)
             if media.uuid is False:
                 # no media item; report the cell by field and row number
                 bad_id = str(imp_cell.field_num)
                 bad_id += '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': bad_id,
                                                      'label': imp_cell.record})
                 continue
             if media.new_entity:
                 self.new_entities.append({'id': str(media.uuid),
                                           'label': media.label})
             else:
                 self.reconciled_entities.append({'id': str(media.uuid),
                                                  'label': media.label})
             # the media item exists, so files can be attached to it
             for part_field_obj in field_obj.parts:
                 part_processor = ProcessCells(self.source_id, self.start_row)
                 part_records = part_processor.get_field_records(part_field_obj.field_num,
                                                                 media.import_rows)
                 if part_records is False:
                     continue
                 for part_rec in part_records.values():
                     # one distinct record per file part of the media item
                     media_file = CandidateMediaFile(media.uuid)
                     media_file.imp_cell_obj = part_rec['imp_cell_obj']
                     media_file.project_uuid = self.project_uuid
                     media_file.source_id = self.source_id
                     # the field_value_cat holds the file type
                     media_file.file_type = part_field_obj.field_value_cat
                     file_uri = part_rec['imp_cell_obj'].record
                     if file_uri.startswith(('http://', 'https://')):
                         # only URI parts are reconciled as files
                         media_file.reconcile_media_file(file_uri)
Exemple #9
0
 def process_documents_batch(self):
     """Reconcile document records for the current batch of rows.

     Clears the prior import for this source, sets self.end_row, and
     loads the documents and metadata fields. Each distinct record of
     a documents field is reconciled as a document; when the field has
     an integer doc_text_field_num, the text found in that field for
     the record's first row is passed along as the document content.
     Results are sorted into self.new_entities,
     self.reconciled_entities or self.not_reconciled_entities.
     """
     self.clear_source()  # remove what an earlier import of this source left
     self.end_row = self.start_row + self.batch_size
     self.get_documents_fields()
     self.get_metadata_fields()
     if len(self.documents_fields) < 1:
         return
     print('Number of Document Fields: ' + str(len(self.documents_fields)))
     for field_obj in self.documents_fields:
         cell_processor = ProcessCells(self.source_id, self.start_row)
         distinct_records = cell_processor.get_field_records(field_obj.field_num,
                                                             False)
         if distinct_records is False:
             continue
         print('Distinct document recs: ' + str(len(distinct_records)))
         for dist_rec in distinct_records.values():
             imp_cell = dist_rec['imp_cell_obj']
             rows = dist_rec['rows']
             content = None
             if isinstance(field_obj.doc_text_field_num, int):
                 # a text content field is linked to this document field;
                 # take the text from the first row of the record
                 text_cells = ImportCell.objects.filter(
                     source_id=self.source_id,
                     field_num=field_obj.doc_text_field_num,
                     row_num=rows[0])[:1]
                 if len(text_cells) > 0:
                     content = text_cells[0].record
             document = CandidateDocument()
             document.project_uuid = self.project_uuid
             document.source_id = self.source_id
             document.label = field_obj.field_value_cat
             if isinstance(content, str):
                 # text content was found, add it to the document
                 document.content = content
             document.import_rows = rows  # rows where this record value occurs
             document.metadata_obj = self.metadata_obj
             document.reconcile_item(imp_cell)
             if document.uuid is False:
                 bad_id = str(imp_cell.field_num)
                 bad_id += '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': str(bad_id),
                                                      'label': imp_cell.record})
             elif document.new_entity:
                 self.new_entities.append({'id': str(document.uuid),
                                           'label': document.label})
             else:
                 self.reconciled_entities.append({'id': str(document.uuid),
                                                  'label': document.label})
 def get_assertion_object_values(self, field_num, in_rows):
     """Collect the import cells that hold the values for a field.

     The value-of fields registered for field_num in
     self.field_valueofs are read, restricted to in_rows, and their
     cells are returned as one list. The list is empty when field_num
     has no value-of fields registered.
     """
     if field_num not in self.field_valueofs:
         return []
     object_imp_cell_objs = []
     for valueof_field in self.field_valueofs[field_num]:
         cell_processor = ProcessCells(self.source_id, self.start_row)
         object_imp_cell_objs.extend(
             cell_processor.get_field_row_records(valueof_field, in_rows))
     return object_imp_cell_objs
Exemple #11
0
 def process_media_batch(self):
     """Reconcile media items, and their files, for the current batch.

     Clears the prior import for this source, sets self.end_row and
     loads the media fields. Every distinct record of a media field is
     reconciled as a media item and sorted into self.new_entities,
     self.reconciled_entities or self.not_reconciled_entities. For
     reconciled items, each record of the field's part fields is
     reconciled as a media file of that item.
     """
     self.clear_source()  # remove what an earlier import of this source left
     self.end_row = self.start_row + self.batch_size
     self.get_media_fields()
     if len(self.media_fields) < 1:
         return
     for field_obj in self.media_fields:
         cell_processor = ProcessCells(self.source_id, self.start_row)
         distinct_records = cell_processor.get_field_records(field_obj.field_num,
                                                             False)
         if distinct_records is False:
             continue
         for dist_rec in distinct_records.values():
             imp_cell = dist_rec['imp_cell_obj']
             media = CandidateMedia()
             media.project_uuid = self.project_uuid
             media.source_id = self.source_id
             media.class_uri = field_obj.field_value_cat
             # rows in which this distinct record value occurs
             media.import_rows = dist_rec['rows']
             media.reconcile_manifest_item(imp_cell)
             if media.uuid is False:
                 # no media item; report the cell by field and row number
                 bad_id = str(imp_cell.field_num)
                 bad_id += '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': bad_id,
                                                      'label': imp_cell.record})
                 continue
             if media.new_entity:
                 self.new_entities.append({'id': str(media.uuid),
                                           'label': media.label})
             else:
                 self.reconciled_entities.append({'id': str(media.uuid),
                                                  'label': media.label})
             # the media item exists, so files can be attached to it
             for part_field_obj in field_obj.parts:
                 part_processor = ProcessCells(self.source_id, self.start_row)
                 part_records = part_processor.get_field_records(part_field_obj.field_num,
                                                                 media.import_rows)
                 if part_records is False:
                     continue
                 for part_rec in part_records.values():
                     # one distinct record per file part of the media item
                     media_file = CandidateMediaFile(media.uuid)
                     media_file.project_uuid = self.project_uuid
                     media_file.source_id = self.source_id
                     # the field_value_cat holds the file type
                     media_file.file_type = part_field_obj.field_value_cat
                     media_file.reconcile_media_file(part_rec['imp_cell_obj'].record)
Exemple #12
0
 def process_persons_batch(self):
     """Reconcile person records for the current batch of import rows.

     Clears the prior import for this source, sets self.end_row and
     loads the persons fields. Each distinct record of a persons field
     is reconciled as a person and sorted into self.new_entities,
     self.reconciled_entities or, when it could not be reconciled,
     self.not_reconciled_entities.
     """
     self.clear_source()  # remove what an earlier import of this source left
     self.end_row = self.start_row + self.batch_size
     self.get_persons_fields()
     if len(self.persons_fields) < 1:
         return
     print('Number of Person Fields: ' + str(len(self.persons_fields)))
     for field_obj in self.persons_fields:
         cell_processor = ProcessCells(self.source_id, self.start_row)
         distinct_records = cell_processor.get_field_records(field_obj.field_num,
                                                             False)
         if distinct_records is False:
             continue
         print('Distinct person recs: ' + str(len(distinct_records)))
         for dist_rec in distinct_records.values():
             imp_cell = dist_rec['imp_cell_obj']
             person = CandidatePerson()
             person.project_uuid = self.project_uuid
             person.source_id = self.source_id
             person.foaf_type = field_obj.field_value_cat
             # rows in which this distinct record value occurs
             person.import_rows = dist_rec['rows']
             person.reconcile_item(imp_cell)
             if person.uuid is False:
                 # not reconciled; report the cell by field and row number
                 bad_id = str(imp_cell.field_num)
                 bad_id += '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': str(bad_id),
                                                      'label': imp_cell.record})
             elif person.new_entity:
                 self.new_entities.append({'id': str(person.uuid),
                                           'label': person.label})
             else:
                 self.reconciled_entities.append({'id': str(person.uuid),
                                                  'label': person.label})
 def process_non_contain_subjects(self):
     """Reconcile subject records that are outside containment relations.

     These subjects are only matched by their labels; creating new
     subjects is switched off, because new subjects must be defined
     within a spatial hierarchy. For every reconciled subject the
     geospace, geojson and date items of its rows are processed and
     the subject is added to self.reconciled_entities. Records that
     do not reconcile go to self.not_reconciled_entities.
     """
     if len(self.non_contain_subjects) < 1:
         return
     print('Non-contain process')
     for field_num in self.non_contain_subjects:
         cell_processor = ProcessCells(self.source_id, self.start_row)
         distinct_records = cell_processor.get_field_records(field_num,
                                                             False)
         if distinct_records is False:
             continue
         field_obj = self.subjects_fields[field_num]
         for dist_rec in distinct_records.values():
             imp_cell = dist_rec['imp_cell_obj']
             subject = CandidateSubject()
             subject.project_uuid = self.project_uuid
             subject.source_id = self.source_id
             subject.obs_node = 'obs-' + str(field_obj.obs_num)
             subject.obs_num = field_obj.obs_num
             subject.parent_context = False
             subject.parent_uuid = False
             subject.label_prefix = field_obj.value_prefix
             subject.allow_new = False  # outside a hierarchy, so no new subjects
             subject.class_uri = field_obj.field_value_cat
             # rows in which this distinct record value occurs
             subject.import_rows = dist_rec['rows']
             subject.reconcile_item(imp_cell)
             if subject.uuid is False:
                 # not reconciled; report the cell by field and row number
                 bad_id = str(imp_cell.field_num)
                 bad_id += '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': bad_id,
                                                      'label': imp_cell.record})
                 continue
             self.process_geospace_item(field_num,
                                        subject.import_rows,
                                        subject.uuid)
             self.process_geojson_item(field_num,
                                       subject.import_rows,
                                       subject.uuid)
             self.process_date_item(field_num,
                                    subject.import_rows,
                                    subject.uuid)
             self.reconciled_entities.append({'id': subject.uuid,
                                              'label': subject.label})
 def make_var_label_evenif_blank(self, des_field_obj, dist_rec):
     """ Gets the label for a predicate-variable, even if the Import
         Cell record that should provide the label is blank.

         A non-blank record is returned unchanged. A blank record is
         replaced by a default label
         (CandidateDescriptivePredicate.DEFAULT_BLANK plus the number
         of the variable field), but only when at least one of the
         variable's value-of fields has a non-blank record in the same
         rows. Otherwise the blank label is returned as is.

         :param des_field_obj: descriptive field of type "variable"
         :param dist_rec: distinct record dict, read for its
             'imp_cell_obj' (the label cell) and 'rows' keys
         :return: the label string, which may still be empty
     """
     label = dist_rec['imp_cell_obj'].record
     if len(label) > 0:
         # the record has its own label, no default needed
         return label
     for valueof_field in self.get_variable_valueof(des_field_obj):
         # value-of fields can come as field objects or as plain
         # field numbers; the cell lookup needs the number
         valueof_field_num = getattr(valueof_field,
                                     'field_num',
                                     valueof_field)
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(valueof_field_num,
                                                 dist_rec['rows'])
         if distinct_records is False:
             # other callers of get_field_records test for False,
             # so treat it as "no records" rather than iterating it
             continue
         has_value = any(len(val_dist_rec['imp_cell_obj'].record) > 0
                         for val_dist_rec in distinct_records.values())
         if has_value:
             # a value exists for this blank variable, so it needs
             # a default label to hang the value on
             label = CandidateDescriptivePredicate.DEFAULT_BLANK
             label += '[Field: ' + str(des_field_obj.field_num) + ']'
             return label
     return label
 def get_assertion_object_values(self, field_num, in_rows):
     """ Collects the import cell objects from the value-of fields
         of a given field, limited by the given row constraint.
         Returns an empty list if no value-of fields are known
         for the field.
     """
     if field_num not in self.field_valueofs:
         # value-of fields were not looked up yet, so do it now
         self.get_field_valueofs(field_num)
     if field_num not in self.field_valueofs:
         print('cannot find field_valueofs for ' + str(field_num))
         return []
     found_cells = []
     for valueof in self.field_valueofs[field_num]:
         if isinstance(valueof, ImportField):
             # an ImportField object, so use its field number
             valueof_num = valueof.field_num
         else:
             valueof_num = valueof
         print('Value of field: ' + str(valueof_num))
         cell_proc = ProcessCells(self.source_id, self.start_row)
         found_cells.extend(cell_proc.get_field_row_records(valueof_num,
                                                            in_rows))
     return found_cells
Exemple #16
0
 def process_single_media_label_field(self):
     """Reconciles media records for a source that has exactly
        one media field. It does not create new media, it only
        reconciles records against already imported media.
        Returns True if the source has exactly 1 media field,
        otherwise False (and then nothing is processed).
     """
     media_fields = ImportField.objects\
                               .filter(source_id=self.source_id,
                                       field_type='media')
     if len(media_fields) != 1:
         # only works with 1 media field in an import source
         return False
     print('yes we have 1 media field')
     field_obj = media_fields[0]
     # make the metadata fields for this one media field
     self.get_metadata_fields([field_obj.field_num])
     cell_proc = ProcessCells(self.source_id, self.start_row)
     records = cell_proc.get_field_records(field_obj.field_num, False)
     if records is False:
         return True
     print('Found Media Records: ' + str(len(records)))
     for _rec_hash, record in records.items():
         media = CandidateMedia()
         # DO NOT create new entities!
         media.mint_new_entity_ok = False
         media.project_uuid = self.project_uuid
         media.source_id = self.source_id
         media.class_uri = field_obj.field_value_cat
         # the rows where this record value is found
         media.import_rows = record['rows']
         media.metadata_obj = self.metadata_obj
         media.reconcile_manifest_item(record['imp_cell_obj'])
         if media.uuid is False:
             # not reconciled, nothing gets noted for this record
             continue
         self.reconciled_entities.append({'id': str(media.uuid),
                                          'label': media.label})
     return True
Exemple #17
0
 def process_multiple_media_fields(self):
     """ Processes every media field of the source, if there are any.
         Each distinct record is reconciled as a media item and noted
         as new, reconciled, or not reconciled. For reconciled or new
         items, file URIs from the part fields of the media field that
         start with http:// or https:// are reconciled as media files.
     """
     self.get_media_fields()
     self.get_metadata_fields()
     if len(self.media_fields) < 1:
         return
     print('yes we have media')
     for field_obj in self.media_fields:
         cell_proc = ProcessCells(self.source_id, self.start_row)
         records = cell_proc.get_field_records(field_obj.field_num, False)
         if records is False:
             continue
         print('Found Media Records: ' + str(len(records)))
         for _rec_hash, record in records.items():
             imp_cell = record['imp_cell_obj']
             media = CandidateMedia()
             media.project_uuid = self.project_uuid
             media.source_id = self.source_id
             media.class_uri = field_obj.field_value_cat
             # the rows where this record value is found
             media.import_rows = record['rows']
             media.metadata_obj = self.metadata_obj
             media.reconcile_manifest_item(imp_cell)
             if media.uuid is False:
                 # no media item, identify the record by field and row
                 bad_id = str(imp_cell.field_num) + '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': bad_id,
                                                      'label': imp_cell.record})
                 continue
             noted = {'id': str(media.uuid),
                      'label': media.label}
             if media.new_entity:
                 self.new_entities.append(noted)
             else:
                 self.reconciled_entities.append(noted)
             # we have a media item, so now files can be added to it
             for part_field_obj in field_obj.parts:
                 part_proc = ProcessCells(self.source_id, self.start_row)
                 part_records = part_proc.get_field_records(part_field_obj.field_num,
                                                            media.import_rows)
                 if part_records is False:
                     continue
                 for _part_hash, part_record in part_records.items():
                     # distinct records for the file parts of the media item
                     part_cell = part_record['imp_cell_obj']
                     media_file = CandidateMediaFile(media.uuid)
                     media_file.imp_cell_obj = part_cell
                     media_file.project_uuid = self.project_uuid
                     media_file.source_id = self.source_id
                     # the file type is in the field_value_cat
                     media_file.file_type = part_field_obj.field_value_cat
                     file_uri = part_cell.record
                     is_web_uri = (file_uri[:7] == 'http://'
                                   or file_uri[:8] == 'https://')
                     if is_web_uri:
                         # it is a URI part
                         media_file.reconcile_media_file(file_uri)
    def process_field_hierarchy(self,
                                field_num,
                                parent_uuid=False,
                                parent_context='',
                                in_rows=False):
        """ Reconciles or creates subject entities from a field of the
            containment hierarchy, then recurses into its child fields.

            Arguments:
            1. field_num (the field with the candidate subject entities)
            2. parent_uuid (uuid of the parent / containing subject entity)
            3. parent_context (context path of the parent entity)
            4. in_rows (list of row numbers to search within, so entities
               are reconciled within their contexts and a Bone 1 in
               Locus 1 stays different from a Bone 1 in Locus 2)

            For the root subject field called without a parent_uuid, the
            parent uuid and context are taken from field_parent_entities,
            when a parent entity is set there for the field.
        """
        cell_proc = ProcessCells(self.source_id, self.start_row)
        records = cell_proc.get_field_records(field_num, in_rows)
        if records is False:
            return
        field_obj = self.subjects_fields[field_num]
        unparented_root = (field_num == self.root_subject_field
                           and parent_uuid is False)
        if unparented_root and field_num in self.field_parent_entities:
            root_parent = self.field_parent_entities[field_num]
            if root_parent is not False:
                parent_uuid = root_parent.uuid
                parent_context = root_parent.context
        for _rec_hash, record in records.items():
            imp_cell = record['imp_cell_obj']
            cs = CandidateSubject()
            cs.project_uuid = self.project_uuid
            cs.source_id = self.source_id
            cs.obs_node = 'obs-' + str(field_obj.obs_num)
            cs.obs_num = field_obj.obs_num
            cs.parent_context = parent_context
            cs.parent_uuid = parent_uuid
            cs.label_prefix = field_obj.value_prefix
            # new subjects are OK, because this is a hierarchic field
            cs.allow_new = True
            cs.class_uri = field_obj.field_value_cat
            # the rows where this record value is found
            cs.import_rows = record['rows']
            cs.reconcile_item(imp_cell)
            if cs.uuid is False:
                # failed, identify the record by its field and row
                bad_id = str(imp_cell.field_num) + '-' + str(imp_cell.row_num)
                self.not_reconciled_entities.append({'id': str(bad_id),
                                                     'label': imp_cell.record})
                continue
            noted = {'id': str(cs.uuid),
                     'label': cs.context}
            if cs.is_new:
                self.new_entities.append(noted)
            else:
                self.reconciled_entities.append(noted)
            if field_num not in self.contain_ordered_subjects:
                continue
            if self.contain_ordered_subjects[field_num] is False:
                continue
            # the subject was reconciled or created, so go one level
            # down in the hierarchy, within the rows of this record
            for child_field in self.contain_ordered_subjects[field_num]:
                self.process_field_hierarchy(child_field,
                                             cs.uuid,
                                             cs.context,
                                             record['rows'])
Exemple #19
0
 def process_media_batch(self):
     """ Processes a batch of media items.
         Clears the prior import of this source, sets end_row from
         start_row and batch_size, then reconciles the distinct records
         of every media field as media items. Every record from the
         part fields of a reconciled or new item is reconciled as
         a media file.
     """
     # clear prior import for this source
     self.clear_source()
     self.end_row = self.start_row + self.batch_size
     self.get_media_fields()
     if len(self.media_fields) < 1:
         return
     for field_obj in self.media_fields:
         cell_proc = ProcessCells(self.source_id, self.start_row)
         records = cell_proc.get_field_records(field_obj.field_num, False)
         if records is False:
             continue
         for _rec_hash, record in records.items():
             imp_cell = record['imp_cell_obj']
             media = CandidateMedia()
             media.project_uuid = self.project_uuid
             media.source_id = self.source_id
             media.class_uri = field_obj.field_value_cat
             # the rows where this record value is found
             media.import_rows = record['rows']
             media.reconcile_manifest_item(imp_cell)
             if media.uuid is False:
                 # no media item, identify the record by field and row
                 bad_id = str(imp_cell.field_num) + '-' + str(imp_cell.row_num)
                 self.not_reconciled_entities.append({'id': bad_id,
                                                      'label': imp_cell.record})
                 continue
             noted = {'id': str(media.uuid),
                      'label': media.label}
             if media.new_entity:
                 self.new_entities.append(noted)
             else:
                 self.reconciled_entities.append(noted)
             # we have a media item, so now files can be added to it
             for part_field_obj in field_obj.parts:
                 part_proc = ProcessCells(self.source_id, self.start_row)
                 part_records = part_proc.get_field_records(part_field_obj.field_num,
                                                            media.import_rows)
                 if part_records is False:
                     continue
                 for _part_hash, part_record in part_records.items():
                     # distinct records for the file parts of the media item
                     media_file = CandidateMediaFile(media.uuid)
                     media_file.project_uuid = self.project_uuid
                     media_file.source_id = self.source_id
                     # the file type is in the field_value_cat
                     media_file.file_type = part_field_obj.field_value_cat
                     media_file.reconcile_media_file(part_record['imp_cell_obj'].record)
 def get_description_examples(self):
     """ Gets example entities described by other fields.

         For each subject field in des_rels, up to example_size
         distinct records (in the row order given by
         ProcessGeneral.order_distinct_records) become example entities.
         Each example is a dict with a 'label', an 'id' (subject field
         number + '-' + record key) and a list of 'descriptions'.
         Each description has a 'predicate' dict and an 'objects' list,
         filled from the first row of the example record:
         - 'description' fields use the field label as the predicate
           and the cell record as the object
         - 'variable' fields use the cell record as the predicate and
           the records of their value-of fields as the objects

         :return: list of example entity dicts, empty if des_rels
             is False
     """
     example_entities = []
     self.get_description_annotations()
     if self.des_rels is False:
         return example_entities

     def first_record(field_num, rows):
         """ Gets the first distinct record of a field within the
             rows. The callers below check the result for False.
         """
         pc = ProcessCells(self.source_id,
                           self.start_row)
         recs = pc.get_field_records(field_num, rows)
         pg = ProcessGeneral(self.source_id)
         return pg.get_first_distinct_record(recs)

     for subj_field_num, ent_obj in self.des_rels.items():
         # get some example records
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(subj_field_num,
                                                 False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         entity_example_count = 0
         for row_key, dist_rec in distinct_records.items():
             if entity_example_count >= self.example_size:
                 # enough examples for this subject field
                 break
             entity_example_count += 1
             entity = LastUpdatedOrderedDict()
             entity_label = dist_rec['imp_cell_obj'].record
             if len(entity_label) < 1:
                 entity_label = '[BLANK]'
             entity['label'] = ent_obj['field'].value_prefix + entity_label
             entity['id'] = str(subj_field_num) + '-' + str(row_key)
             entity['descriptions'] = []
             # descriptions come only from the first row of the record
             example_rows = [dist_rec['rows'][0]]
             for des_field_obj in ent_obj['des_by_fields']:
                 des_item = LastUpdatedOrderedDict()
                 des_item['predicate'] = LastUpdatedOrderedDict()
                 # values are in a list, in case there are more than 1 (variable-value)
                 des_item['objects'] = []
                 des_item['predicate']['type'] = des_field_obj.field_type
                 if des_field_obj.field_type == 'description':
                     # the field itself is the predicate
                     des_item['predicate']['label'] = des_field_obj.label
                     des_item['predicate']['id'] = des_field_obj.field_num
                     # the value comes from the imported cells
                     val_rec = first_record(des_field_obj.field_num,
                                            example_rows)
                     if val_rec is not False:
                         object_val = LastUpdatedOrderedDict()
                         object_val['record'] = val_rec['imp_cell_obj'].record
                         object_val['id'] = val_rec['rows'][0]
                         des_item['objects'].append(object_val)
                 elif des_field_obj.field_type == 'variable':
                     # the predicate comes from the imported cells.
                     # This record must be read from the variable
                     # field's own records (var_rec), not from the
                     # value records of a previous field
                     var_rec = first_record(des_field_obj.field_num,
                                            example_rows)
                     if var_rec is not False:
                         des_item['predicate']['label'] = var_rec['imp_cell_obj'].record
                         pid = str(des_field_obj.field_num) + '-' + str(var_rec['rows'][0])
                         des_item['predicate']['id'] = pid
                         # now get the fields that have object values for the predicate
                         valueof_fields = self.get_variable_valueof(des_field_obj)
                         for val_field_obj in valueof_fields:
                             val_rec = first_record(val_field_obj.field_num,
                                                    example_rows)
                             if val_rec is not False:
                                 object_val = LastUpdatedOrderedDict()
                                 object_val['record'] = val_rec['imp_cell_obj'].record
                                 oid = str(val_field_obj.field_num) + '-' + str(val_rec['rows'][0])
                                 object_val['id'] = oid
                                 des_item['objects'].append(object_val)
                 entity['descriptions'].append(des_item)
             example_entities.append(entity)
     return example_entities
Exemple #21
0
 def get_link_examples(self):
     """ Gets example entities with linking relations.

         For each subject field in link_rels, up to example_size
         distinct records (in the row order given by
         ProcessGeneral.order_distinct_records) become example entities.
         Each example is a dict with a 'label', an 'id' (subject field
         number + '-' + record key) and a list of 'links', with at most
         example_size links. Each link has a 'predicate' and an 'object',
         both read from the first row of the example record. The
         predicate is either a fixed predicate (predicate_uuid) or is
         read from the cells of a predicate field.

         :return: list of example entity dicts, empty if link_rels
             is False
     """
     example_entities = []
     self.get_link_annotations()
     if self.link_rels is False:
         return example_entities
     for subj_field_num, rels in self.link_rels.items():
         # get some example records
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(subj_field_num,
                                                 False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         entity_example_count = 0
         for row_key, dist_rec in distinct_records.items():
             if entity_example_count >= self.example_size:
                 # enough examples for this subject field
                 break
             entity_example_count += 1
             entity = LastUpdatedOrderedDict()
             entity_label = dist_rec['imp_cell_obj'].record
             if len(entity_label) < 1:
                 entity_label = '[BLANK]'
             entity['label'] = rels['sub_field_obj'].value_prefix + entity_label
             entity['id'] = str(subj_field_num) + '-' + str(row_key)
             entity['links'] = []
             # links come only from the first row of the record
             in_rows = [dist_rec['rows'][0]]
             for pred_obj in rels['pred_objs']:
                 act_preds = []
                 if pred_obj['predicate_uuid'] is not False:
                     # a fixed linking predicate
                     pred_item = LastUpdatedOrderedDict()
                     pred_item['id'] = pred_obj['predicate_uuid']
                     ent = Entity()
                     found = ent.dereference(pred_obj['predicate_uuid'])
                     if found:
                         pred_item['label'] = ent.label
                     else:
                         pred_item['label'] = '[Missing predicate!]'
                     act_preds.append(pred_item)
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     pred_field_num = pred_obj['pred_field_obj'].field_num
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     predicate_records = pc.get_field_records(pred_field_num,
                                                              in_rows)
                     if predicate_records is False:
                         # other callers of get_field_records test for
                         # False, so treat it as "no predicates"
                         predicate_records = {}
                     for pred_row_key, pred_rec in predicate_records.items():
                         pred_item = LastUpdatedOrderedDict()
                         pred_item['id'] = str(pred_field_num) + '-' + str(pred_rec['rows'][0])
                         pred_item['label'] = pred_rec['imp_cell_obj'].record
                         if len(pred_item['label']) < 1:
                             pred_item['label'] = '[BLANK]'
                         if len(act_preds) < self.example_size:
                             act_preds.append(pred_item)
                 for pred_item in act_preds:
                     link_item = LastUpdatedOrderedDict()
                     link_item['predicate'] = pred_item
                     link_item['object'] = False
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                     in_rows)
                     pg = ProcessGeneral(self.source_id)
                     obj_rec = pg.get_first_distinct_record(obj_recs)
                     if obj_rec is False:
                         # no object, so no link to show
                         continue
                     object_val = LastUpdatedOrderedDict()
                     object_label = obj_field_obj.value_prefix
                     # only an empty record counts as blank, the same
                     # as for the subject and predicate labels
                     if len(obj_rec['imp_cell_obj'].record) > 0:
                         object_label += obj_rec['imp_cell_obj'].record
                     else:
                         object_label += '[BLANK]'
                     object_val['label'] = object_label
                     object_val['id'] = str(obj_rec['imp_cell_obj'].field_num) \
                         + '-' + str(obj_rec['rows'][0])
                     link_item['object'] = object_val
                     if len(entity['links']) < self.example_size:
                         entity['links'].append(link_item)
             example_entities.append(entity)
     return example_entities
Exemple #22
0
 def process_link_batch(self):
     """ processes fields describing linking relations
         between subjects, media, documents, persons, projects entities.
         If start_row is 1, then previous imports of this source are cleared
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_link_annotations()
     if self.link_rels is not False:
         for subj_field_num, rels in self.link_rels.items():
             # get some example records
             sub_field_obj = rels['sub_field_obj']
             pc = ProcessCells(self.source_id,
                               self.start_row)
             # now get distinct records as determined by having the same assigned
             # uuid
             distinct_records = pc.get_field_records_by_fl_uuid(subj_field_num,
                                                                False)
             if distinct_records is not False:
                 # sort the list in row_order from the import table
                 pg = ProcessGeneral(self.source_id)
                 distinct_records = pg.order_distinct_records(distinct_records)
                 for row_key, dist_rec in distinct_records.items():
                     subject_uuid = dist_rec['imp_cell_obj'].fl_uuid
                     subject_type = sub_field_obj.field_type
                     subject_ok = dist_rec['imp_cell_obj'].cell_ok
                     subject_record = dist_rec['imp_cell_obj'].record
                     if subject_uuid is False or\
                        len(subject_record) < 1:
                         subject_ok = False
                     if subject_uuid == 'False':
                         subject_ok = False
                     sort = 0
                     in_rows = dist_rec['rows']
                     if subject_ok is False:
                         in_rows = [-1]
                     for pred_obj in rels['pred_objs']:
                         act_preds = {}
                         obs_num = 1  # default observation number
                         if pred_obj['predicate_uuid'] is not False:
                             # limit to the 'in rows' for the current item
                             act_preds[pred_obj['predicate_uuid']] = in_rows
                         elif pred_obj['pred_field_obj'] is not False:
                             # linking predicate is in a field
                             if pred_obj['pred_field_obj'].obs_num > 0:
                                 obs_num = pred_obj['pred_field_obj'].obs_num
                             sort = pred_obj['pred_field_obj'].field_num
                             pc = ProcessCells(self.source_id,
                                               self.start_row)
                             predicate_records= pc.get_field_records(pred_obj['pred_field_obj'].field_num,
                                                                     in_rows)
                             for pred_row_key, pred_rec in predicate_records.items():
                                 clp = CandidateLinkPredicate()
                                 clp.source_id = self.source_id
                                 clp.project_uuid = self.project_uuid
                                 clp.make_reconcile_link_pred(pred_rec['imp_cell_obj'].record)
                                 if clp.uuid is not False:
                                     act_preds[clp.uuid] = pred_rec['rows']
                         obs_node = '#obs-' + str(obs_num)
                         for predicate_uuid, act_in_rows in act_preds.items():
                             obj_field_obj = pred_obj['obj_field_obj']
                             # now get a value for the object from the imported cells
                             pc = ProcessCells(self.source_id,
                                               self.start_row)
                             obj_recs = pc.get_field_records_by_fl_uuid(obj_field_obj.field_num,
                                                                        act_in_rows)
                             if sort < 1:
                                 sort = obj_field_obj.field_num
                             if obj_recs is not False:
                                 for hash_key, obj_rec in obj_recs.items():
                                     object_uuid = obj_rec['imp_cell_obj'].fl_uuid
                                     object_type = obj_field_obj.field_type
                                     object_ok = obj_rec['imp_cell_obj'].cell_ok
                                     object_record = obj_rec['imp_cell_obj'].record
                                     if len(object_record) < 1:
                                         # blank record, don't make a link
                                         object_ok = False
                                     if object_uuid is False or\
                                         len(object_uuid) < 1:
                                          object_ok = False
                                     if object_uuid == 'False':
                                          object_ok = False
                                     if object_ok and subject_ok:
                                         message = 'Attempt link: ' + subject_record + ' ('+ subject_uuid + ') -> '
                                         message += predicate_uuid + ' -> ' + object_record + ' ('+ object_uuid + ')'
                                         message += 'in rows: ' + str(act_in_rows)
                                         # print(message)
                                         cla = CandidateLinkAssertion()
                                         cla.project_uuid = self.project_uuid
                                         cla.source_id = self.source_id
                                         cla.subject_uuid = subject_uuid
                                         cla.subject_type = subject_type
                                         cla.obs_node = obs_node
                                         cla.obs_num = obs_num
                                         cla.sort = sort
                                         cla.predicate_uuid = predicate_uuid
                                         cla.object_uuid = object_uuid
                                         cla.object_type = object_type
                                         if (subject_ok and object_ok) and predicate_uuid is not False:
                                             # print('Link ok: ' + str(obj_rec['imp_cell_obj'].record))
                                             cla.create_link()
                                             if cla.is_valid:
                                                 self.count_new_assertions += 1
                                                 print('Link Count OK: ' + str(self.count_new_assertions))
 def process_description_batch(self):
     """ Makes description assertions for entities (subjects, media,
         documents, persons, projects) described by import fields.

         Calls clear_source() first, then reconciles the descriptive
         predicates and their types / strings, and finally creates a
         CandidateDescription for each value found for every usable
         subject record. Each valid description adds one to
         self.count_new_assertions.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_description_annotations()
     if self.des_rels is False:
         # no description annotations, nothing to process
         return
     # pass 1: reconcile the predicates of the fields that describe
     # each subject field
     for described in self.des_rels.values():
         self.reconcile_descriptive_predicates(described['des_by_fields'])
     # pass 2: reconcile types and strings by looping through the
     # reconciled predicate fields
     self.reconcile_types_strings()
     # pass 3: make the description assertions
     for subject_field_num, described in self.des_rels.items():
         subject_type = described['field'].field_type
         # get records for the subject of the description
         cells = ProcessCells(self.source_id,
                              self.start_row)
         subject_records = cells.get_field_records_by_fl_uuid(subject_field_num,
                                                              False)
         if subject_records is False:
             continue
         general = ProcessGeneral(self.source_id)
         subject_records = general.order_distinct_records(subject_records)
         for subject_rec in subject_records.values():
             subject_cell = subject_rec['imp_cell_obj']
             if not subject_cell.cell_ok:
                 # only OK subject records get description records
                 continue
             subject_uuid = subject_cell.fl_uuid
             for des_field in described['des_by_fields']:
                 des_field_num = des_field.field_num
                 # observation numbers below 1 fall back to 1
                 obs_num = 1 if des_field.obs_num < 1 else des_field.obs_num
                 obs_node = '#obs-' + str(obs_num)
                 # get the 'value-of' import cell objects for the current
                 # 'descriptive' or 'variable' field_num; a 'variable'
                 # field_num may give multiple 'value-of' cells
                 value_cells = self.get_assertion_object_values(des_field_num,
                                                                subject_rec['rows'])
                 for value_cell in value_cells:
                     predicate = self.look_up_predicate(des_field_num,
                                                        value_cell.row_num)
                     if predicate is False:
                         # no predicate for this row, so no description
                         continue
                     cd = CandidateDescription()
                     cd.source_id = self.source_id
                     cd.project_uuid = self.project_uuid
                     cd.subject_uuid = subject_uuid
                     cd.subject_type = subject_type
                     cd.obs_num = obs_num
                     cd.obs_node = obs_node
                     cd.sort = des_field_num
                     cd.predicate_uuid = str(predicate.uuid)
                     cd.data_type = predicate.data_type
                     cd.record = str(value_cell.record)
                     cd.fl_uuid = value_cell.fl_uuid
                     cd.l_uuid = value_cell.l_uuid
                     cd.create_description()
                     if cd.is_valid:
                         self.count_new_assertions += 1
 def process_complex_batch(self):
     """ Processes complex description fields for a batch of import rows.

         For every complex description field, and every usable subject
         record of the field it describes, this:
         1. saves an assertion linking the subject to a
            complex-description item, whose uuid is the subject uuid +
            FRAG_ID_PREFIX + the 1-based position of the field,
         2. stores that uuid as the fl_uuid of the non-blank import
            cells of the complex description field, and
         3. saves an xsd:string label assertion for each of those cells.
         self.count_new_assertions goes up by one per assertion that
         saves OK. Assertion saves are best-effort: a failed save is
         skipped rather than stopping the batch.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_complex_description_fields()
     if len(self.complex_des_fields) < 1:
         # no complex description fields, nothing to process
         return
     print('Number of Complex Description Fields: ' + str(len(self.complex_des_fields)))
     # cache of label text -> string uuid, shared across all fields
     label_str_uuids = {}
     # adding a source_id suffix keeps these records from being deleted
     # as descriptions get processed
     complex_source_id = self.source_id + ProcessGeneral.COMPLEX_DESCRIPTION_SOURCE_SUFFIX
     for cp_id_number, cp_field in enumerate(self.complex_des_fields, 1):
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records_by_fl_uuid(cp_field.describes_field.field_num,
                                                            False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for dist_rec in distinct_records.values():
             # observation numbers below 1 fall back to 1
             obs_num = 1 if cp_field.obs_num < 1 else cp_field.obs_num
             obs_node = '#obs-' + str(obs_num)
             subject_cell = dist_rec['imp_cell_obj']
             subject_uuid = subject_cell.fl_uuid
             subject_type = cp_field.describes_field.field_type
             subject_ok = subject_cell.cell_ok
             if subject_uuid is False or\
                len(subject_cell.record) < 1 or\
                subject_uuid == 'False':
                 # no usable uuid or a blank subject record
                 subject_ok = False
             in_rows = dist_rec['rows']
             print('Look for complex description labels in rows: ' + str(in_rows))
             if subject_ok is False:
                 continue
             # OK! we have the subjects of complex descriptions
             # with uuids, so now we can make an fl_uuid for each
             # of the complex description fields.
             complex_uuid = subject_uuid + self.FRAG_ID_PREFIX + str(cp_id_number)
             complex_recs = ImportCell.objects\
                                      .filter(source_id=self.source_id,
                                              field_num=cp_field.field_num,
                                              row_num__in=in_rows)\
                                      .exclude(record='')
             if len(complex_recs) < 1:
                 # only blank records for this subject, nothing to link
                 continue
             # we have records in the complex description field that are
             # not blank and are associated with the subject of the
             # complex description. so now, let's record this association.
             new_ass = Assertion()
             new_ass.uuid = subject_uuid
             new_ass.subject_type = subject_type
             new_ass.project_uuid = self.project_uuid
             new_ass.source_id = complex_source_id
             new_ass.obs_node = obs_node
             new_ass.obs_num = obs_num
             new_ass.sort = 100 + cp_id_number
             new_ass.visibility = 1
             new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES
             new_ass.object_type = 'complex-description'
             new_ass.object_uuid = complex_uuid
             # save exactly once, inside the guard, so a failed save
             # can not abort the rest of the batch
             try:
                 new_ass.save()
             except Exception:
                 # best-effort save, a failure just is not counted
                 pass
             else:
                 print('Saved complex-description: ' + complex_uuid)
                 self.count_new_assertions += 1
             # now look through the complex description records and make labels
             for comp_rec in complex_recs:
                 # first save the fl_uuid for the complex description
                 comp_rec.fl_uuid = complex_uuid
                 comp_rec.save()
                 if isinstance(cp_field.value_prefix, str):
                     cp_label = cp_field.value_prefix + comp_rec.record
                 else:
                     cp_label = comp_rec.record
                 if cp_label not in label_str_uuids:
                     # make a uuid for the record value
                     sm = StringManagement()
                     sm.project_uuid = self.project_uuid
                     sm.source_id = complex_source_id
                     oc_string = sm.get_make_string(cp_label)
                     label_str_uuids[cp_label] = oc_string.uuid
                 content_uuid = label_str_uuids[cp_label]
                 new_ass = Assertion()
                 new_ass.uuid = complex_uuid
                 new_ass.subject_type = 'complex-description'
                 new_ass.project_uuid = self.project_uuid
                 new_ass.source_id = complex_source_id
                 new_ass.obs_node = '#obs-' + str(self.obs_num_complex_description_assertions)
                 new_ass.obs_num = self.obs_num_complex_description_assertions
                 new_ass.sort = 1
                 new_ass.visibility = 1
                 new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES_LABEL
                 new_ass.object_type = 'xsd:string'
                 new_ass.object_uuid = content_uuid
                 try:
                     new_ass.save()
                 except Exception:
                     # best-effort save, a failure just is not counted
                     pass
                 else:
                     self.count_new_assertions += 1
 def get_contained_field_exp(self,
                             field_num,
                             in_rows=False,
                             check_parent_entity=False):
     """ get examples of entities in containment fields, does recursive lookups
         to get a whole tree, limited to a maximum of a few examples

         field_num: the containment field to get examples for
         in_rows: passed on to get_field_records to limit the lookup
             (False is passed for an unrestricted lookup)
         check_parent_entity: if True and field_num is the root subject
             field with a parent entity, the result is a single node for
             that parent entity, with the field examples as its children

         Returns a list of example nodes (each with 'label', 'type',
         'field_label', 'field_num', 'id' and 'children' keys), or False
         if no records are found for the field.
     """
     if field_num == self.root_subject_field and check_parent_entity:
         # Check to see if the root is contained in a named entity
         parent_entity = self.field_parent_entities[field_num]
         if parent_entity is not False:
             # Root is in a named entity, so add it.
             contain_node = LastUpdatedOrderedDict()
             contain_node['label'] = parent_entity.context
             contain_node['type'] = 'subjects'
             contain_node['field_label'] = 'Parent of field: ' + self.subjects_fields[field_num].label
             contain_node['field_num'] = 0
             contain_node['id'] = parent_entity.uuid
             # now look for children of the root entity.
             contain_node['children'] = self.get_contained_field_exp(field_num)
             return [contain_node]
     pc = ProcessCells(self.source_id,
                       self.start_row)
     distinct_records = pc.get_field_records(field_num,
                                             in_rows)
     if distinct_records is False:
         return False
     contain_nodes = []
     seen_labels = set()
     field_obj = self.subjects_fields[field_num]
     # the child fields do not change per record, so look them up once
     child_fields = False
     if field_num in self.contain_ordered_subjects:
         child_fields = self.contain_ordered_subjects[field_num]
     for dist_rec in distinct_records.values():
         if len(contain_nodes) > self.example_size:
             # only add examples while we're less or equal to the total
             # example size; nothing more can be added after that
             break
         entity_label = dist_rec['imp_cell_obj'].record
         if len(entity_label) < 1:
             entity_label = '[BLANK]'
         entity_label = field_obj.value_prefix + entity_label
         if entity_label in seen_labels:
             # we only list the same entity once, so skip it before
             # doing any (recursive) lookups for its children
             continue
         seen_labels.add(entity_label)
         contain_node = LastUpdatedOrderedDict()
         contain_node['label'] = entity_label
         contain_node['type'] = 'import-record'
         contain_node['field_label'] = field_obj.label
         contain_node['field_num'] = field_num
         contain_node['id'] = dist_rec['rows'][0]
         contain_node['children'] = False
         if child_fields is not False:
             seen_child_labels = set()
             for child_field in child_fields:
                 act_children = self.get_contained_field_exp(child_field,
                                                             dist_rec['rows'])
                 if act_children is False:
                     continue
                 if contain_node['children'] is False:
                     contain_node['children'] = []
                 for act_child in act_children:
                     if act_child['label'] not in seen_child_labels:
                         # so we only list the same entity once
                         contain_node['children'].append(act_child)
                         seen_child_labels.add(act_child['label'])
         contain_nodes.append(contain_node)
     return contain_nodes
Exemple #26
0
 def get_link_examples(self):
     """ Gets example entities with linking relations

         For each subject field in self.link_rels, makes up to
         self.example_size example entities. Each entity has a 'label',
         an 'id' and a list of 'links' (at most self.example_size), where
         a link pairs a 'predicate' with an example 'object' taken from
         the first import row of the subject record.

         Returns a list of example entities, which is empty if there
         are no link annotations.
     """
     example_entities = []
     self.get_link_annotations()
     if self.link_rels is False:
         return example_entities
     for subj_field_num, rels in self.link_rels.items():
         # get some example records
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records(subj_field_num, False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         entity_example_count = 0
         for row_key, dist_rec in distinct_records.items():
             if entity_example_count >= self.example_size:
                 # we have enough examples for this subject field
                 break
             entity_example_count += 1
             entity = LastUpdatedOrderedDict()
             entity_label = dist_rec['imp_cell_obj'].record
             if len(entity_label) < 1:
                 entity_label = '[BLANK]'
             entity_label = rels['sub_field_obj'].value_prefix + entity_label
             entity['label'] = entity_label
             entity['id'] = str(subj_field_num) + '-' + str(row_key)
             entity['links'] = []
             # examples only use the first row where the subject is found
             in_rows = [dist_rec['rows'][0]]
             for pred_obj in rels['pred_objs']:
                 act_preds = []
                 if pred_obj['predicate_uuid'] is not False:
                     # linking predicate is a single, already known item
                     pred_item = LastUpdatedOrderedDict()
                     pred_item['id'] = pred_obj['predicate_uuid']
                     ent = Entity()
                     found = ent.dereference(pred_obj['predicate_uuid'])
                     if found:
                         pred_item['label'] = ent.label
                     else:
                         pred_item['label'] = '[Missing predicate!]'
                     act_preds.append(pred_item)
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     pred_field_num = pred_obj['pred_field_obj'].field_num
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     pred_recs = pc.get_field_records(pred_field_num,
                                                      in_rows)
                     if pred_recs is not False:
                         # the records are keyed by a hash, so the
                         # record dicts are the values
                         for pred_rec in pred_recs.values():
                             if len(act_preds) >= self.example_size:
                                 break
                             pred_item = LastUpdatedOrderedDict()
                             pred_item['id'] = str(pred_field_num)
                             pred_item['id'] += '-' + str(pred_rec['rows'][0])
                             pred_item['label'] = pred_rec['imp_cell_obj'].record
                             if len(pred_item['label']) < 1:
                                 pred_item['label'] = '[BLANK]'
                             act_preds.append(pred_item)
                 for pred_item in act_preds:
                     link_item = LastUpdatedOrderedDict()
                     link_item['predicate'] = pred_item
                     link_item['object'] = False
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                     in_rows)
                     pg = ProcessGeneral(self.source_id)
                     obj_rec = pg.get_first_distinct_record(obj_recs)
                     if obj_rec is False:
                         # no object value, so no example link
                         continue
                     object_val = LastUpdatedOrderedDict()
                     object_label = obj_field_obj.value_prefix
                     if len(obj_rec['imp_cell_obj'].record) > 0:
                         # any non-empty record (even 1 character) is a label
                         object_label += obj_rec['imp_cell_obj'].record
                     else:
                         object_label += '[BLANK]'
                     object_val['label'] = object_label
                     object_val['id'] = str(obj_rec['imp_cell_obj'].field_num)
                     object_val['id'] += '-' + str(obj_rec['rows'][0])
                     link_item['object'] = object_val
                     if len(entity['links']) < self.example_size:
                         entity['links'].append(link_item)
             example_entities.append(entity)
     return example_entities
 def process_complex_batch(self):
     """ Processes complex description fields for a batch of import rows.

         For every complex description field, and every usable subject
         record of the field it describes, this:
         1. saves an assertion linking the subject to a
            complex-description item, whose uuid is the subject uuid +
            FRAG_ID_PREFIX + the 1-based position of the field,
         2. stores that uuid as the fl_uuid of the non-blank import
            cells of the complex description field, and
         3. saves an xsd:string label assertion for each of those cells.
         self.count_new_assertions goes up by one per assertion that
         saves OK. Assertion saves are best-effort: a failed save is
         skipped rather than stopping the batch.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_complex_description_fields()
     if len(self.complex_des_fields) < 1:
         # no complex description fields, nothing to process
         return
     print('Number of Complex Description Fields: ' +
           str(len(self.complex_des_fields)))
     # cache of label text -> string uuid, shared across all fields
     label_str_uuids = {}
     # adding a source_id suffix keeps these records from being deleted
     # as descriptions get processed
     complex_source_id = self.source_id + ProcessGeneral.COMPLEX_DESCRIPTION_SOURCE_SUFFIX
     for cp_id_number, cp_field in enumerate(self.complex_des_fields, 1):
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records_by_fl_uuid(
             cp_field.describes_field.field_num, False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for dist_rec in distinct_records.values():
             # observation numbers below 1 fall back to 1
             obs_num = 1 if cp_field.obs_num < 1 else cp_field.obs_num
             obs_node = '#obs-' + str(obs_num)
             subject_cell = dist_rec['imp_cell_obj']
             subject_uuid = subject_cell.fl_uuid
             subject_type = cp_field.describes_field.field_type
             subject_ok = subject_cell.cell_ok
             if subject_uuid is False or\
                len(subject_cell.record) < 1 or\
                subject_uuid == 'False':
                 # no usable uuid or a blank subject record
                 subject_ok = False
             in_rows = dist_rec['rows']
             print('Look for complex description labels in rows: ' +
                   str(in_rows))
             if subject_ok is False:
                 continue
             # OK! we have the subjects of complex descriptions
             # with uuids, so now we can make an fl_uuid for each
             # of the complex description fields.
             complex_uuid = subject_uuid + self.FRAG_ID_PREFIX + str(
                 cp_id_number)
             complex_recs = ImportCell.objects\
                                      .filter(source_id=self.source_id,
                                              field_num=cp_field.field_num,
                                              row_num__in=in_rows)\
                                      .exclude(record='')
             if len(complex_recs) < 1:
                 # only blank records for this subject, nothing to link
                 continue
             # we have records in the complex description field that are
             # not blank and are associated with the subject of the
             # complex description. so now, let's record this association.
             new_ass = Assertion()
             new_ass.uuid = subject_uuid
             new_ass.subject_type = subject_type
             new_ass.project_uuid = self.project_uuid
             new_ass.source_id = complex_source_id
             new_ass.obs_node = obs_node
             new_ass.obs_num = obs_num
             new_ass.sort = 100 + cp_id_number
             new_ass.visibility = 1
             new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES
             new_ass.object_type = 'complex-description'
             new_ass.object_uuid = complex_uuid
             # save exactly once, inside the guard, so a failed save
             # can not abort the rest of the batch
             try:
                 new_ass.save()
             except Exception:
                 # best-effort save, a failure just is not counted
                 pass
             else:
                 print('Saved complex-description: ' + complex_uuid)
                 self.count_new_assertions += 1
             # now look through the complex description records and make labels
             for comp_rec in complex_recs:
                 # first save the fl_uuid for the complex description
                 comp_rec.fl_uuid = complex_uuid
                 comp_rec.save()
                 if isinstance(cp_field.value_prefix, str):
                     cp_label = cp_field.value_prefix + comp_rec.record
                 else:
                     cp_label = comp_rec.record
                 if cp_label not in label_str_uuids:
                     # make a uuid for the record value
                     sm = StringManagement()
                     sm.project_uuid = self.project_uuid
                     sm.source_id = complex_source_id
                     oc_string = sm.get_make_string(cp_label)
                     label_str_uuids[cp_label] = oc_string.uuid
                 content_uuid = label_str_uuids[cp_label]
                 new_ass = Assertion()
                 new_ass.uuid = complex_uuid
                 new_ass.subject_type = 'complex-description'
                 new_ass.project_uuid = self.project_uuid
                 new_ass.source_id = complex_source_id
                 new_ass.obs_node = '#obs-' + str(
                     self.obs_num_complex_description_assertions)
                 new_ass.obs_num = self.obs_num_complex_description_assertions
                 new_ass.sort = 1
                 new_ass.visibility = 1
                 new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES_LABEL
                 new_ass.object_type = 'xsd:string'
                 new_ass.object_uuid = content_uuid
                 try:
                     new_ass.save()
                 except Exception:
                     # best-effort save, a failure just is not counted
                     pass
                 else:
                     self.count_new_assertions += 1
Exemple #28
0
 def process_link_batch(self):
     """Create linking assertions for one batch of import rows.

     Processes fields describing linking relations between subjects,
     media, documents, persons and projects entities.  For every
     subject record found in a linking-relation subject field, each
     configured predicate/object pairing is resolved and a
     CandidateLinkAssertion is created when the subject cell, the
     object cell and the predicate are all usable.

     Side effects:
         * calls self.clear_source() on every invocation (the original
           note says prior imports are cleared when start_row is 1;
           that check is presumably inside clear_source -- confirm)
         * sets self.end_row to start_row + batch_size
         * increments self.count_new_assertions for each valid link

     Returns:
         None
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_link_annotations()
     if self.link_rels is False:
         # no linking relations annotated for this source
         return
     for subj_field_num, rels in self.link_rels.items():
         sub_field_obj = rels['sub_field_obj']
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records(subj_field_num, False)
         if distinct_records is False:
             # get_field_records uses False to signal "no records"
             continue
         # sort the records in row order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for dist_rec in distinct_records.values():
             subject_cell = dist_rec['imp_cell_obj']
             subject_uuid = subject_cell.fl_uuid
             subject_type = sub_field_obj.field_type
             # a subject without a reconciled uuid can never be linked
             subject_ok = subject_cell.cell_ok and subject_uuid is not False
             # NOTE: sort is deliberately initialised once per subject
             # record (as in the original), so a value chosen for one
             # predicate carries over to the following ones.
             sort = 0
             in_rows = dist_rec['rows']
             for pred_obj in rels['pred_objs']:
                 # maps predicate uuid -> import rows where it applies
                 act_preds = {}
                 obs_num = 1  # default observation number
                 if pred_obj['predicate_uuid'] is not False:
                     # a single fixed predicate applies to all rows
                     act_preds[pred_obj['predicate_uuid']] = in_rows
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     pred_field_obj = pred_obj['pred_field_obj']
                     if pred_field_obj.obs_num > 0:
                         obs_num = pred_field_obj.obs_num
                     sort = pred_field_obj.field_num
                     pc = ProcessCells(self.source_id, self.start_row)
                     pred_recs = pc.get_field_records(
                         pred_field_obj.field_num, in_rows)
                     if pred_recs is not False:
                         # iterate the record dicts, not the hash keys
                         for pred_rec in pred_recs.values():
                             clp = CandidateLinkPredicate()
                             clp.source_id = self.source_id
                             clp.project_uuid = self.project_uuid
                             clp.make_reconcile_link_pred(
                                 pred_rec['imp_cell_obj'].record)
                             if clp.uuid is not False:
                                 act_preds[clp.uuid] = pred_rec['rows']
                 obs_node = '#obs-' + str(obs_num)
                 for predicate_uuid, act_in_rows in act_preds.items():
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id, self.start_row)
                     obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                     act_in_rows)
                     if sort < 1:
                         sort = obj_field_obj.field_num
                     if obj_recs is False:
                         # no object cells in these rows; nothing to link
                         continue
                     for obj_rec in obj_recs.values():
                         object_cell = obj_rec['imp_cell_obj']
                         cla = CandidateLinkAssertion()
                         cla.project_uuid = self.project_uuid
                         cla.source_id = self.source_id
                         cla.subject_uuid = subject_uuid
                         cla.subject_type = subject_type
                         cla.obs_node = obs_node
                         cla.obs_num = obs_num
                         cla.sort = sort
                         cla.predicate_uuid = predicate_uuid
                         cla.object_uuid = object_cell.fl_uuid
                         cla.object_type = obj_field_obj.field_type
                         links_ok = subject_ok and object_cell.cell_ok
                         if links_ok and predicate_uuid is not False:
                             cla.create_link()
                             if cla.is_valid:
                                 self.count_new_assertions += 1
                                 print('Count OK: ' + str(self.count_new_assertions))
Exemple #29
0
 def process_link_batch(self):
     """Create linking assertions for one batch of import rows.

     Handles fields describing linking relations between subjects,
     media, documents, persons and projects entities.  Each subject
     record from a linking-relation subject field is paired with every
     configured predicate and object field; a CandidateLinkAssertion
     is created when the subject cell, the object cell and the
     predicate are all usable.

     Side effects:
         * calls self.clear_source() on every invocation (the original
           note says prior imports are cleared when start_row is 1;
           that check is presumably inside clear_source -- confirm)
         * sets self.end_row to start_row + batch_size
         * increments self.count_new_assertions for each valid link

     Returns:
         None
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_link_annotations()
     if self.link_rels is False:
         # no linking relations annotated for this source
         return
     for subj_field_num, rels in self.link_rels.items():
         sub_field_obj = rels['sub_field_obj']
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records(subj_field_num, False)
         if distinct_records is False:
             # get_field_records uses False to signal "no records"
             continue
         # sort the records in row order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for dist_rec in distinct_records.values():
             subject_cell = dist_rec['imp_cell_obj']
             subject_uuid = subject_cell.fl_uuid
             subject_type = sub_field_obj.field_type
             # a subject without a reconciled uuid can never be linked
             subject_ok = subject_cell.cell_ok and subject_uuid is not False
             # NOTE: sort is deliberately initialised once per subject
             # record (as in the original), so a value chosen for one
             # predicate carries over to the following ones.
             sort = 0
             in_rows = dist_rec['rows']
             for pred_obj in rels['pred_objs']:
                 # maps predicate uuid -> import rows where it applies
                 act_preds = {}
                 obs_num = 1  # default observation number
                 if pred_obj['predicate_uuid'] is not False:
                     # a single fixed predicate applies to all rows
                     act_preds[pred_obj['predicate_uuid']] = in_rows
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     pred_field_obj = pred_obj['pred_field_obj']
                     if pred_field_obj.obs_num > 0:
                         obs_num = pred_field_obj.obs_num
                     sort = pred_field_obj.field_num
                     pc = ProcessCells(self.source_id, self.start_row)
                     pred_recs = pc.get_field_records(
                         pred_field_obj.field_num, in_rows)
                     if pred_recs is not False:
                         # iterate the record dicts, not the hash keys
                         for pred_rec in pred_recs.values():
                             clp = CandidateLinkPredicate()
                             clp.source_id = self.source_id
                             clp.project_uuid = self.project_uuid
                             clp.make_reconcile_link_pred(
                                 pred_rec['imp_cell_obj'].record)
                             if clp.uuid is not False:
                                 act_preds[clp.uuid] = pred_rec['rows']
                 obs_node = '#obs-' + str(obs_num)
                 for predicate_uuid, act_in_rows in act_preds.items():
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id, self.start_row)
                     obj_recs = pc.get_field_records(
                         obj_field_obj.field_num, act_in_rows)
                     if sort < 1:
                         sort = obj_field_obj.field_num
                     if obj_recs is False:
                         # no object cells in these rows; nothing to link
                         continue
                     for obj_rec in obj_recs.values():
                         object_cell = obj_rec['imp_cell_obj']
                         cla = CandidateLinkAssertion()
                         cla.project_uuid = self.project_uuid
                         cla.source_id = self.source_id
                         cla.subject_uuid = subject_uuid
                         cla.subject_type = subject_type
                         cla.obs_node = obs_node
                         cla.obs_num = obs_num
                         cla.sort = sort
                         cla.predicate_uuid = predicate_uuid
                         cla.object_uuid = object_cell.fl_uuid
                         cla.object_type = obj_field_obj.field_type
                         links_ok = subject_ok and object_cell.cell_ok
                         if links_ok and predicate_uuid is not False:
                             cla.create_link()
                             if cla.is_valid:
                                 self.count_new_assertions += 1
                                 print(
                                     'Count OK: ' +
                                     str(self.count_new_assertions))