コード例 #1
0
 def get_description_annotations(self):
     """ Loads the 'describes' annotations for this source and groups them.

         Stores the annotation query in self.description_annotations. When
         at least one annotation exists, sets self.count_active_fields and
         builds self.des_rels, keyed by the field number of the described
         field. Each entry holds the described field object ('field') and
         the list of field objects describing it ('des_by_fields').
         Described fields whose type is not in
         ImportProfile.DEFAULT_SUBJECT_TYPE_FIELDS are left out.
     """
     annotations = ImportFieldAnnotation.objects.filter(
         source_id=self.source_id,
         predicate=ImportFieldAnnotation.PRED_DESCRIBES
     ).order_by('field_num')
     self.description_annotations = annotations
     anno_count = len(annotations)
     if anno_count < 1:
         return
     self.count_active_fields = anno_count
     self.des_rels = LastUpdatedOrderedDict()
     for anno in annotations:
         described_num = anno.object_field_num
         if described_num not in self.des_rels:
             # the entities being described live in the field identified
             # by object_field_num
             described_field = ProcessGeneral(self.source_id).get_field_obj(described_num)
             if described_field is False:
                 continue
             if described_field.field_type not in ImportProfile.DEFAULT_SUBJECT_TYPE_FIELDS:
                 continue
             self.des_rels[described_num] = LastUpdatedOrderedDict()
             self.des_rels[described_num]['field'] = described_field
             self.des_rels[described_num]['des_by_fields'] = []
         # the descriptive field itself is identified by field_num
         descriptor = ProcessGeneral(self.source_id).get_field_obj(anno.field_num)
         if descriptor is not False:
             self.des_rels[described_num]['des_by_fields'].append(descriptor)
コード例 #2
0
ファイル: templating.py プロジェクト: rdhyee/open-context-py
 def __init__(self, source_id):
     """ Sets up default state for working with one import source.

         Args:
             source_id: identifier of the import source. It is handed to
                 ProcessGeneral, whose project_uuid is copied onto this
                 object after get_source() has been called.
     """
     self.source_id = source_id
     pg = ProcessGeneral(source_id)
     pg.get_source()
     self.project_uuid = pg.project_uuid
     # field lists and general state
     self.fields = []
     self.raw_field_annotations = []
     self.field_annotations = False
     self.label = False
     self.nav = False
     self.get_examples = False
     # flags for kinds of fields; each is assigned exactly once
     # (has_doc_field used to be set twice)
     self.has_subjects = False
     self.has_media_field = False
     self.has_doc_field = False
     self.has_complex_des_field = False
     # predicate constants copied onto the instance
     # (presumably so templates can reference them -- confirm)
     self.PREDICATE_CONTAINS = Assertion.PREDICATES_CONTAINS
     self.PREDICATE_LINK = Assertion.PREDICATES_LINK
     self.PREDICATE_CONTAINED_IN = ImportFieldAnnotation.PRED_CONTAINED_IN
     self.PRED_DESCRIBES = ImportFieldAnnotation.PRED_DESCRIBES
     self.PRED_VALUE_OF = ImportFieldAnnotation.PRED_VALUE_OF
     self.PRED_MEDIA_PART_OF = ImportFieldAnnotation.PRED_MEDIA_PART_OF
     self.PRED_DOC_Text = ImportFieldAnnotation.PRED_DOC_Text
     self.PRED_GEO_LOCATION = ImportFieldAnnotation.PRED_GEO_LOCATION
     self.PRED_DATE_EVENT = ImportFieldAnnotation.PRED_DATE_EVENT
     self.PRED_OBS_NUM = ImportFieldAnnotation.PRED_OBS_NUM
     self.PRED_METADATA = ImportFieldAnnotation.PRED_METADATA
     self.PRED_DRAFT_CONTAINS = ImportFieldAnnotation.PRED_DRAFT_CONTAINS
     self.PRED_COMPLEX_DES = ImportFieldAnnotation.PRED_COMPLEX_DES
     self.PRED_COMPLEX_LABEL = ImportFieldAnnotation.PRED_COMPLEX_LABEL
コード例 #3
0
 def reconcile_item(self, imp_cell_obj):
     """ Reconciles one import cell against existing person records.

         The label and combined name come from the cell's record. For a
         blank record they are set to DEFAULT_BLANK only when
         check_blank_required() returns a truthy value. If the resulting
         label is a non-empty string and match_against_persons() returns
         False, a new person item is created under a fresh UUID.
         update_import_cell_uuid() is always called at the end.
     """
     self.imp_cell_obj = imp_cell_obj
     if len(imp_cell_obj.record) > 0:
         self.combined_name = self.label = imp_cell_obj.record
     else:
         general = ProcessGeneral(self.source_id)
         rows_to_check = [imp_cell_obj.row_num] if self.import_rows is False else self.import_rows
         self.evenif_blank = general.check_blank_required(imp_cell_obj.field_num,
                                                          rows_to_check)
         if self.evenif_blank:
             self.combined_name = self.label = self.DEFAULT_BLANK
     usable_label = isinstance(self.label, str) and len(self.label) > 0
     if usable_label and self.match_against_persons(self.combined_name) is False:
         # No match, so make new manifest objects. A new UUID is needed
         # because the ImportCell fl_uuid cannot be assumed to identify a
         # unique entity: uniqueness depends on context (other cells).
         self.new_entity = True
         self.uuid = GenUUID.uuid4()
         self.create_person_item()
     self.update_import_cell_uuid()
コード例 #4
0
ファイル: templating.py プロジェクト: ekansa/open-context-py
 def __init__(self, source_id):
     """ Sets up default state for working with one import source.

         Args:
             source_id: identifier of the import source. It is handed to
                 ProcessGeneral, whose project_uuid is copied onto this
                 object after get_source() has been called.
     """
     self.source_id = source_id
     pg = ProcessGeneral(source_id)
     pg.get_source()
     self.project_uuid = pg.project_uuid
     # field lists and general state
     self.fields = []
     self.raw_field_annotations = []
     self.field_annotations = False
     self.label = False
     self.nav = False
     self.get_examples = False
     # flags for kinds of fields; each is assigned exactly once
     # (has_doc_field used to be set twice)
     self.has_subjects = False
     self.has_media_field = False
     self.has_doc_field = False
     self.has_complex_des_field = False
     # predicate constants copied onto the instance
     # (presumably so templates can reference them -- confirm)
     self.PREDICATE_CONTAINS = Assertion.PREDICATES_CONTAINS
     self.PREDICATE_LINK = Assertion.PREDICATES_LINK
     self.PREDICATE_CONTAINED_IN = ImportFieldAnnotation.PRED_CONTAINED_IN
     self.PRED_DESCRIBES = ImportFieldAnnotation.PRED_DESCRIBES
     self.PRED_VALUE_OF = ImportFieldAnnotation.PRED_VALUE_OF
     self.PRED_MEDIA_PART_OF = ImportFieldAnnotation.PRED_MEDIA_PART_OF
     self.PRED_DOC_Text = ImportFieldAnnotation.PRED_DOC_Text
     self.PRED_GEO_LOCATION = ImportFieldAnnotation.PRED_GEO_LOCATION
     self.PRED_DATE_EVENT = ImportFieldAnnotation.PRED_DATE_EVENT
     self.PRED_OBS_NUM = ImportFieldAnnotation.PRED_OBS_NUM
     self.PRED_METADATA = ImportFieldAnnotation.PRED_METADATA
     self.PRED_DRAFT_CONTAINS = ImportFieldAnnotation.PRED_DRAFT_CONTAINS
     self.PRED_COMPLEX_DES = ImportFieldAnnotation.PRED_COMPLEX_DES
     self.PRED_COMPLEX_LABEL = ImportFieldAnnotation.PRED_COMPLEX_LABEL
コード例 #5
0
ファイル: persons.py プロジェクト: ekansa/open-context-py
 def reconcile_item(self, imp_cell_obj):
     """ Reconciles one import cell against existing person records.

         The label and combined name come from the cell's record. For a
         blank record they are set to DEFAULT_BLANK only when
         check_blank_required() returns a truthy value. If the resulting
         label is a non-empty string and match_against_persons() returns
         False, a new person item is created under a fresh UUID.
         update_import_cell_uuid() is always called at the end.
     """
     self.imp_cell_obj = imp_cell_obj
     if len(imp_cell_obj.record) > 0:
         self.combined_name = self.label = imp_cell_obj.record
     else:
         general = ProcessGeneral(self.source_id)
         rows_to_check = [imp_cell_obj.row_num] if self.import_rows is False else self.import_rows
         self.evenif_blank = general.check_blank_required(imp_cell_obj.field_num,
                                                          rows_to_check)
         if self.evenif_blank:
             self.combined_name = self.label = self.DEFAULT_BLANK
     usable_label = isinstance(self.label, str) and len(self.label) > 0
     if usable_label and self.match_against_persons(self.combined_name) is False:
         # No match, so make new manifest objects. A new UUID is needed
         # because the ImportCell fl_uuid cannot be assumed to identify a
         # unique entity: uniqueness depends on context (other cells).
         self.new_entity = True
         self.uuid = GenUUID.uuid4()
         self.create_person_item()
     self.update_import_cell_uuid()
コード例 #6
0
 def get_link_annotations(self):
     """ Loads linking annotations for this source into self.link_rels.

         Annotations whose predicate is in DEFAULT_EXCLUSION_PREDS are
         skipped. When any remain, sets self.count_active_fields and builds
         self.link_rels, keyed by the subject field number. Each entry holds
         the subject field object ('sub_field_obj') and a list of
         'pred_objs' dicts naming the object field plus either a predicate
         field object or a predicate uuid. Only pairs where both subject
         and object field types are in
         ImportProfile.DEFAULT_SUBJECT_TYPE_FIELDS are recorded.
     """
     link_annotations = ImportFieldAnnotation.objects.filter(
         source_id=self.source_id
     ).exclude(
         predicate__in=self.DEFAULT_EXCLUSION_PREDS
     ).order_by('field_num', 'object_field_num')
     anno_count = len(link_annotations)
     if anno_count < 1:
         return
     self.count_active_fields = anno_count
     self.link_rels = LastUpdatedOrderedDict()
     for anno in link_annotations:
         general = ProcessGeneral(self.source_id)
         subject_field = general.get_field_obj(anno.field_num)
         object_field = general.get_field_obj(anno.object_field_num)
         if subject_field is False or object_field is False:
             continue
         if subject_field.field_type not in ImportProfile.DEFAULT_SUBJECT_TYPE_FIELDS \
            or object_field.field_type not in ImportProfile.DEFAULT_SUBJECT_TYPE_FIELDS:
             continue
         if anno.field_num in self.link_rels:
             rels = self.link_rels[anno.field_num]
         else:
             rels = {'sub_field_obj': subject_field,
                     'pred_objs': []}
         # the predicate comes from another field when predicate_field_num
         # is positive, otherwise straight from the annotation
         if anno.predicate_field_num > 0:
             pred_uuid = False
             pred_field = general.get_field_obj(anno.predicate_field_num)
         else:
             pred_uuid = anno.predicate
             pred_field = False
         rels['pred_objs'].append({'predicate_uuid': pred_uuid,
                                   'pred_field_obj': pred_field,
                                   'obj_field_obj': object_field})
         self.link_rels[anno.field_num] = rels
コード例 #7
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # placeholders, all initialised to False
     self.types_fields = self.start_field = self.stop_field = False
     # row window: end_row starts out equal to the batch size
     self.start_row = 1
     self.batch_size = self.end_row = 250
     self.example_size = 5
コード例 #8
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     self.complex_des_fields = []
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # counters
     self.count_active_fields = self.count_new_assertions = 0
     self.obs_num_complex_description_assertions = 1
コード例 #9
0
ファイル: links.py プロジェクト: portableant/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # False until link relations have been loaded
     self.link_rels = False
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     self.example_size = 5
     # counters
     self.count_active_fields = self.count_new_assertions = 0
コード例 #10
0
ファイル: types.py プロジェクト: ekansa/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # placeholders, all initialised to False
     self.types_fields = self.start_field = self.stop_field = False
     # row window: end_row starts out equal to the batch size
     self.start_row = 1
     self.batch_size = self.end_row = 250
     self.example_size = 5
コード例 #11
0
ファイル: links.py プロジェクト: ekansa/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # False until link relations have been loaded
     self.link_rels = False
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     self.example_size = 5
     # counters
     self.count_active_fields = self.count_new_assertions = 0
コード例 #12
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     self.complex_des_fields = []
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # counters
     self.count_active_fields = self.count_new_assertions = 0
     self.obs_num_complex_description_assertions = 1
コード例 #13
0
ファイル: media.py プロジェクト: portableant/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     self.media_fields = []
     self.count_active_fields = 0
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # separate result lists (each gets its own list object)
     self.new_entities = []
     self.reconciled_entities = []
     self.not_reconciled_entities = []
コード例 #14
0
ファイル: persons.py プロジェクト: ekansa/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     self.persons_fields = []
     self.count_active_fields = 0
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # separate result lists (each gets its own list object)
     self.new_entities = []
     self.reconciled_entities = []
     self.not_reconciled_entities = []
コード例 #15
0
 def get_obs_num_field_num(self, field_obj):
     """ Returns the field_num of the observation-number field annotated
         as applying to field_obj, or False when there is none.

         Only the first matching PRED_OBS_NUM annotation is considered, and
         the field it points to must have field_type 'obs-num'.
     """
     matches = ImportFieldAnnotation.objects.filter(
         source_id=self.source_id,
         predicate=ImportFieldAnnotation.PRED_OBS_NUM,
         object_field_num=field_obj.field_num
     )[:1]
     if len(matches) < 1:
         return False
     candidate = ProcessGeneral(self.source_id).get_field_obj(matches[0].field_num)
     if candidate and candidate.field_type == 'obs-num':
         return candidate.field_num
     return False
コード例 #16
0
ファイル: documents.py プロジェクト: rdhyee/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called. A
         ManifestMetadata helper is created for the same source and project.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # supplies associated metadata for new manifest objects
     self.metadata_obj = ManifestMetadata(self.source_id, self.project_uuid)
     self.documents_fields = []
     self.count_active_fields = 0
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # separate result lists (each gets its own list object)
     self.new_entities = []
     self.reconciled_entities = []
     self.not_reconciled_entities = []
コード例 #17
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # False until the description annotations have been loaded
     self.description_annotations = self.des_rels = False
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     self.example_size = 5
     # lookup dicts, each its own object
     self.reconciled_predicates = {}
     self.reconciled_types = {}
     self.field_valueofs = {}
     # counters
     self.count_active_fields = self.count_new_assertions = 0
コード例 #18
0
ファイル: media.py プロジェクト: ekansa/open-context-py
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called. A
         ManifestMetadata helper is created for the same source and project.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # supplies associated metadata for new manifest objects
     self.metadata_obj = ManifestMetadata(self.source_id, self.project_uuid)
     self.media_fields = []
     self.count_active_fields = 0
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # separate result lists (each gets its own list object)
     self.new_entities = []
     self.reconciled_entities = []
     self.not_reconciled_entities = []
コード例 #19
0
 def get_field_valueofs(self, variable_field_num):
     """ Returns the 'value' fields annotated as values of a variable field.

         Looks up PRED_VALUE_OF annotations whose object is
         variable_field_num (ordered by field_num), keeps the field objects
         whose field_type is 'value', stores that list in
         self.field_valueofs under variable_field_num, and returns it.
     """
     val_annos = ImportFieldAnnotation.objects.filter(
         source_id=self.source_id,
         predicate=ImportFieldAnnotation.PRED_VALUE_OF,
         object_field_num=variable_field_num
     ).order_by('field_num')
     # a fresh ProcessGeneral is used for each annotation lookup
     candidates = (ProcessGeneral(self.source_id).get_field_obj(anno.field_num)
                   for anno in val_annos)
     valueof_fields = [field for field in candidates
                       if field is not False and field.field_type == 'value']
     self.field_valueofs[variable_field_num] = valueof_fields
     return valueof_fields
コード例 #20
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     self.subjects_fields = False
     self.root_subject_field = False  # field_num for the root subject field
     self.contain_ordered_subjects = {}
     self.non_contain_subjects = []
     self.field_parent_entities = {}  # Parent entities named for a given field
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     self.example_size = 5
     self.count_active_fields = 0
     # separate result lists (each gets its own list object)
     self.new_entities = []
     self.reconciled_entities = []
     self.not_reconciled_entities = []
コード例 #21
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # field lists
     self.fields = []
     self.raw_field_annotations = []
     # values and flags that all start out False
     self.label = self.nav = self.field_annotations = False
     self.has_media_field = self.has_subjects = self.get_examples = False
     # predicate constants copied onto the instance
     self.PREDICATE_CONTAINS = Assertion.PREDICATES_CONTAINS
     self.PREDICATE_LINK = Assertion.PREDICATES_LINK
     self.PREDICATE_CONTAINED_IN = ImportFieldAnnotation.PRED_CONTAINED_IN
     self.PRED_DESCRIBES = ImportFieldAnnotation.PRED_DESCRIBES
     self.PRED_VALUE_OF = ImportFieldAnnotation.PRED_VALUE_OF
     self.PRED_MEDIA_PART_OF = ImportFieldAnnotation.PRED_MEDIA_PART_OF
コード例 #22
0
 def __init__(self, source_id):
     """ Sets default state for the given import source, then loads the
         source metadata and the active stage / row.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # source details and progress markers start out as False
     self.imp_source_obj = self.row_count = self.imp_status = False
     self.act_process_num = self.next_process_num = False
     self.start_row = False
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # outcome flags
     self.done = self.error = False
     self.ok = True
     self.active_processes = self.DEFAULT_PROCESS_STAGES
     # run last, after every default above is in place
     self.get_refine_source_meta()
     self.get_active_stage_row()
コード例 #23
0
 def __init__(self, source_id):
     """ Sets default state for the given import source, then loads the
         source metadata and the active stage / row.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     # source details and progress markers start out as False
     self.imp_source_obj = self.row_count = self.imp_status = False
     self.act_process_num = self.next_process_num = False
     self.start_row = False
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     # outcome flags
     self.done = self.error = False
     self.ok = True
     self.active_processes = self.DEFAULT_PROCESS_STAGES
     # run last, after every default above is in place
     self.get_refine_source_meta()
     self.get_active_stage_row()
コード例 #24
0
 def __init__(self, source_id):
     """ Sets default state for the given import source.

         source_id is handed to ProcessGeneral, whose project_uuid is
         copied onto this object after get_source() has been called.
     """
     self.source_id = source_id
     general = ProcessGeneral(source_id)
     general.get_source()
     self.project_uuid = general.project_uuid
     self.subjects_fields = False
     self.root_subject_field = False  # field_num for the root subject field
     # lookups keyed by subject field num
     self.geospace_fields = {}  # dict has valid lat + lon fields
     self.date_fields = {}  # dict has early late fields
     self.geojson_rels = {}  # integer value is geojson field_num
     self.contain_ordered_subjects = {}
     self.non_contain_subjects = []
     self.field_parent_entities = {}  # Parent entities named for a given field
     # row window: end_row starts out equal to the configured batch size
     self.start_row = 1
     self.batch_size = self.end_row = settings.IMPORT_BATCH_SIZE
     self.example_size = 5
     self.count_active_fields = 0
     # separate result lists (each gets its own list object)
     self.new_entities = []
     self.reconciled_entities = []
     self.not_reconciled_entities = []
コード例 #25
0
 def get_variable_valueof(self, des_field_obj):
     """ Returns the 'value' fields designated for a variable field.

         Args:
             des_field_obj: a descriptive field object; only field_type and
                 field_num are read.

         Returns:
             An empty list when des_field_obj is not of field_type
             'variable'. Otherwise the list of field objects (field_type
             'value') annotated with PRED_VALUE_OF as values of this
             variable. Results are cached in self.field_valueofs by
             field_num, so the annotations are queried once per variable.
     """
     if des_field_obj.field_type != 'variable':
         return []
     variable_field_num = des_field_obj.field_num
     if variable_field_num in self.field_valueofs:
         return self.field_valueofs[variable_field_num]
     # get the annotations that have the variable field as their object;
     # order_by needs the field name as a string (a bare name raised
     # NameError here)
     val_annos = ImportFieldAnnotation.objects\
                                      .filter(source_id=self.source_id,
                                              predicate=ImportFieldAnnotation.PRED_VALUE_OF,
                                              object_field_num=variable_field_num)\
                                      .order_by('field_num')
     valueof_fields = []
     # every annotation counts, including the case where there is exactly
     # one (the old "> 1" length test skipped it)
     for val_anno in val_annos:
         pg = ProcessGeneral(self.source_id)
         val_obj = pg.get_field_obj(val_anno.field_num)
         if val_obj is not False and val_obj.field_type == 'value':
             valueof_fields.append(val_obj)
     self.field_valueofs[variable_field_num] = valueof_fields
     return valueof_fields
コード例 #26
0
 def reconcile_item(self, imp_cell_obj):
     """ Reconciles one import cell against existing subject records.

         The label is label_prefix plus the cell's record. For a blank
         record it is label_prefix plus DEFAULT_BLANK, but only when
         check_blank_required() returns a truthy value. With a usable label
         and allow_new set, the context path is matched with
         match_against_subjects() and a new subject item is created when
         that returns False. With a usable label and allow_new not set,
         only match_against_manifest() is called. The import cell uuid is
         then updated and the containment assertion added.
     """
     self.imp_cell_obj = imp_cell_obj
     if len(imp_cell_obj.record) > 0:
         self.label = self.label_prefix + imp_cell_obj.record
     else:
         general = ProcessGeneral(self.source_id)
         rows_to_check = [imp_cell_obj.row_num] if self.import_rows is False else self.import_rows
         self.evenif_blank = general.check_blank_required(imp_cell_obj.field_num,
                                                          rows_to_check)
         if self.evenif_blank:
             self.label = self.label_prefix + self.DEFAULT_BLANK
     if self.allow_new and self.label is not False:
         # New items may only be created when allowed and a label exists
         self.context = (self.parent_context + Subject.HIEARCHY_DELIM + self.label
                         if len(self.parent_context) > 0
                         else self.label)
         if self.match_against_subjects(self.context) is False:
             # No match, so make new manifest objects. A new UUID is needed
             # because the ImportCell fl_uuid cannot be assumed to identify
             # a unique entity: uniqueness depends on context (other cells).
             self.uuid = GenUUID.uuid4()
             self.create_subject_item()
             self.is_new = True
     elif self.label is not False:
         # when not creating records, only non-blank items may be matched
         self.match_against_manifest(self.label,
                                     self.class_uri)
     self.update_import_cell_uuid()
     self.add_contain_assertion()
コード例 #27
0
ファイル: documents.py プロジェクト: rdhyee/open-context-py
 def reconcile_item(self, imp_cell_obj):
     """ Reconciles one import cell against existing document records.

         A non-blank record becomes self.label. When self.label is not
         False it is checked with match_against_documents():
         - no match: a new document item is created. Its uuid is freshly
           generated unless the supplemental metadata supplies a string
           uuid.
         - match: the OCdocument with self.uuid is fetched, or created if
           only the manifest record exists, and its content is brought up
           to date with self.content unless that is DEFAULT_NO_CONTENT.
         update_import_cell_uuid() is always called at the end.

         Args:
             imp_cell_obj: the import cell; record, field_num and row_num
                 are read.
     """
     self.imp_cell_obj = imp_cell_obj
     if len(imp_cell_obj.record) > 0:
         self.label = imp_cell_obj.record
     # NOTE(review): for blank records the old code built a ProcessGeneral
     # and a row check-list but never used them; that dead code is removed.
     # Confirm whether a check_blank_required() call was intended here.
     if self.label is not False:
         match_found = self.match_against_documents(self.label)
         if match_found is False:
             # create new document, manifest objects.
             self.new_entity = True
             sup_metadata = None
             self.uuid = GenUUID.uuid4()
             if self.metadata_obj is not None:
                 sup_metadata = self.metadata_obj.get_metadata(imp_cell_obj.field_num,
                                                               imp_cell_obj.row_num)
                 meta_uuid = self.metadata_obj.get_uuid_from_metadata_dict(sup_metadata)
                 if isinstance(meta_uuid, str):
                     # use the uuid in the metadata!
                     self.uuid = meta_uuid
             self.create_document_item(sup_metadata)
         else:
             exist_docs = OCdocument.objects\
                                    .filter(uuid=self.uuid)[:1]
             if len(exist_docs) < 1:
                 # problem! We have a manifest record for the document,
                 # but no document record, so make one
                 act_doc = OCdocument()
                 act_doc.uuid = self.uuid  # use the previously assigned temporary UUID
                 act_doc.project_uuid = self.project_uuid
                 act_doc.source_id = self.source_id
                 act_doc.content = self.content
                 act_doc.save()
             else:
                 act_doc = exist_docs[0]
             # act_doc is always set at this point, so no None check is needed
             if act_doc.content != self.content and self.content != self.DEFAULT_NO_CONTENT:
                 # update the document content with the latest content
                 act_doc.content = self.content
                 act_doc.save()
     self.update_import_cell_uuid()
コード例 #28
0
 def process_complex_batch(self):
     """ Creates complex-description assertions for one batch of rows.

         Clears prior import data for this source, sets end_row from
         start_row and batch_size, and loads the complex description
         fields. For each such field, the distinct records of the field it
         describes are fetched. Every usable subject (cell_ok not False, a
         real fl_uuid, a non-blank record) that has non-blank cells in the
         complex description field is then:
         1. linked to a complex-description entity whose uuid is the
            subject uuid + FRAG_ID_PREFIX + the 1-based field counter;
         2. given one label assertion per non-blank cell, pointing at a
            string uuid obtained from StringManagement.

         Each assertion is saved on a best-effort basis: a failed save is
         skipped, a successful one increments self.count_new_assertions.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_complex_description_fields()
     if len(self.complex_des_fields) < 1:
         return
     print('Number of Complex Description Fields: ' + str(len(self.complex_des_fields)))
     # adding a source_id suffix keeps these records from being deleted
     # as descriptions get processed
     complex_source_id = self.source_id + ProcessGeneral.COMPLEX_DESCRIPTION_SOURCE_SUFFIX
     label_str_uuids = {}  # label text -> string uuid, shared across fields
     for cp_id_number, cp_field in enumerate(self.complex_des_fields, 1):
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records_by_fl_uuid(cp_field.describes_field.field_num,
                                                            False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         # the same for every record of this field
         obs_num = 1 if cp_field.obs_num < 1 else cp_field.obs_num
         obs_node = '#obs-' + str(obs_num)
         subject_type = cp_field.describes_field.field_type
         for dist_rec in distinct_records.values():
             subject_cell = dist_rec['imp_cell_obj']
             subject_uuid = subject_cell.fl_uuid
             subject_ok = subject_cell.cell_ok
             if subject_uuid is False \
                or len(subject_cell.record) < 1 \
                or subject_uuid == 'False':
                 subject_ok = False
             in_rows = dist_rec['rows']
             print('Look for complex description labels in rows: ' + str(in_rows))
             if subject_ok is False:
                 continue
             # OK! we have the subject of the complex description with a
             # uuid, so now we can make an fl_uuid for the complex
             # description field.
             complex_uuid = subject_uuid + self.FRAG_ID_PREFIX + str(cp_id_number)
             complex_recs = ImportCell.objects\
                                      .filter(source_id=self.source_id,
                                              field_num=cp_field.field_num,
                                              row_num__in=in_rows)\
                                      .exclude(record='')
             if len(complex_recs) < 1:
                 continue
             # we have non-blank records in the complex description field
             # that are associated with the subject, so record this
             # association.
             new_ass = Assertion()
             new_ass.uuid = subject_uuid
             new_ass.subject_type = subject_type
             new_ass.project_uuid = self.project_uuid
             new_ass.source_id = complex_source_id
             new_ass.obs_node = obs_node
             new_ass.obs_num = obs_num
             new_ass.sort = 100 + cp_id_number
             new_ass.visibility = 1
             new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES
             new_ass.object_type = 'complex-description'
             new_ass.object_uuid = complex_uuid
             # Save exactly once, inside the guard. There used to be an
             # extra unguarded save() just before this try, which made the
             # guard pointless.
             try:
                 new_ass.save()
             except Exception:
                 # best effort: a failed save is skipped
                 # (presumably a duplicate assertion -- confirm)
                 save_ok = False
             else:
                 save_ok = True
             if save_ok:
                 print('Saved complex-description: ' + complex_uuid)
                 self.count_new_assertions += 1
             # now look through the complex description records and make labels
             for comp_rec in complex_recs:
                 # first save the fl_uuid for the complex description
                 comp_rec.fl_uuid = complex_uuid
                 comp_rec.save()
                 if isinstance(cp_field.value_prefix, str):
                     cp_label = cp_field.value_prefix + comp_rec.record
                 else:
                     cp_label = comp_rec.record
                 if cp_label not in label_str_uuids:
                     # make a uuid for the record value
                     sm = StringManagement()
                     sm.project_uuid = self.project_uuid
                     sm.source_id = complex_source_id
                     oc_string = sm.get_make_string(cp_label)
                     label_str_uuids[cp_label] = oc_string.uuid
                 content_uuid = label_str_uuids[cp_label]
                 label_ass = Assertion()
                 label_ass.uuid = complex_uuid
                 label_ass.subject_type = 'complex-description'
                 label_ass.project_uuid = self.project_uuid
                 label_ass.source_id = complex_source_id
                 label_ass.obs_node = '#obs-' + str(self.obs_num_complex_description_assertions)
                 label_ass.obs_num = self.obs_num_complex_description_assertions
                 label_ass.sort = 1
                 label_ass.visibility = 1
                 label_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES_LABEL
                 label_ass.object_type = 'xsd:string'
                 label_ass.object_uuid = content_uuid
                 try:
                     label_ass.save()
                 except Exception:
                     # best effort, as above: skip labels that fail to save
                     save_ok = False
                 else:
                     save_ok = True
                 if save_ok:
                     self.count_new_assertions += 1
コード例 #29
0
ファイル: links.py プロジェクト: ekansa/open-context-py
 def get_link_examples(self):
     """ Gets example entities with linking relations

         For every subject field in self.link_rels (populated by
         self.get_link_annotations()), builds at most self.example_size
         example entities from the distinct records of that field. Each
         example is a LastUpdatedOrderedDict with the keys 'label', 'id'
         and 'links'. Every item in 'links' has a 'predicate' and an
         'object', each carrying an 'id' and a 'label'.

         Returns a list of example entities; the list is empty when
         self.link_rels is False or no records are found.
     """
     example_entities = []
     self.get_link_annotations()
     if self.link_rels is False:
         return example_entities
     for subj_field_num, rels in self.link_rels.items():
         # get some example records
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(subj_field_num,
                                                 False)
         if distinct_records is False:
             continue
         entity_example_count = 0
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for row_key, dist_rec in distinct_records.items():
             if entity_example_count >= self.example_size:
                 # we already have enough examples for this subject field
                 break
             entity_example_count += 1
             entity = LastUpdatedOrderedDict()
             entity_label = dist_rec['imp_cell_obj'].record
             if len(entity_label) < 1:
                 entity_label = '[BLANK]'
             entity_label = rels['sub_field_obj'].value_prefix + entity_label
             entity['label'] = entity_label
             entity['id'] = str(subj_field_num) + '-' + str(row_key)
             entity['links'] = []
             # examples only look at the first row of the distinct record
             in_rows = [dist_rec['rows'][0]]
             for pred_obj in rels['pred_objs']:
                 act_preds = []
                 if pred_obj['predicate_uuid'] is not False:
                     # linking predicate is a known, fixed predicate
                     pred_item = LastUpdatedOrderedDict()
                     pred_item['id'] = pred_obj['predicate_uuid']
                     ent = Entity()
                     found = ent.dereference(pred_obj['predicate_uuid'])
                     if found:
                         pred_item['label'] = ent.label
                     else:
                         pred_item['label'] = '[Missing predicate!]'
                     act_preds.append(pred_item)
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     predicate_records = pc.get_field_records(pred_obj['pred_field_obj'].field_num,
                                                              in_rows)
                     # get_field_records signals "nothing found" with False
                     if predicate_records is not False:
                         for pred_rec in predicate_records.values():
                             pred_item = LastUpdatedOrderedDict()
                             pred_item['id'] = str(pred_obj['pred_field_obj'].field_num)
                             pred_item['id'] += '-' + str(pred_rec['rows'][0])
                             pred_item['label'] = pred_rec['imp_cell_obj'].record
                             if len(pred_item['label']) < 1:
                                 pred_item['label'] = '[BLANK]'
                             # cap the number of example predicates
                             # (the original tested an undefined name here)
                             if len(act_preds) < self.example_size:
                                 act_preds.append(pred_item)
                 for pred_item in act_preds:
                     link_item = LastUpdatedOrderedDict()
                     link_item['predicate'] = pred_item
                     # stays False unless an object record is found below
                     link_item['object'] = False
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                     in_rows)
                     pg = ProcessGeneral(self.source_id)
                     obj_rec = pg.get_first_distinct_record(obj_recs)
                     if obj_rec is False:
                         continue
                     object_val = LastUpdatedOrderedDict()
                     object_label = obj_field_obj.value_prefix
                     # any non-empty record is a real label; only an empty
                     # record is shown as blank (same rule as for subjects)
                     if len(obj_rec['imp_cell_obj'].record) > 0:
                         object_label += obj_rec['imp_cell_obj'].record
                     else:
                         object_label += '[BLANK]'
                     object_val['label'] = object_label
                     object_val['id'] = str(obj_rec['imp_cell_obj'].field_num)
                     object_val['id'] += '-' + str(obj_rec['rows'][0])
                     link_item['object'] = object_val
                     if len(entity['links']) < self.example_size:
                         entity['links'].append(link_item)
             example_entities.append(entity)
     return example_entities
コード例 #30
0
 def process_complex_batch(self):
     """ Processes a batch of complex description fields.

         For each field in self.complex_des_fields, finds the distinct
         records of the field it describes and, for every usable subject
         record:
         (1) saves an Assertion linking the subject to a
             'complex-description' entity whose uuid is the subject uuid
             plus self.FRAG_ID_PREFIX and the 1-based position of the
             complex description field,
         (2) stores that uuid as the fl_uuid of each non-blank import
             cell of the complex description field in the subject's rows,
         (3) saves an Assertion giving the complex-description a string
             label made from each of those cells.

         self.count_new_assertions is incremented for every assertion
         that saves without raising. Returns None.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_complex_description_fields()
     # cache of label string -> string uuid, shared across all fields
     label_str_uuids = {}
     if len(self.complex_des_fields) < 1:
         return
     print('Number of Complex Description Fields: ' +
           str(len(self.complex_des_fields)))
     # assertions made here carry a suffixed source_id; this
     # keeps them from being deleted as descriptions get processed
     complex_source_id = self.source_id + ProcessGeneral.COMPLEX_DESCRIPTION_SOURCE_SUFFIX
     cp_id_number = 0
     for cp_field in self.complex_des_fields:
         cp_id_number += 1
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records_by_fl_uuid(
             cp_field.describes_field.field_num, False)
         if distinct_records is False:
             continue
         # observation numbers below 1 default to 1
         if cp_field.obs_num < 1:
             obs_num = 1
         else:
             obs_num = cp_field.obs_num
         obs_node = '#obs-' + str(obs_num)
         subject_type = cp_field.describes_field.field_type
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(
             distinct_records)
         for row_key, dist_rec in distinct_records.items():
             subject_uuid = dist_rec['imp_cell_obj'].fl_uuid
             subject_ok = dist_rec['imp_cell_obj'].cell_ok
             subject_record = dist_rec['imp_cell_obj'].record
             if subject_uuid is False or\
                len(subject_record) < 1:
                 subject_ok = False
             if subject_uuid == 'False':
                 # the uuid can also arrive as the string 'False'
                 subject_ok = False
             in_rows = dist_rec['rows']
             print('Look for complex description labels in rows: ' +
                   str(in_rows))
             if subject_ok is False:
                 continue
             # OK! we have the subjects of complex descriptions
             # with uuids, so now we can make an fl_uuid for each
             # of the complex description fields.
             complex_uuid = subject_uuid + self.FRAG_ID_PREFIX + str(
                 cp_id_number)
             complex_recs = ImportCell.objects\
                                      .filter(source_id=self.source_id,
                                              field_num=cp_field.field_num,
                                              row_num__in=in_rows)\
                                      .exclude(record='')
             if len(complex_recs) < 1:
                 continue
             # we have records in the complex description field that are not blank
             # and are associated with the subject of the complex description.
             # so now, let's record this association.
             new_ass = Assertion()
             new_ass.uuid = subject_uuid
             new_ass.subject_type = subject_type
             new_ass.project_uuid = self.project_uuid
             new_ass.source_id = complex_source_id
             new_ass.obs_node = obs_node
             new_ass.obs_num = obs_num
             new_ass.sort = 100 + cp_id_number
             new_ass.visibility = 1
             new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES
             new_ass.object_type = 'complex-description'
             new_ass.object_uuid = complex_uuid
             # save exactly once, inside the try: a failed save is
             # tolerated (best effort) and must not abort the batch
             try:
                 new_ass.save()
                 save_ok = True
             except Exception:
                 save_ok = False
             if save_ok:
                 print('Saved complex-description: ' +
                       complex_uuid)
                 self.count_new_assertions += 1
             # now look through the complex description records and make labels
             for comp_rec in complex_recs:
                 # first save the fl_uuid for the complex description
                 comp_rec.fl_uuid = complex_uuid
                 comp_rec.save()
                 if isinstance(cp_field.value_prefix, str):
                     cp_label = cp_field.value_prefix + comp_rec.record
                 else:
                     cp_label = comp_rec.record
                 if cp_label not in label_str_uuids:
                     # make a uuid for the record value
                     sm = StringManagement()
                     sm.project_uuid = self.project_uuid
                     sm.source_id = complex_source_id
                     oc_string = sm.get_make_string(cp_label)
                     label_str_uuids[cp_label] = oc_string.uuid
                 content_uuid = label_str_uuids[cp_label]
                 new_ass = Assertion()
                 new_ass.uuid = complex_uuid
                 new_ass.subject_type = 'complex-description'
                 new_ass.project_uuid = self.project_uuid
                 new_ass.source_id = complex_source_id
                 new_ass.obs_node = '#obs-' + str(
                     self.obs_num_complex_description_assertions)
                 new_ass.obs_num = self.obs_num_complex_description_assertions
                 new_ass.sort = 1
                 new_ass.visibility = 1
                 new_ass.predicate_uuid = ComplexDescription.PREDICATE_COMPLEX_DES_LABEL
                 new_ass.object_type = 'xsd:string'
                 new_ass.object_uuid = content_uuid
                 # best effort again: count only assertions that saved
                 try:
                     new_ass.save()
                     save_ok = True
                 except Exception:
                     save_ok = False
                 if save_ok:
                     self.count_new_assertions += 1
コード例 #31
0
 def get_description_examples(self):
     """ Gets example entities described by other fields

         For every subject field in self.des_rels (populated by
         self.get_description_annotations()), builds at most
         self.example_size example entities. Each example is a
         LastUpdatedOrderedDict with the keys 'label', 'id' and
         'descriptions'. Every description item has a 'predicate' dict
         (always with 'type'; 'label' and 'id' are set only when they
         can be determined) and a list of 'objects'.

         Returns a list of example entities; the list is empty when
         self.des_rels is False or no records are found.
     """
     example_entities = []
     self.get_description_annotations()
     if self.des_rels is False:
         return example_entities
     for subj_field_num, ent_obj in self.des_rels.items():
         # get some example records
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(subj_field_num,
                                                 False)
         if distinct_records is False:
             continue
         entity_example_count = 0
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for row_key, dist_rec in distinct_records.items():
             if entity_example_count >= self.example_size:
                 # we already have enough examples for this subject field
                 break
             entity_example_count += 1
             entity = LastUpdatedOrderedDict()
             entity_label = dist_rec['imp_cell_obj'].record
             if len(entity_label) < 1:
                 entity_label = '[BLANK]'
             entity_label = ent_obj['field'].value_prefix + entity_label
             entity['label'] = entity_label
             entity['id'] = str(subj_field_num) + '-' + str(row_key)
             entity['descriptions'] = []
             # examples only look at the first row of the distinct record
             example_rows = [dist_rec['rows'][0]]
             for des_field_obj in ent_obj['des_by_fields']:
                 des_item = LastUpdatedOrderedDict()
                 des_item['predicate'] = LastUpdatedOrderedDict()
                 # values are in a list, in case there are more than 1 (variable-value)
                 des_item['objects'] = []
                 des_item['predicate']['type'] = des_field_obj.field_type
                 if des_field_obj.field_type == 'description':
                     # set the predicate for this description
                     des_item['predicate']['label'] = des_field_obj.label
                     des_item['predicate']['id'] = des_field_obj.field_num
                     # now get a value for this description from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     val_recs = pc.get_field_records(des_field_obj.field_num,
                                                     example_rows)
                     pg = ProcessGeneral(self.source_id)
                     val_rec = pg.get_first_distinct_record(val_recs)
                     if val_rec is not False:
                         object_val = LastUpdatedOrderedDict()
                         object_val['record'] = val_rec['imp_cell_obj'].record
                         object_val['id'] = val_rec['rows'][0]
                         des_item['objects'].append(object_val)
                 elif des_field_obj.field_type == 'variable':
                     # need to get the predicate from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     var_recs = pc.get_field_records(des_field_obj.field_num,
                                                     example_rows)
                     pg = ProcessGeneral(self.source_id)
                     # the variable record (not a value record) names the
                     # predicate, so it must come from var_recs
                     var_rec = pg.get_first_distinct_record(var_recs)
                     if var_rec is not False:
                         des_item['predicate']['label'] = var_rec['imp_cell_obj'].record
                         pid = str(des_field_obj.field_num) + '-' + str(var_rec['rows'][0])
                         des_item['predicate']['id'] = pid
                         # now need to get fields that have object values for the predicate
                         valueof_fields = self.get_variable_valueof(des_field_obj)
                         for val_field_obj in valueof_fields:
                             pc = ProcessCells(self.source_id,
                                               self.start_row)
                             val_recs = pc.get_field_records(val_field_obj.field_num,
                                                             example_rows)
                             pg = ProcessGeneral(self.source_id)
                             val_rec = pg.get_first_distinct_record(val_recs)
                             if val_rec is not False:
                                 object_val = LastUpdatedOrderedDict()
                                 object_val['record'] = val_rec['imp_cell_obj'].record
                                 oid = str(val_field_obj.field_num) + '-' + str(val_rec['rows'][0])
                                 object_val['id'] = oid
                                 des_item['objects'].append(object_val)
                 # fields of any other type still get an item with only
                 # the predicate 'type' set and no objects
                 entity['descriptions'].append(des_item)
             example_entities.append(entity)
     return example_entities
コード例 #32
0
ファイル: links.py プロジェクト: portableant/open-context-py
 def process_link_batch(self):
     """ processes fields describing linking relations
         between subjects, media, documents, persons, projects entities.

         For every subject field in self.link_rels (populated by
         self.get_link_annotations()) and every distinct subject record,
         works out the linking predicates (either a fixed predicate uuid
         or predicates reconciled from the records of a predicate field),
         then creates a CandidateLinkAssertion for each object record
         found in the rows that go with each predicate.
         self.count_new_assertions is incremented for each valid link.

         self.clear_source() is called first; presumably it decides
         whether previous imports of this source get cleared (e.g. only
         when start_row is 1) -- confirm against its implementation.
         Returns None.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_link_annotations()
     if self.link_rels is False:
         return
     for subj_field_num, rels in self.link_rels.items():
         # get the distinct records of the subject field
         sub_field_obj = rels['sub_field_obj']
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records(subj_field_num, False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(
             distinct_records)
         for row_key, dist_rec in distinct_records.items():
             subject_uuid = dist_rec['imp_cell_obj'].fl_uuid
             subject_type = sub_field_obj.field_type
             subject_ok = dist_rec['imp_cell_obj'].cell_ok
             if subject_uuid is False:
                 subject_ok = False
             # sort is kept across the predicates of this subject record
             sort = 0
             in_rows = dist_rec['rows']
             for pred_obj in rels['pred_objs']:
                 # maps predicate uuid -> rows where that predicate applies
                 act_preds = {}
                 obs_num = 1  # default observation number
                 if pred_obj['predicate_uuid'] is not False:
                     act_preds[pred_obj['predicate_uuid']] = in_rows
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     if pred_obj['pred_field_obj'].obs_num > 0:
                         obs_num = pred_obj['pred_field_obj'].obs_num
                     sort = pred_obj['pred_field_obj'].field_num
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     pred_recs = pc.get_field_records(
                         pred_obj['pred_field_obj'].field_num,
                         in_rows)
                     # get_field_records gives a dict of records (or
                     # False when nothing is found), so iterate over the
                     # record values rather than over the dict keys
                     if pred_recs is not False:
                         for pred_rec in pred_recs.values():
                             clp = CandidateLinkPredicate()
                             clp.source_id = self.source_id
                             clp.project_uuid = self.project_uuid
                             clp.make_reconcile_link_pred(
                                 pred_rec['imp_cell_obj'].record)
                             if clp.uuid is not False:
                                 act_preds[clp.uuid] = pred_rec['rows']
                 obs_node = '#obs-' + str(obs_num)
                 for predicate_uuid, act_in_rows in act_preds.items():
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     obj_recs = pc.get_field_records(
                         obj_field_obj.field_num, act_in_rows)
                     if sort < 1:
                         # no predicate field set a sort value, so fall
                         # back on the object field number
                         sort = obj_field_obj.field_num
                     for hash_key, obj_rec in obj_recs.items():
                         object_uuid = obj_rec['imp_cell_obj'].fl_uuid
                         object_type = obj_field_obj.field_type
                         object_ok = obj_rec['imp_cell_obj'].cell_ok
                         cla = CandidateLinkAssertion()
                         cla.project_uuid = self.project_uuid
                         cla.source_id = self.source_id
                         cla.subject_uuid = subject_uuid
                         cla.subject_type = subject_type
                         cla.obs_node = obs_node
                         cla.obs_num = obs_num
                         cla.sort = sort
                         cla.predicate_uuid = predicate_uuid
                         cla.object_uuid = object_uuid
                         cla.object_type = object_type
                         # only create the link when subject, object and
                         # predicate are all usable
                         if (subject_ok and object_ok
                             ) and predicate_uuid is not False:
                             cla.create_link()
                             if cla.is_valid:
                                 self.count_new_assertions += 1
                                 print(
                                     'Count OK: ' +
                                     str(self.count_new_assertions))
コード例 #33
0
 def process_description_batch(self):
     """ processes fields describing a subject (subjects, media, documents, persons, projects)
         entity field.
         if start_row is 1, then previous imports of this source are cleared
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_description_annotations()
     if self.des_rels is not False:
         for subj_field_num, ent_obj in self.des_rels.items():
             # loop through the fields that describe the subj_field_num
             self.reconcile_descriptive_predicates(ent_obj['des_by_fields'])
         # --------
         # reconciles types and strings by looping through reconciled predicate fields
         self.reconcile_types_strings()
         # --------
         for subj_field_num, ent_obj in self.des_rels.items():
             subj_field_type = ent_obj['field'].field_type
             # get records for the subject of the description
             pc = ProcessCells(self.source_id,
                               self.start_row)
             distinct_records = pc.get_field_records_by_fl_uuid(subj_field_num,
                                                                False)
             if distinct_records is not False:
                 pg = ProcessGeneral(self.source_id)
                 distinct_records = pg.order_distinct_records(distinct_records)
                 # print(str(distinct_records))
                 for row_key, dist_rec in distinct_records.items():
                     if dist_rec['imp_cell_obj'].cell_ok:
                         subject_uuid = dist_rec['imp_cell_obj'].fl_uuid
                         # the subject record is OK to use for creating
                         # description records
                         for des_field_obj in ent_obj['des_by_fields']:
                             des_field_num = des_field_obj.field_num
                             if des_field_obj.obs_num < 1:
                                 obs_num = 1
                             else:
                                 obs_num = des_field_obj.obs_num
                             obs_node = '#obs-' + str(obs_num)
                             # get the 'value-of' import cell objects for the current
                             # 'descriptive' or 'variable' field_num
                             # 'variable' field_nums may make multiple 'value-of' import_cell_objs
                             object_imp_cell_objs = self.get_assertion_object_values(des_field_num,
                                                                                     dist_rec['rows'])
                             for imp_cell_obj in object_imp_cell_objs:
                                 row_num = imp_cell_obj.row_num
                                 predicate = self.look_up_predicate(des_field_num,
                                                                    row_num)
                                 if predicate is not False:
                                     cd = CandidateDescription()
                                     cd.source_id = self.source_id
                                     cd.project_uuid = self.project_uuid
                                     cd.subject_uuid = subject_uuid
                                     cd.subject_type = subj_field_type
                                     cd.obs_num = obs_num
                                     cd.obs_node = obs_node
                                     cd.sort = des_field_num
                                     cd.predicate_uuid = str(predicate.uuid)
                                     cd.data_type = predicate.data_type
                                     cd.record = str(imp_cell_obj.record)
                                     cd.fl_uuid = imp_cell_obj.fl_uuid
                                     cd.l_uuid = imp_cell_obj.l_uuid
                                     cd.create_description()
                                     if cd.is_valid:
                                         self.count_new_assertions += 1
コード例 #34
0
ファイル: links.py プロジェクト: portableant/open-context-py
 def get_link_examples(self):
     """ Gets example entities with linking relations

         Calls self.get_link_annotations() and then, for every subject
         field in self.link_rels, builds at most self.example_size example
         entities from that field's distinct records. Each example entity
         carries at most self.example_size links.

         Returns a list of LastUpdatedOrderedDict objects with 'label',
         'id' and 'links' keys. The list is empty when self.link_rels
         is False.
     """
     example_entities = []
     self.get_link_annotations()
     if self.link_rels is False:
         return example_entities
     for subj_field_num, rels in self.link_rels.items():
         # get some example records
         pc = ProcessCells(self.source_id, self.start_row)
         distinct_records = pc.get_field_records(subj_field_num, False)
         if distinct_records is False:
             continue
         entity_example_count = 0
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for row_key, dist_rec in distinct_records.items():
             if entity_example_count >= self.example_size:
                 # enough examples for this subject field
                 break
             entity_example_count += 1
             entity = LastUpdatedOrderedDict()
             entity_label = dist_rec['imp_cell_obj'].record
             if len(entity_label) < 1:
                 entity_label = '[BLANK]'
             entity_label = rels['sub_field_obj'].value_prefix + entity_label
             entity['label'] = entity_label
             entity['id'] = str(subj_field_num) + '-' + str(row_key)
             entity['links'] = []
             # examples only use the first row of each distinct record
             in_rows = [dist_rec['rows'][0]]
             for pred_obj in rels['pred_objs']:
                 act_preds = []
                 if pred_obj['predicate_uuid'] is not False:
                     # linking predicate is a fixed, already known entity
                     pred_item = LastUpdatedOrderedDict()
                     pred_item['id'] = pred_obj['predicate_uuid']
                     ent = Entity()
                     found = ent.dereference(pred_obj['predicate_uuid'])
                     if found:
                         pred_item['label'] = ent.label
                     else:
                         pred_item['label'] = '[Missing predicate!]'
                     act_preds.append(pred_item)
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     pred_field_num = pred_obj['pred_field_obj'].field_num
                     pc = ProcessCells(self.source_id, self.start_row)
                     pred_recs = pc.get_field_records(pred_field_num, in_rows)
                     if pred_recs is False:
                         # no predicate cells found for these rows
                         pred_recs = {}
                     # get_field_records results are keyed dicts, so iterate
                     # over the record values rather than the keys
                     for pred_rec in pred_recs.values():
                         pred_item = LastUpdatedOrderedDict()
                         pred_item['id'] = str(pred_field_num)
                         pred_item['id'] += '-' + str(pred_rec['rows'][0])
                         pred_item['label'] = pred_rec['imp_cell_obj'].record
                         if len(pred_item['label']) < 1:
                             pred_item['label'] = '[BLANK]'
                         if len(act_preds) < self.example_size:
                             act_preds.append(pred_item)
                 for pred_item in act_preds:
                     link_item = LastUpdatedOrderedDict()
                     link_item['predicate'] = pred_item
                     # values are in a list, to keep consistent with descriptions
                     link_item['object'] = False
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id, self.start_row)
                     obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                     in_rows)
                     pg = ProcessGeneral(self.source_id)
                     obj_rec = pg.get_first_distinct_record(obj_recs)
                     if obj_rec is False:
                         continue
                     object_val = LastUpdatedOrderedDict()
                     object_label = obj_field_obj.value_prefix
                     # same blank test as for subject and predicate labels:
                     # only an empty record counts as blank
                     if len(obj_rec['imp_cell_obj'].record) > 0:
                         object_label += obj_rec['imp_cell_obj'].record
                     else:
                         object_label += '[BLANK]'
                     object_val['label'] = object_label
                     object_val['id'] = str(obj_rec['imp_cell_obj'].field_num)
                     object_val['id'] += '-' + str(obj_rec['rows'][0])
                     link_item['object'] = object_val
                     if len(entity['links']) < self.example_size:
                         entity['links'].append(link_item)
             example_entities.append(entity)
     return example_entities
コード例 #35
0
ファイル: links.py プロジェクト: portableant/open-context-py
 def process_link_batch(self):
     """ processes fields describing linking relations
         between subjects, media, documents, persons, projects entities.

         Calls self.clear_source() first, sets self.end_row from
         self.start_row and self.batch_size, then walks every subject
         field in self.link_rels and attempts one CandidateLinkAssertion
         per subject / predicate / object record combination.
         self.count_new_assertions is incremented for each valid link.

         NOTE(review): earlier documentation says prior imports are only
         cleared if start_row is 1; presumably clear_source() makes
         that decision internally -- confirm.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_link_annotations()
     if self.link_rels is False:
         return
     for subj_field_num, rels in self.link_rels.items():
         # get the distinct subject records for this field
         sub_field_obj = rels['sub_field_obj']
         pc = ProcessCells(self.source_id,
                           self.start_row)
         distinct_records = pc.get_field_records(subj_field_num,
                                                 False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for row_key, dist_rec in distinct_records.items():
             subject_uuid = dist_rec['imp_cell_obj'].fl_uuid
             subject_type = sub_field_obj.field_type
             subject_ok = dist_rec['imp_cell_obj'].cell_ok
             if subject_uuid is False:
                 subject_ok = False
             # sort carries over between the predicates of one subject record
             sort = 0
             in_rows = dist_rec['rows']
             for pred_obj in rels['pred_objs']:
                 # maps predicate uuid -> rows in which that predicate applies
                 act_preds = {}
                 obs_num = 1  # default observation number
                 if pred_obj['predicate_uuid'] is not False:
                     act_preds[pred_obj['predicate_uuid']] = in_rows
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     if pred_obj['pred_field_obj'].obs_num > 0:
                         obs_num = pred_obj['pred_field_obj'].obs_num
                     sort = pred_obj['pred_field_obj'].field_num
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     pred_recs = pc.get_field_records(pred_obj['pred_field_obj'].field_num,
                                                      in_rows)
                     if pred_recs is False:
                         # no predicate cells found for these rows
                         pred_recs = {}
                     # get_field_records results are keyed dicts, so iterate
                     # over the record values rather than the keys
                     for pred_rec in pred_recs.values():
                         clp = CandidateLinkPredicate()
                         clp.source_id = self.source_id
                         clp.project_uuid = self.project_uuid
                         clp.make_reconcile_link_pred(pred_rec['imp_cell_obj'].record)
                         if clp.uuid is not False:
                             act_preds[clp.uuid] = pred_rec['rows']
                 obs_node = '#obs-' + str(obs_num)
                 for predicate_uuid, act_in_rows in act_preds.items():
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                     act_in_rows)
                     if sort < 1:
                         sort = obj_field_obj.field_num
                     if obj_recs is False:
                         # no object cells in these rows, nothing to link
                         continue
                     for hash_key, obj_rec in obj_recs.items():
                         object_uuid = obj_rec['imp_cell_obj'].fl_uuid
                         object_type = obj_field_obj.field_type
                         object_ok = obj_rec['imp_cell_obj'].cell_ok
                         cla = CandidateLinkAssertion()
                         cla.project_uuid = self.project_uuid
                         cla.source_id = self.source_id
                         cla.subject_uuid = subject_uuid
                         cla.subject_type = subject_type
                         cla.obs_node = obs_node
                         cla.obs_num = obs_num
                         cla.sort = sort
                         cla.predicate_uuid = predicate_uuid
                         cla.object_uuid = object_uuid
                         cla.object_type = object_type
                         if (subject_ok and object_ok) and predicate_uuid is not False:
                             cla.create_link()
                             if cla.is_valid:
                                 self.count_new_assertions += 1
                                 print('Count OK: ' + str(self.count_new_assertions))
コード例 #36
0
 def process_link_batch(self):
     """ processes fields describing linking relations
         between subjects, media, documents, persons, projects entities.

         Calls self.clear_source() first, sets self.end_row from
         self.start_row and self.batch_size, then walks every subject
         field in self.link_rels. Subject and object records are grouped
         by their assigned uuid (get_field_records_by_fl_uuid). A link is
         only attempted when both the subject and the object have a usable
         uuid and a non-blank record. self.count_new_assertions is
         incremented for each valid link.

         NOTE(review): earlier documentation says prior imports are only
         cleared if start_row is 1; presumably clear_source() makes
         that decision internally -- confirm.
     """
     self.clear_source()  # clear prior import for this source
     self.end_row = self.start_row + self.batch_size
     self.get_link_annotations()
     if self.link_rels is False:
         return
     for subj_field_num, rels in self.link_rels.items():
         sub_field_obj = rels['sub_field_obj']
         pc = ProcessCells(self.source_id,
                           self.start_row)
         # now get distinct records as determined by having the same assigned
         # uuid
         distinct_records = pc.get_field_records_by_fl_uuid(subj_field_num,
                                                            False)
         if distinct_records is False:
             continue
         # sort the list in row_order from the import table
         pg = ProcessGeneral(self.source_id)
         distinct_records = pg.order_distinct_records(distinct_records)
         for row_key, dist_rec in distinct_records.items():
             subject_uuid = dist_rec['imp_cell_obj'].fl_uuid
             subject_type = sub_field_obj.field_type
             subject_ok = dist_rec['imp_cell_obj'].cell_ok
             subject_record = dist_rec['imp_cell_obj'].record
             if subject_uuid is False or\
                len(subject_record) < 1:
                 subject_ok = False
             if subject_uuid == 'False':
                 # uuid stored as the literal string 'False' is not usable
                 subject_ok = False
             # sort carries over between the predicates of one subject record
             sort = 0
             in_rows = dist_rec['rows']
             if subject_ok is False:
                 # unusable subject: restrict lookups to row -1
                 # (presumably matches no import rows -- TODO confirm)
                 in_rows = [-1]
             for pred_obj in rels['pred_objs']:
                 # maps predicate uuid -> rows in which that predicate applies
                 act_preds = {}
                 obs_num = 1  # default observation number
                 if pred_obj['predicate_uuid'] is not False:
                     # limit to the 'in rows' for the current item
                     act_preds[pred_obj['predicate_uuid']] = in_rows
                 elif pred_obj['pred_field_obj'] is not False:
                     # linking predicate is in a field
                     if pred_obj['pred_field_obj'].obs_num > 0:
                         obs_num = pred_obj['pred_field_obj'].obs_num
                     sort = pred_obj['pred_field_obj'].field_num
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     predicate_records = pc.get_field_records(pred_obj['pred_field_obj'].field_num,
                                                              in_rows)
                     if predicate_records is False:
                         # no predicate cells found for these rows
                         predicate_records = {}
                     for pred_row_key, pred_rec in predicate_records.items():
                         clp = CandidateLinkPredicate()
                         clp.source_id = self.source_id
                         clp.project_uuid = self.project_uuid
                         clp.make_reconcile_link_pred(pred_rec['imp_cell_obj'].record)
                         if clp.uuid is not False:
                             act_preds[clp.uuid] = pred_rec['rows']
                 obs_node = '#obs-' + str(obs_num)
                 for predicate_uuid, act_in_rows in act_preds.items():
                     obj_field_obj = pred_obj['obj_field_obj']
                     # now get a value for the object from the imported cells
                     pc = ProcessCells(self.source_id,
                                       self.start_row)
                     obj_recs = pc.get_field_records_by_fl_uuid(obj_field_obj.field_num,
                                                                act_in_rows)
                     if sort < 1:
                         sort = obj_field_obj.field_num
                     if obj_recs is False:
                         # no object cells in these rows, nothing to link
                         continue
                     for hash_key, obj_rec in obj_recs.items():
                         object_uuid = obj_rec['imp_cell_obj'].fl_uuid
                         object_type = obj_field_obj.field_type
                         object_ok = obj_rec['imp_cell_obj'].cell_ok
                         object_record = obj_rec['imp_cell_obj'].record
                         if len(object_record) < 1:
                             # blank record, don't make a link
                             object_ok = False
                         if object_uuid is False or\
                            len(object_uuid) < 1:
                             object_ok = False
                         if object_uuid == 'False':
                             object_ok = False
                         if not (object_ok and subject_ok):
                             continue
                         cla = CandidateLinkAssertion()
                         cla.project_uuid = self.project_uuid
                         cla.source_id = self.source_id
                         cla.subject_uuid = subject_uuid
                         cla.subject_type = subject_type
                         cla.obs_node = obs_node
                         cla.obs_num = obs_num
                         cla.sort = sort
                         cla.predicate_uuid = predicate_uuid
                         cla.object_uuid = object_uuid
                         cla.object_type = object_type
                         if predicate_uuid is not False:
                             cla.create_link()
                             if cla.is_valid:
                                 self.count_new_assertions += 1
                                 print('Link Count OK: ' + str(self.count_new_assertions))