def get_link_examples(self):
    """ Gets example entities with linking relations

    Calls self.get_link_annotations() and then, for every subject field
    in self.link_rels, builds at most self.example_size example entities
    from that field's distinct records.

    Returns a list of LastUpdatedOrderedDict items, each with the keys:
        'label': the subject field's value_prefix followed by the record
                 value ('[BLANK]' stands in for an empty record)
        'id':    '<subject field_num>-<row key>'
        'links': at most self.example_size items, each holding a
                 'predicate' (with 'id' and 'label') and an 'object'
                 (with 'label' and 'id', or False when no object record
                 is found for the example row)

    The list is empty when self.link_rels is False.

    NOTE(review): this file contains a second definition of
    get_link_examples further down; if both live in the same class the
    later definition replaces this one.
    """
    example_entities = []
    self.get_link_annotations()
    if self.link_rels is False:
        return example_entities
    for subj_field_num, rels in self.link_rels.items():
        # get some example records
        pc = ProcessCells(self.source_id, self.start_row)
        distinct_records = pc.get_field_records(subj_field_num, False)
        if distinct_records is False:
            continue
        # sort the list in row_order from the import table
        pg = ProcessGeneral(self.source_id)
        distinct_records = pg.order_distinct_records(distinct_records)
        entity_example_count = 0
        for row_key, dist_rec in distinct_records.items():
            if entity_example_count >= self.example_size:
                # enough examples for this subject field
                break
            entity_example_count += 1
            entity = LastUpdatedOrderedDict()
            entity_label = dist_rec['imp_cell_obj'].record
            if not entity_label:
                entity_label = '[BLANK]'
            entity_label = rels['sub_field_obj'].value_prefix + entity_label
            entity['label'] = entity_label
            entity['id'] = str(subj_field_num) + '-' + str(row_key)
            entity['links'] = []
            # predicates and objects are only looked up in the first row
            # of this distinct record
            in_rows = [dist_rec['rows'][0]]
            for pred_obj in rels['pred_objs']:
                act_preds = []
                if pred_obj['predicate_uuid'] is not False:
                    # the linking predicate is a known entity
                    pred_item = LastUpdatedOrderedDict()
                    pred_item['id'] = pred_obj['predicate_uuid']
                    ent = Entity()
                    found = ent.dereference(pred_obj['predicate_uuid'])
                    if found:
                        pred_item['label'] = ent.label
                    else:
                        pred_item['label'] = '[Missing predicate!]'
                    act_preds.append(pred_item)
                elif pred_obj['pred_field_obj'] is not False:
                    # linking predicate is in a field
                    pred_field_num = pred_obj['pred_field_obj'].field_num
                    pc = ProcessCells(self.source_id, self.start_row)
                    pred_recs = pc.get_field_records(pred_field_num, in_rows)
                    # get_field_records signals "nothing found" with False
                    if pred_recs is not False:
                        # records are keyed by a row key that is not
                        # needed here, so iterate over the values only
                        for pred_rec in pred_recs.values():
                            if len(act_preds) >= self.example_size:
                                break
                            pred_item = LastUpdatedOrderedDict()
                            pred_item['id'] = str(pred_field_num)
                            pred_item['id'] += '-' + str(pred_rec['rows'][0])
                            pred_item['label'] = pred_rec['imp_cell_obj'].record
                            if not pred_item['label']:
                                pred_item['label'] = '[BLANK]'
                            act_preds.append(pred_item)
                for pred_item in act_preds:
                    link_item = LastUpdatedOrderedDict()
                    link_item['predicate'] = pred_item
                    # stays False when no object record is found
                    link_item['object'] = False
                    obj_field_obj = pred_obj['obj_field_obj']
                    # now get a value for the object from the imported cells
                    pc = ProcessCells(self.source_id, self.start_row)
                    obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                    in_rows)
                    pg = ProcessGeneral(self.source_id)
                    obj_rec = pg.get_first_distinct_record(obj_recs)
                    if obj_rec is not False:
                        object_val = LastUpdatedOrderedDict()
                        object_label = obj_field_obj.value_prefix
                        obj_record = obj_rec['imp_cell_obj'].record
                        # any non-empty record counts as a value; only a
                        # truly empty one is shown as blank
                        if obj_record:
                            object_label += obj_record
                        else:
                            object_label += '[BLANK]'
                        object_val['label'] = object_label
                        object_val['id'] = str(obj_rec['imp_cell_obj'].field_num)
                        object_val['id'] += '-' + str(obj_rec['rows'][0])
                        link_item['object'] = object_val
                    if len(entity['links']) < self.example_size:
                        entity['links'].append(link_item)
            example_entities.append(entity)
    return example_entities
def get_description_examples(self):
    """ Gets example entities described by other fields

    Calls self.get_description_annotations() and then, for every subject
    field in self.des_rels, builds at most self.example_size example
    entities from that field's distinct records.

    Returns a list of LastUpdatedOrderedDict items, each with the keys:
        'label': the subject field's value_prefix followed by the record
                 value ('[BLANK]' stands in for an empty record)
        'id':    '<subject field_num>-<row key>'
        'descriptions': one item per field in ent_obj['des_by_fields'],
                 each holding a 'predicate' (with 'type' and, when
                 available, 'label' and 'id') and a list of 'objects'
                 (each with 'record' and 'id')

    For a 'description' field the predicate comes from the field itself;
    for a 'variable' field the predicate is read from the imported cells
    and the objects come from the fields returned by
    self.get_variable_valueof().

    The list is empty when self.des_rels is False.
    """
    example_entities = []
    self.get_description_annotations()
    if self.des_rels is False:
        return example_entities
    for subj_field_num, ent_obj in self.des_rels.items():
        # get some example records
        pc = ProcessCells(self.source_id, self.start_row)
        distinct_records = pc.get_field_records(subj_field_num, False)
        if distinct_records is False:
            continue
        # sort the list in row_order from the import table
        pg = ProcessGeneral(self.source_id)
        distinct_records = pg.order_distinct_records(distinct_records)
        entity_example_count = 0
        for row_key, dist_rec in distinct_records.items():
            if entity_example_count >= self.example_size:
                # enough examples for this subject field
                break
            entity_example_count += 1
            entity = LastUpdatedOrderedDict()
            entity_label = dist_rec['imp_cell_obj'].record
            if not entity_label:
                entity_label = '[BLANK]'
            entity_label = ent_obj['field'].value_prefix + entity_label
            entity['label'] = entity_label
            entity['id'] = str(subj_field_num) + '-' + str(row_key)
            entity['descriptions'] = []
            # descriptions are only looked up in the first row of this
            # distinct record
            example_rows = [dist_rec['rows'][0]]
            for des_field_obj in ent_obj['des_by_fields']:
                des_item = LastUpdatedOrderedDict()
                des_item['predicate'] = LastUpdatedOrderedDict()
                # values are in a list, in case there are more than 1
                # (variable-value)
                des_item['objects'] = []
                des_item['predicate']['type'] = des_field_obj.field_type
                if des_field_obj.field_type == 'description':
                    # set the predicate for this description
                    des_item['predicate']['label'] = des_field_obj.label
                    des_item['predicate']['id'] = des_field_obj.field_num
                    # now get a value for this description from the
                    # imported cells
                    pc = ProcessCells(self.source_id, self.start_row)
                    val_recs = pc.get_field_records(des_field_obj.field_num,
                                                    example_rows)
                    pg = ProcessGeneral(self.source_id)
                    val_rec = pg.get_first_distinct_record(val_recs)
                    if val_rec is not False:
                        object_val = LastUpdatedOrderedDict()
                        object_val['record'] = val_rec['imp_cell_obj'].record
                        object_val['id'] = val_rec['rows'][0]
                        des_item['objects'].append(object_val)
                elif des_field_obj.field_type == 'variable':
                    # need to get the predicate from the imported cells
                    pc = ProcessCells(self.source_id, self.start_row)
                    var_recs = pc.get_field_records(des_field_obj.field_num,
                                                    example_rows)
                    pg = ProcessGeneral(self.source_id)
                    # the variable's own record supplies the predicate
                    var_rec = pg.get_first_distinct_record(var_recs)
                    if var_rec is not False:
                        des_item['predicate']['label'] = var_rec['imp_cell_obj'].record
                        pid = str(des_field_obj.field_num) + '-' + str(var_rec['rows'][0])
                        des_item['predicate']['id'] = pid
                        # now need to get fields that have object values
                        # for the predicate
                        valueof_fields = self.get_variable_valueof(des_field_obj)
                        for val_field_obj in valueof_fields:
                            pc = ProcessCells(self.source_id, self.start_row)
                            val_recs = pc.get_field_records(val_field_obj.field_num,
                                                            example_rows)
                            pg = ProcessGeneral(self.source_id)
                            val_rec = pg.get_first_distinct_record(val_recs)
                            if val_rec is not False:
                                object_val = LastUpdatedOrderedDict()
                                object_val['record'] = val_rec['imp_cell_obj'].record
                                oid = str(val_field_obj.field_num) + '-' + str(val_rec['rows'][0])
                                object_val['id'] = oid
                                des_item['objects'].append(object_val)
                entity['descriptions'].append(des_item)
            example_entities.append(entity)
    return example_entities
def get_link_examples(self):
    """ Gets example entities with linking relations

    Calls self.get_link_annotations() and then, for every subject field
    in self.link_rels, builds at most self.example_size example entities
    from that field's distinct records.

    Returns a list of LastUpdatedOrderedDict items, each with the keys:
        'label': the subject field's value_prefix followed by the record
                 value ('[BLANK]' stands in for an empty record)
        'id':    '<subject field_num>-<row key>'
        'links': at most self.example_size items, each holding a
                 'predicate' (with 'id' and 'label') and an 'object'
                 (with 'label' and 'id', or False when no object record
                 is found for the example row)

    The list is empty when self.link_rels is False.

    NOTE(review): an earlier definition of get_link_examples exists in
    this file; if both live in the same class this later definition is
    the one that takes effect.
    """
    example_entities = []
    self.get_link_annotations()
    if self.link_rels is False:
        return example_entities
    for subj_field_num, rels in self.link_rels.items():
        # get some example records
        pc = ProcessCells(self.source_id, self.start_row)
        distinct_records = pc.get_field_records(subj_field_num, False)
        if distinct_records is False:
            continue
        # sort the list in row_order from the import table
        pg = ProcessGeneral(self.source_id)
        distinct_records = pg.order_distinct_records(distinct_records)
        entity_example_count = 0
        for row_key, dist_rec in distinct_records.items():
            if entity_example_count >= self.example_size:
                # enough examples for this subject field
                break
            entity_example_count += 1
            entity = LastUpdatedOrderedDict()
            entity_label = dist_rec['imp_cell_obj'].record
            if not entity_label:
                entity_label = '[BLANK]'
            entity_label = rels['sub_field_obj'].value_prefix + entity_label
            entity['label'] = entity_label
            entity['id'] = str(subj_field_num) + '-' + str(row_key)
            entity['links'] = []
            # predicates and objects are only looked up in the first row
            # of this distinct record
            in_rows = [dist_rec['rows'][0]]
            for pred_obj in rels['pred_objs']:
                act_preds = []
                if pred_obj['predicate_uuid'] is not False:
                    # the linking predicate is a known entity
                    pred_item = LastUpdatedOrderedDict()
                    pred_item['id'] = pred_obj['predicate_uuid']
                    ent = Entity()
                    found = ent.dereference(pred_obj['predicate_uuid'])
                    if found:
                        pred_item['label'] = ent.label
                    else:
                        pred_item['label'] = '[Missing predicate!]'
                    act_preds.append(pred_item)
                elif pred_obj['pred_field_obj'] is not False:
                    # linking predicate is in a field
                    pred_field_num = pred_obj['pred_field_obj'].field_num
                    pc = ProcessCells(self.source_id, self.start_row)
                    predicate_records = pc.get_field_records(pred_field_num,
                                                             in_rows)
                    # get_field_records signals "nothing found" with False
                    if predicate_records is not False:
                        # the row keys are not needed, only the records
                        for pred_rec in predicate_records.values():
                            if len(act_preds) >= self.example_size:
                                break
                            pred_item = LastUpdatedOrderedDict()
                            pred_item['id'] = str(pred_field_num)
                            pred_item['id'] += '-' + str(pred_rec['rows'][0])
                            pred_item['label'] = pred_rec['imp_cell_obj'].record
                            if not pred_item['label']:
                                pred_item['label'] = '[BLANK]'
                            act_preds.append(pred_item)
                for pred_item in act_preds:
                    link_item = LastUpdatedOrderedDict()
                    link_item['predicate'] = pred_item
                    # stays False when no object record is found
                    link_item['object'] = False
                    obj_field_obj = pred_obj['obj_field_obj']
                    # now get a value for the object from the imported cells
                    pc = ProcessCells(self.source_id, self.start_row)
                    obj_recs = pc.get_field_records(obj_field_obj.field_num,
                                                    in_rows)
                    pg = ProcessGeneral(self.source_id)
                    obj_rec = pg.get_first_distinct_record(obj_recs)
                    if obj_rec is not False:
                        object_val = LastUpdatedOrderedDict()
                        object_label = obj_field_obj.value_prefix
                        obj_record = obj_rec['imp_cell_obj'].record
                        # any non-empty record counts as a value; only a
                        # truly empty one is shown as blank
                        if obj_record:
                            object_label += obj_record
                        else:
                            object_label += '[BLANK]'
                        object_val['label'] = object_label
                        object_val['id'] = str(obj_rec['imp_cell_obj'].field_num)
                        object_val['id'] += '-' + str(obj_rec['rows'][0])
                        link_item['object'] = object_val
                    if len(entity['links']) < self.example_size:
                        entity['links'].append(link_item)
            example_entities.append(entity)
    return example_entities