def __init__(self, source_agregated_scores, out_file=None,
             n_desirable_complete_classes=1000,
             n_instances_already_counted=False):
    """Store the command configuration and create the Wikidata API reader.

    :param source_agregated_scores: source of the aggregated class scores,
        kept as-is for later reading.
    :param out_file: destination for the results (defaults to None).
    :param n_desirable_complete_classes: stored as ``_n_desirable``
        (defaults to 1000).
    :param n_instances_already_counted: flag stored unchanged
        (defaults to False).
    """
    # Input and output locations
    self._in_ag_scores = source_agregated_scores
    self._out_file = out_file

    # Processing options
    self._n_desirable = n_desirable_complete_classes
    self._n_instances_already_counted = n_instances_already_counted

    # Results accumulated by the command
    self._summary_list = []

    # Communications
    self._api_reader = WikidataApiReader()
class AliasesPropertiesCommand(object):
    """Complete a list of properties through the Wikidata API and dump them.

    Properties are read from ``source_file`` with one of two parsers
    (selected by ``json_input``), each one is re-fetched through
    ``WikidataApiReader`` and the resulting list is handed to
    ``JsonPropertyDumper``.
    """

    def __init__(self, source_file, out_file, json_input=False):
        """Store the configuration and create the API reader.

        :param source_file: file handed to the properties parser.
        :param out_file: destination handed to ``JsonPropertyDumper``.
        :param json_input: when True the source is parsed with
            ``Json05PropertiesParser``; otherwise ``Ccv01PropertiesParser``.
        """
        self._in_file = source_file
        self._out_file = out_file
        self._is_json_input = json_input
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Complete every target property and persist the resulting list.

        A property whose completion fails is reported on stdout and skipped;
        the remaining properties are still processed.

        :param string_return: forwarded unchanged to ``JsonPropertyDumper``.
        :return: whatever ``JsonPropertyDumper.persist_properties`` returns.
        """
        # Kept in the order in which the parser yields the properties.
        completed_properties = []
        for a_property in self._read_target_properties():
            try:
                completed_properties.append(
                    self._get_complete_property(a_property))
            except Exception:
                # Best effort: skip the failing property. ``Exception`` (not a
                # bare ``except``) so Ctrl-C / SystemExit still stop the run.
                print("Error with property " + str(a_property))

        dumper = JsonPropertyDumper(
            out_file=self._out_file,
            indent=4,
            strict_mode=True,
            string_return=string_return,
            needed_fields=[P_ID, P_LABEL, P_APPEARANCES, P_DESC],
        )
        return dumper.persist_properties(completed_properties)

    def _read_target_properties(self):
        """Yield the properties parsed from the source file."""
        if self._is_json_input:
            parser = Json05PropertiesParser(source_file=self._in_file)
        else:
            parser = Ccv01PropertiesParser(source_file=self._in_file)
        for a_prop in parser.yield_properties():
            yield a_prop

    def _get_complete_property(self, anemic_property):
        """Fetch the property by id and copy the appearance count onto it.

        :param anemic_property: parsed property; its ``id`` and
            ``n_appearances`` attributes are read.
        :return: the object returned by the API reader, with
            ``n_appearances`` set from ``anemic_property``.
        """
        complete_property = self._api_reader.get_property(anemic_property.id)
        complete_property.n_appearances = anemic_property.n_appearances
        return complete_property
class AliasesPropertiesCommand(object):
    """Complete a list of properties through the Wikidata API and dump them.

    Properties are read from ``source_file`` with one of two parsers
    (selected by ``json_input``), each one is re-fetched through
    ``WikidataApiReader`` and the resulting list is handed to
    ``JsonPropertyDumper``.
    """

    def __init__(self, source_file, out_file, json_input=False):
        """Store the configuration and create the API reader.

        :param source_file: file handed to the properties parser.
        :param out_file: destination handed to ``JsonPropertyDumper``.
        :param json_input: when True the source is parsed with
            ``Json05PropertiesParser``; otherwise ``Ccv01PropertiesParser``.
        """
        self._in_file = source_file
        self._out_file = out_file
        self._is_json_input = json_input
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Complete every target property and persist the resulting list.

        A property whose completion fails is reported on stdout and skipped;
        the remaining properties are still processed.

        :param string_return: forwarded unchanged to ``JsonPropertyDumper``.
        :return: whatever ``JsonPropertyDumper.persist_properties`` returns.
        """
        # Kept in the order in which the parser yields the properties.
        completed_properties = []
        for a_property in self._read_target_properties():
            try:
                completed_properties.append(
                    self._get_complete_property(a_property))
            except Exception:
                # Best effort: skip the failing property. ``Exception`` (not a
                # bare ``except``) so Ctrl-C / SystemExit still stop the run.
                print("Error with property " + str(a_property))

        dumper = JsonPropertyDumper(
            out_file=self._out_file,
            indent=4,
            strict_mode=True,
            string_return=string_return,
            needed_fields=[P_ID, P_LABEL, P_APPEARANCES, P_DESC],
        )
        return dumper.persist_properties(completed_properties)

    def _read_target_properties(self):
        """Yield the properties parsed from the source file."""
        if self._is_json_input:
            parser = Json05PropertiesParser(source_file=self._in_file)
        else:
            parser = Ccv01PropertiesParser(source_file=self._in_file)
        for a_prop in parser.yield_properties():
            yield a_prop

    def _get_complete_property(self, anemic_property):
        """Fetch the property by id and copy the appearance count onto it.

        :param anemic_property: parsed property; its ``id`` and
            ``n_appearances`` attributes are read.
        :return: the object returned by the API reader, with
            ``n_appearances`` set from ``anemic_property``.
        """
        complete_property = self._api_reader.get_property(anemic_property.id)
        complete_property.n_appearances = anemic_property.n_appearances
        return complete_property
def __init__(self, source_file, out_file, json_input=False):
    """Store the command configuration and create the Wikidata API reader.

    :param source_file: input file, kept as ``_in_file``.
    :param out_file: output destination, kept as ``_out_file``.
    :param json_input: flag kept as ``_is_json_input`` (defaults to False).
    """
    # Input and output locations
    self._in_file = source_file
    self._out_file = out_file

    # Input format flag
    self._is_json_input = json_input

    # Communications
    self._api_reader = WikidataApiReader()
class AgregatedClassSummaryCommand(object):
    """Build one summary dict per aggregated class and write the list as JSON.

    Each summary carries the class id, its accumulated score, its number of
    instances and, for the first ``n_desirable_complete_classes`` classes
    only, the label and description fetched through ``WikidataApiReader``.
    """

    def __init__(self, source_agregated_scores, out_file=None,
                 n_desirable_complete_classes=1000,
                 n_instances_already_counted=False):
        """Store the configuration and create the API reader.

        :param source_agregated_scores: handed to ``read_json_object`` to
            load the raw class dicts.
        :param out_file: path handed to ``write_json_object``.
        :param n_desirable_complete_classes: only classes whose position in
            the input is below this number get label/description looked up.
        :param n_instances_already_counted: when True the instance count is
            copied from the input instead of being computed.
        """
        self._in_ag_scores = source_agregated_scores
        self._out_file = out_file
        self._n_desirable = n_desirable_complete_classes
        self._summary_list = []
        self._n_instances_already_counted = n_instances_already_counted
        # Communications
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Summarize every raw class, print the total and serialize the list.

        :param string_return: forwarded to ``_serialize_results``, which does
            not implement it yet (results are always written to file).
        """
        n_processed = 0
        for position, a_class_dict in enumerate(self._read_raw_classes()):
            self._summary_list.append(
                self._get_summary_dict(a_class_dict, position))
            n_processed = position + 1
        # Same text the Python 2 statement `print "Total: ", n` produced.
        print("Total:  %s" % (n_processed,))
        self._serialize_results(string_return)

    def _get_summary_dict(self, raw_class_dict, counter):
        """Return the summary dict of a single raw class.

        :param raw_class_dict: dict of one class as read from the input.
        :param counter: zero-based position of the class in the input; the
            API is only queried while it is below ``self._n_desirable``.
        :return: dict with id, accumulated score, label, description (both
            None when not fetched), the optional position keys when present
            in the input, and the number of instances.
        """
        result = {
            KEY_ID: raw_class_dict[KEY_ID],
            KEY_ACCUMULATED: raw_class_dict[KEY_ACCUMULATED],
            KEY_LABEL: None,
            KEY_DESC: None,
        }
        # Optional ranking positions are copied only when the input has them.
        for optional_key in (KEY_POS_INSTANCE_COUNTING, KEY_POS_CLASSRANK):
            if optional_key in raw_class_dict:
                result[optional_key] = raw_class_dict[optional_key]

        if self._n_instances_already_counted:
            result[KEY_N_INSTANCES] = raw_class_dict[KEY_N_INSTANCES]
        else:
            result[KEY_N_INSTANCES] = self._count_dict_instances(raw_class_dict)

        if counter < self._n_desirable:
            try:
                tracked_entity = self._api_reader.get_entity(
                    raw_class_dict[KEY_ID])
                result[KEY_LABEL] = tracked_entity.label
                result[KEY_DESC] = tracked_entity.description
            except Exception:
                # Best effort: keep the summary without label/description.
                # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit
                # still abort a long run of API calls.
                print("Hubo problemas con  %s" % (raw_class_dict[KEY_ID],))
            # NOTE(review): indentation was lost in the reviewed source; the
            # counter is printed after every lookup attempt -- confirm this
            # matches the intended placement.
            print(counter)
        return result

    def _count_dict_instances(self, raw_class_dict):
        """Return the number of distinct entity ids listed under KEY_INSTANCES.

        An id that appears under several properties is counted once.
        """
        unique_entity_ids = set()
        for entity_ids in raw_class_dict[KEY_INSTANCES].values():
            unique_entity_ids.update(entity_ids)
        return len(unique_entity_ids)

    def _read_raw_classes(self):
        """Load the raw class dicts with ``read_json_object``.

        Illustrative shape of the input (taken from the original author's
        sample data)::

            [{"id": "Q31",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"]}},            # 2 instances
             {"id": "Q35",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"],
                              "P2": ["Q3", "Q2", "Q1"]}}]      # 4 instances
                                                               # (Q3 repeated)
        """
        return read_json_object(self._in_ag_scores)

    def _serialize_results(self, string_return):
        """Write the accumulated summaries to ``self._out_file`` as JSON."""
        # TODO: implement string_return mode. Now, we are assuming
        # string_return=False
        write_json_object(json_object=self._summary_list,
                          path=self._out_file,
                          indent=4)
class AgregatedClassSummaryCommand(object):
    """Build one summary dict per aggregated class and write the list as JSON.

    Each summary carries the class id, its accumulated score, its number of
    instances and, for the first ``n_desirable_complete_classes`` classes
    only, the label and description fetched through ``WikidataApiReader``.
    """

    def __init__(self, source_agregated_scores, out_file=None,
                 n_desirable_complete_classes=1000,
                 n_instances_already_counted=False):
        """Store the configuration and create the API reader.

        :param source_agregated_scores: handed to ``read_json_object`` to
            load the raw class dicts.
        :param out_file: path handed to ``write_json_object``.
        :param n_desirable_complete_classes: only classes whose position in
            the input is below this number get label/description looked up.
        :param n_instances_already_counted: when True the instance count is
            copied from the input instead of being computed.
        """
        self._in_ag_scores = source_agregated_scores
        self._out_file = out_file
        self._n_desirable = n_desirable_complete_classes
        self._summary_list = []
        self._n_instances_already_counted = n_instances_already_counted
        # Communications
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Summarize every raw class, print the total and serialize the list.

        :param string_return: forwarded to ``_serialize_results``, which does
            not implement it yet (results are always written to file).
        """
        n_processed = 0
        for position, a_class_dict in enumerate(self._read_raw_classes()):
            self._summary_list.append(
                self._get_summary_dict(a_class_dict, position))
            n_processed = position + 1
        # Same text the Python 2 statement `print "Total: ", n` produced.
        print("Total:  %s" % (n_processed,))
        self._serialize_results(string_return)

    def _get_summary_dict(self, raw_class_dict, counter):
        """Return the summary dict of a single raw class.

        :param raw_class_dict: dict of one class as read from the input.
        :param counter: zero-based position of the class in the input; the
            API is only queried while it is below ``self._n_desirable``.
        :return: dict with id, accumulated score, label, description (both
            None when not fetched), the optional position keys when present
            in the input, and the number of instances.
        """
        result = {
            KEY_ID: raw_class_dict[KEY_ID],
            KEY_ACCUMULATED: raw_class_dict[KEY_ACCUMULATED],
            KEY_LABEL: None,
            KEY_DESC: None,
        }
        # Optional ranking positions are copied only when the input has them.
        for optional_key in (KEY_POS_INSTANCE_COUNTING, KEY_POS_CLASSRANK):
            if optional_key in raw_class_dict:
                result[optional_key] = raw_class_dict[optional_key]

        if self._n_instances_already_counted:
            result[KEY_N_INSTANCES] = raw_class_dict[KEY_N_INSTANCES]
        else:
            result[KEY_N_INSTANCES] = self._count_dict_instances(raw_class_dict)

        if counter < self._n_desirable:
            try:
                tracked_entity = self._api_reader.get_entity(
                    raw_class_dict[KEY_ID])
                result[KEY_LABEL] = tracked_entity.label
                result[KEY_DESC] = tracked_entity.description
            except Exception:
                # Best effort: keep the summary without label/description.
                # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit
                # still abort a long run of API calls.
                print("Hubo problemas con  %s" % (raw_class_dict[KEY_ID],))
            # NOTE(review): indentation was lost in the reviewed source; the
            # counter is printed after every lookup attempt -- confirm this
            # matches the intended placement.
            print(counter)
        return result

    def _count_dict_instances(self, raw_class_dict):
        """Return the number of distinct entity ids listed under KEY_INSTANCES.

        An id that appears under several properties is counted once.
        """
        unique_entity_ids = set()
        for entity_ids in raw_class_dict[KEY_INSTANCES].values():
            unique_entity_ids.update(entity_ids)
        return len(unique_entity_ids)

    def _read_raw_classes(self):
        """Load the raw class dicts with ``read_json_object``.

        Illustrative shape of the input (taken from the original author's
        sample data)::

            [{"id": "Q31",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"]}},            # 2 instances
             {"id": "Q35",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"],
                              "P2": ["Q3", "Q2", "Q1"]}}]      # 4 instances
                                                               # (Q3 repeated)
        """
        return read_json_object(self._in_ag_scores)

    def _serialize_results(self, string_return):
        """Write the accumulated summaries to ``self._out_file`` as JSON."""
        # TODO: implement string_return mode. Now, we are assuming
        # string_return=False
        write_json_object(json_object=self._summary_list,
                          path=self._out_file,
                          indent=4)