def operator_calculate_network_metrics(self, metric, method, year, discharge_type):
    """Evaluate one network metric on a stored network and save the result.

    The edges of the network selected by ``method``, ``year`` and
    ``discharge_type`` are fetched through ``BaseDAO.obtain_pipeline``, loaded
    into an undirected ``nx.Graph`` and handed to the metric implementation
    returned by ``NetworkMetricFactory.instance(metric)``. The resulting frame
    is written as CSV into the network-metrics operator folder, and the
    documents matching the same method/year/discharge type/metric are deleted
    from the metrics DAO and re-inserted from the new values.
    """
    edge_filter = {
        NetworkHospitalDischargeDocument.network_method: method,
        NetworkHospitalDischargeDocument.network_type: discharge_type,
        NetworkHospitalDischargeDocument.network_year: year,
    }
    edge_fields = [
        NetworkHospitalDischargeDocument.node_in,
        NetworkHospitalDischargeDocument.node_out,
        NetworkHospitalDischargeDocument.weight,
    ]
    network_dao = BaseDAO(NetworkHospitalDischargeDocument)
    edge_records = network_dao.obtain_pipeline(
        dict_match=edge_filter,
        dict_project=edge_fields,
        math_none=False,
    )

    # Each record's values are consumed positionally as an edge triple.
    # NOTE(review): this presumably relies on the records coming back in
    # (node_in, node_out, weight) order -- confirm against obtain_pipeline.
    graph = nx.Graph()
    graph.add_weighted_edges_from(
        tuple(record.values()) for record in edge_records)

    metric_impl = NetworkMetricFactory.instance(metric)
    df_metric = metric_impl.get_metric(graph)
    df_metric.columns = [MetricsNetworkSharedCareAreaDocument.metric_value.name]

    out_dir = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_NETWORK_METRICS)
    csv_path = f'{out_dir}/{method}_{year}_{discharge_type}_{metric}.csv'
    df_metric.to_csv(csv_path, index=False)

    # Keys shared by every stored metric value; also used to select the stale
    # documents that are removed before the fresh ones are inserted.
    metric_keys = {
        MetricsNetworkSharedCareAreaDocument.method.name: method,
        MetricsNetworkSharedCareAreaDocument.year.name: year,
        MetricsNetworkSharedCareAreaDocument.type_discharge.name: discharge_type,
        MetricsNetworkSharedCareAreaDocument.metric.name: metric,
    }
    metric_documents = [
        self.get_metrics_network_shared_care((metric_keys, row))
        for _, row in df_metric.iterrows()
    ]

    metrics_dao = BaseDAO(MetricsNetworkSharedCareAreaDocument)
    metrics_dao.delete(**metric_keys)
    metrics_dao.insert(metric_documents)
def operator_create_network(self, method, year, discharge_type):
    """Build the network for one method/year/discharge type and store it.

    The network frame produced by ``NetworkSharedCare.instance(method)`` is
    exported as CSV and HDF into the network operator folder, turned into a
    graph that is written out with ``write_graph``, and finally inserted
    row by row through the DAO's bulk interface after the previously stored
    documents for the same method/year/type have been deleted.
    """
    out_dir = self.get_operator_folder_path(SharedCareAreaScript.TYPE_NETWORK)
    base_name = f'{method}_{year}_{discharge_type}'
    base_path = f'{out_dir}/{base_name}'

    network_builder = NetworkSharedCare.instance(method)
    discharge_filter = {
        HospitalDischargeDocument.discharge_year: year,
        HospitalDischargeDocument.discharge_type: discharge_type,
    }
    edges = network_builder.create_network(discharge_filter)
    edges.to_csv(f'{base_path}.csv', index=False)
    edges.to_hdf(f'{base_path}.hdf', key='df')

    graph = build_from_dataframe(edges)
    write_graph(graph, folder=out_dir, filename=base_name)
    # Drop the graph before the database work starts.
    del graph

    method_field = NetworkHospitalDischargeDocument.network_method.name
    year_field = NetworkHospitalDischargeDocument.network_year.name
    type_field = NetworkHospitalDischargeDocument.network_type.name

    network_dao = BaseDAO(NetworkHospitalDischargeDocument)
    network_dao.delete(**{
        method_field: method,
        year_field: year,
        type_field: discharge_type,
    })

    # Switch the edge columns from their db_field names to the document
    # attribute names so each row can be splatted into the document
    # constructor below.
    column_mapping = {
        NetworkHospitalDischargeDocument.node_in.db_field:
            NetworkHospitalDischargeDocument.node_in.name,
        NetworkHospitalDischargeDocument.node_out.db_field:
            NetworkHospitalDischargeDocument.node_out.name,
        NetworkHospitalDischargeDocument.weight.db_field:
            NetworkHospitalDischargeDocument.weight.name,
    }
    edges = edges.rename(columns=column_mapping)
    edges[method_field] = method
    edges[year_field] = year
    edges[type_field] = discharge_type

    network_dao.init_bulk(max_bulk_insert=100000)
    for _, row in edges.iterrows():
        network_dao.insert_bulk(NetworkHospitalDischargeDocument(**row.to_dict()))
    network_dao.exit_bulk()
    del edges
def operator_extract_communities(self, method, year, discharge_type, type_community_detection):
    """Detect communities on a stored network and persist them.

    Reads the Pajek file ``{method}_{year}_{discharge_type}.pajek`` from the
    network operator folder, runs the community detection selected by
    ``type_community_detection``, exports the resulting frame as CSV and HDF
    into the communities operator folder, and replaces the ``SharedCareArea``
    documents that match the same method/year/discharge type/detection type.
    """
    folder_path_in = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_NETWORK)
    filename_in = f'{method}_{year}_{discharge_type}'
    filepath_in = f'{folder_path_in}/{filename_in}'

    folder_path_out = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_COMMUNITIES)
    filename_out = f'{method}_{year}_{discharge_type}_{type_community_detection}'
    filepath_out = f'{folder_path_out}/{filename_out}'

    G = nx.read_pajek(f'{filepath_in}.pajek')
    community_detection = CommunityDetection.build_community_detection(
        type_community_detection)
    # The detector receives the graph together with the input path and name
    # as a single tuple argument.
    df_communities = community_detection.find_communities(
        (G, filepath_in, filename_in))

    # NOTE(review): DataFrame.rename returns a new frame and the result is
    # discarded here, so the columns are NOT renamed. Left untouched because
    # get_shared_care presumably reads the db_field column names -- confirm
    # whether the rename was meant for the exported files and either assign
    # the result or delete this statement.
    df_communities.rename(
        columns={
            SharedCareArea.zcta.db_field: 'NODE_ID',
            SharedCareArea.sca_id.db_field: 'COMMUNITY_ID'
        })

    df_communities.to_csv(f'{filepath_out}.csv', index=False)
    # Pass ``key`` by keyword: positional use is deprecated in recent pandas,
    # and the keyword form matches the other HDF export in this code base.
    df_communities.to_hdf(f'{filepath_out}.hdf', key='df')

    sca_dict = {
        SharedCareArea.method.name: method,
        SharedCareArea.year.name: year,
        SharedCareArea.type_discharge.name: discharge_type,
        SharedCareArea.type_community_detection.name: type_community_detection
    }

    dao = BaseDAO(SharedCareArea)
    # Remove the previously stored areas for this configuration first.
    dao.delete(**sca_dict)
    list_shared_care_areas = [
        self.get_shared_care((sca_dict, row))
        for _, row in df_communities.iterrows()
    ]
    dao.insert(list_shared_care_areas)
def operator_calculate_metrics(self, method, year, discharge_type, type_community_detection, metric):
    """Run the non-normalized calculation of one shared-care-area metric.

    Creates a DAO for ``MetricsSharedCareAreaDocument``, resolves the
    community-metrics operator folder and delegates to
    ``calculate_metrics_by_normalization`` with ``normalized=False``.
    """
    metrics_dao = BaseDAO(MetricsSharedCareAreaDocument)
    output_folder = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_COMMUNITY_METRICS)
    self.calculate_metrics_by_normalization(
        metrics_dao,
        discharge_type,
        output_folder,
        method,
        metric,
        type_community_detection,
        year,
        normalized=False,
    )
def __init__(self, name):
    """Initialise the parent with ``name`` and attach a hospital-discharge DAO."""
    super().__init__(name)
    discharges_dao = BaseDAO(HospitalDischargeDocument)
    self.__dao_discharges__ = discharges_dao
def __init__(self, name):
    """Store ``name`` and attach a DAO for ``SharedCareArea`` documents."""
    self.name = name
    shared_care_area_dao = BaseDAO(SharedCareArea)
    self.__dao_shared_care_area__ = shared_care_area_dao
def __init__(self, name):
    """Store ``name`` and attach a DAO for ``NetworkHospitalDischargeDocument``."""
    self.name = name
    network_dao = BaseDAO(NetworkHospitalDischargeDocument)
    self.__dao_network__ = network_dao
def __init__(self):
    """Load the shared-care-area metrics into ``self.df``.

    Calls ``obtain_pipeline`` with an empty match and ``math_none=True`` on a
    ``MetricsSharedCareAreaDocument`` DAO and wraps the result in a DataFrame.
    """
    metrics_dao = BaseDAO(MetricsSharedCareAreaDocument)
    records = metrics_dao.obtain_pipeline(dict_match={}, math_none=True)
    self.df = pd.DataFrame.from_dict(records)
# df = df.rename( # columns={ # 'HPD_DISCHARGE_YEAR': 'Year', # 'HPD_DISCHARGE_TYPE': 'Type of Discharge', # 'HPD_FACILITY_NAME': 'Number of Hospitals', # 'HPD_PATIENT_ZIPCODE': 'Number of Patient Zip Codes', # 'HPD_DISCHARGE_QUANTITY': 'Number of Discharges' # }) # df = df.set_index(['Year', 'Type of Discharge']) # formatters = [formatter_number, formatter_number, formatter_number] # # latex = str(df.to_latex(header=True, index=True, multirow=True, formatters=formatters, escape=False)) # # print(latex) dao = BaseDAO(HospitalDischargeDocument) discharges_match_dict = { # HospitalDischargeDocument.discharge_year: year, # HospitalDischargeDocument.discharge_type: type_discharge } discharges_project = [ HospitalDischargeDocument.discharge_year, HospitalDischargeDocument.discharge_type, HospitalDischargeDocument.facility_id, HospitalDischargeDocument.patient_zcta, HospitalDischargeDocument.discharge_quantity ] list_hospital_discharges = dao.obtain_pipeline(discharges_match_dict, discharges_project,
def __init__(self, name):
    """Initialise the parent with ``name`` and attach a network DAO."""
    # Zero-argument super() resolves to the same parent initialiser as the
    # explicit two-argument form inside this class.
    super().__init__(name)
    network_dao = BaseDAO(NetworkHospitalDischargeDocument)
    self.__dao_network__ = network_dao
def __init__(self):
    """Attach a DAO for ``HospitalDischargeDocument`` as ``self.discharges``."""
    discharges_dao = BaseDAO(HospitalDischargeDocument)
    self.discharges = discharges_dao
__author__ = 'diegopinheiro' __email__ = '*****@*****.**' __github__ = 'https://github.com/diegompin' from mhs.src.dao.base_dao import BaseDAO import pandas as pd from mhs.src.dao.mhs.documents_mhs import NetworkHospitalDischargeDocument from mhs.src.dao.mhs.documents_mhs import HospitalDischargeDocument, SharedCareArea mod_network = BaseDAO(NetworkHospitalDischargeDocument) mod_sca = BaseDAO(SharedCareArea) col_net_method = NetworkHospitalDischargeDocument.network_method col_net_dicharge = NetworkHospitalDischargeDocument.network_type col_net_year = NetworkHospitalDischargeDocument.network_year col_net_node_in = NetworkHospitalDischargeDocument.node_in col_net_node_out = NetworkHospitalDischargeDocument.node_out col_net_weight = NetworkHospitalDischargeDocument.weight from mhs.src.shared_care_areas.networks.network_shared_care import NetworkSharedCare from mhs.src.library.network.community_detection import CommunityDetection value_method = NetworkSharedCare.TYPE_PINHEIRO value_discharge = HospitalDischargeDocument.TYPE_DISCHARGE_ED_ONLY value_type_community_detection = CommunityDetection.TYPE_INFOMAP value_year = 2014 df = pd.DataFrame.from_dict( mod_network.obtain_pipeline( dict_match={ col_net_method: value_method,
__author__ = 'diegopinheiro' __email__ = '*****@*****.**' __github__ = 'https://github.com/diegompin' import pandas as pd from mhs.src.dao.base_dao import BaseDAO from mhs.src.shared_care_areas.metrics.base import MetricsSharedCareAreaFactory from mhs.src.dao.mhs.documents_mhs import MetricsSharedCareAreaDocument import statsmodels.api as sm from statsmodels.formula.api import ols import statsmodels.formula.api as smf import bootstrapped.bootstrap as bs import bootstrapped.stats_functions as bs_stats mod = BaseDAO(MetricsSharedCareAreaDocument) df = pd.DataFrame.from_dict(mod.obtain_pipeline(dict_match={}, math_none=True)) df_metric = df[ (df['MSA_METRIC'] == MetricsSharedCareAreaFactory.TYPE_LOCALIZATION_INDEX) & (df['MSA_SCA_NORMALIZED'] == False) & (pd.notnull(df['MSA_SCA_YEAR'])) & (df['MSA_SCA_TYPE_COMMUNITY_DETECTION'] == 'INFOMAP') & (df['MSA_SCA_TYPE_DISCHARGE'] == 'ED Only')] cols = [ 'MSA_SCA_METHOD', 'MSA_SCA_TYPE_DISCHARGE', 'MSA_SCA_TYPE_COMMUNITY_DETECTION', 'MSA_SCA_YEAR' ] col_metric_value = 'MSA_METRIC_VALUE' boostraps = 1000 def f_bootstrap(values):