def operator_create_network(self, method, year, discharge_type):
    """Build one discharge network, export it, and reload it into storage.

    The network is created by the ``NetworkSharedCare`` implementation
    selected by ``method`` from discharges matching ``year`` and
    ``discharge_type``. The edge table is written as CSV and HDF, a graph
    built from it is passed to ``write_graph``, and the rows stored under
    the same (method, year, type) key are deleted before the new edges are
    bulk-inserted.
    """
    net_doc = NetworkHospitalDischargeDocument

    out_dir = self.get_operator_folder_path(SharedCareAreaScript.TYPE_NETWORK)
    base_name = f'{method}_{year}_{discharge_type}'
    base_path = f'{out_dir}/{base_name}'

    builder = NetworkSharedCare.instance(method)
    discharge_filter = {
        HospitalDischargeDocument.discharge_year: year,
        HospitalDischargeDocument.discharge_type: discharge_type,
    }
    df = builder.create_network(discharge_filter)

    # File exports of the raw edge table.
    df.to_csv(f'{base_path}.csv', index=False)
    df.to_hdf(f'{base_path}.hdf', key='df')

    graph = build_from_dataframe(df)
    write_graph(graph, folder=out_dir, filename=base_name)
    # Drop the graph before the database work starts.
    del graph

    network_key = {
        net_doc.network_method.name: method,
        net_doc.network_year.name: year,
        net_doc.network_type.name: discharge_type,
    }
    dao = BaseDAO(net_doc)
    # Remove whatever is stored under this key before inserting again.
    dao.delete(**network_key)

    # Switch column labels from db_field names to attribute names so each
    # row can be expanded as keyword arguments of the document constructor.
    field_to_attr = {
        net_doc.node_in.db_field: net_doc.node_in.name,
        net_doc.node_out.db_field: net_doc.node_out.name,
        net_doc.weight.db_field: net_doc.weight.name,
    }
    df = df.rename(columns=field_to_attr)

    # Stamp every edge with the identifying key values.
    for column, value in network_key.items():
        df[column] = value

    dao.init_bulk(max_bulk_insert=100000)
    for _, row in df.iterrows():
        dao.insert_bulk(net_doc(**row.to_dict()))
    dao.exit_bulk()
    del df
class BaseMetricsNetwork(object):
    """Base class for metrics computed on a stored discharge network.

    Keeps the metric ``name`` and a DAO over
    ``NetworkHospitalDischargeDocument``. Subclasses are expected to
    override :meth:`get_metric`.
    """

    def __init__(self, name):
        self.name = name
        self.__dao_network__ = BaseDAO(NetworkHospitalDischargeDocument)

    def get_metric(self, method, type_discharge, year, normalized=None):
        """Compute the metric; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # ``NotImplemented`` is a comparison sentinel and is not callable;
        # ``raise NotImplemented(...)`` surfaced as a TypeError instead.
        raise NotImplementedError('get_metric')

    def get_network(self, method, type_discharge, year):
        """Return the stored network rows for one key as a DataFrame.

        Rows are matched on ``method``, ``type_discharge`` and ``year``; the
        projection asks for those three fields plus node-in, node-out and
        weight.
        """
        col_net_method = NetworkHospitalDischargeDocument.network_method
        col_net_dicharge = NetworkHospitalDischargeDocument.network_type
        col_net_year = NetworkHospitalDischargeDocument.network_year
        col_net_node_in = NetworkHospitalDischargeDocument.node_in
        col_net_node_out = NetworkHospitalDischargeDocument.node_out
        col_net_weight = NetworkHospitalDischargeDocument.weight

        records = self.__dao_network__.obtain_pipeline(
            dict_match={
                col_net_method: method,
                col_net_dicharge: type_discharge,
                col_net_year: year,
            },
            # A list (as the other obtain_pipeline callers pass) keeps the
            # projection order stable; the previous set literal did not.
            dict_project=[
                col_net_method,
                col_net_dicharge,
                col_net_year,
                col_net_node_in,
                col_net_node_out,
                col_net_weight,
            ],
        )
        return pd.DataFrame.from_dict(records)
def operator_calculate_network_metrics(self, metric, method, year,
                                       discharge_type):
    """Compute one network metric for a stored network and persist it.

    The edges stored for (``method``, ``discharge_type``, ``year``) are
    loaded into an undirected weighted graph, the metric implementation
    selected by ``metric`` is evaluated on it, and the result is written to
    CSV and stored, replacing rows previously saved under the same key.
    """
    net_doc = NetworkHospitalDischargeDocument
    metric_doc = MetricsNetworkSharedCareAreaDocument

    edge_filter = {
        net_doc.network_method: method,
        net_doc.network_type: discharge_type,
        net_doc.network_year: year,
    }
    edge_fields = [net_doc.node_in, net_doc.node_out, net_doc.weight]
    edge_rows = BaseDAO(net_doc).obtain_pipeline(
        dict_match=edge_filter, dict_project=edge_fields, math_none=False)

    graph = nx.Graph()
    # Each row's values are used positionally as (u, v, weight);
    # presumably they come back in projection order -- TODO confirm.
    graph.add_weighted_edges_from(tuple(row.values()) for row in edge_rows)

    metric_impl = NetworkMetricFactory.instance(metric)
    df_metric = metric_impl.get_metric(graph)
    df_metric.columns = [metric_doc.metric_value.name]

    out_dir = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_NETWORK_METRICS)
    out_name = f'{method}_{year}_{discharge_type}_{metric}'
    out_path = f'{out_dir}/{out_name}'
    df_metric.to_csv(f'{out_path}.csv', index=False)

    metrics_key = {
        metric_doc.method.name: method,
        metric_doc.year.name: year,
        metric_doc.type_discharge.name: discharge_type,
        metric_doc.metric.name: metric,
    }
    # Build every document first, then swap the stored rows.
    metric_objects = [
        self.get_metrics_network_shared_care((metrics_key, row))
        for _, row in df_metric.iterrows()
    ]
    metrics_dao = BaseDAO(metric_doc)
    metrics_dao.delete(**metrics_key)
    metrics_dao.insert(metric_objects)
def operator_calculate_metrics(self, method, year, discharge_type,
                               type_community_detection, metric):
    """Compute one community metric without normalization.

    Delegates to ``calculate_metrics_by_normalization`` with
    ``normalized=False``, handing it a DAO over
    ``MetricsSharedCareAreaDocument`` and the community-metrics output
    folder.
    """
    metrics_dao = BaseDAO(MetricsSharedCareAreaDocument)
    out_dir = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_COMMUNITY_METRICS)
    self.calculate_metrics_by_normalization(
        metrics_dao,
        discharge_type,
        out_dir,
        method,
        metric,
        type_community_detection,
        year,
        normalized=False,
    )
def operator_extract_communities(self, method, year, discharge_type,
                                 type_community_detection):
    """Detect communities on an exported network and persist them.

    Reads ``<network folder>/<method>_<year>_<discharge_type>.pajek``, runs
    the detector selected by ``type_community_detection``, writes the
    resulting table as CSV and HDF into the communities folder, and replaces
    the ``SharedCareArea`` rows stored under the same key.
    """
    folder_path_in = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_NETWORK)
    filename_in = f'{method}_{year}_{discharge_type}'
    filepath_in = f'{folder_path_in}/{filename_in}'

    folder_path_out = self.get_operator_folder_path(
        SharedCareAreaScript.TYPE_COMMUNITIES)
    filename_out = (
        f'{method}_{year}_{discharge_type}_{type_community_detection}')
    filepath_out = f'{folder_path_out}/{filename_out}'

    graph = nx.read_pajek(f'{filepath_in}.pajek')
    detector = CommunityDetection.build_community_detection(
        type_community_detection)
    df_communities = detector.find_communities(
        (graph, filepath_in, filename_in))

    # NOTE(review): DataFrame.rename returns a new frame and the result is
    # discarded, so this call leaves df_communities unchanged. It is kept
    # as-is because everything below currently receives the un-renamed
    # columns -- confirm the intended column names before assigning it.
    df_communities.rename(
        columns={
            SharedCareArea.zcta.db_field: 'NODE_ID',
            SharedCareArea.sca_id.db_field: 'COMMUNITY_ID',
        })

    df_communities.to_csv(f'{filepath_out}.csv', index=False)
    # ``key`` is passed by keyword, as operator_create_network does;
    # positional use is deprecated in recent pandas releases.
    df_communities.to_hdf(f'{filepath_out}.hdf', key='df')

    sca_dict = {
        SharedCareArea.method.name: method,
        SharedCareArea.year.name: year,
        SharedCareArea.type_discharge.name: discharge_type,
        SharedCareArea.type_community_detection.name:
            type_community_detection,
    }
    # Build the replacement objects before deleting, so a failure while
    # converting rows does not leave the stored key empty.
    list_shared_care_areas = [
        self.get_shared_care((sca_dict, row))
        for _, row in df_communities.iterrows()
    ]
    dao = BaseDAO(SharedCareArea)
    dao.delete(**sca_dict)
    dao.insert(list_shared_care_areas)
class BaseMetricsSharedCareArea(object):
    """Base class for metrics computed per shared care area (SCA).

    Keeps the metric ``name`` and a DAO over ``SharedCareArea``. Subclasses
    are expected to override :meth:`get_metric`.
    """

    def __init__(self, name):
        self.name = name
        self.__dao_shared_care_area__ = BaseDAO(SharedCareArea)

    def get_metric(self, method, type_discharge, year,
                   type_community_detection, normalized=None):
        """Compute the metric; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # ``NotImplemented`` is a comparison sentinel and is not callable;
        # ``raise NotImplemented(...)`` surfaced as a TypeError instead.
        raise NotImplementedError('get_metric')

    def get_shared_care_areas(self, method, type_community_detection,
                              type_discharge, year):
        """Return the stored zcta / sca_id pairs for one key as a DataFrame."""
        sca_match_dict = {
            SharedCareArea.method: method,
            SharedCareArea.type_community_detection:
                type_community_detection,
            SharedCareArea.year: year,
            SharedCareArea.type_discharge: type_discharge,
        }
        sca_project = [SharedCareArea.zcta, SharedCareArea.sca_id]
        list_shared_care_areas = self.__dao_shared_care_area__.obtain_pipeline(
            sca_match_dict, sca_project, math_none=True)
        return pd.DataFrame.from_dict(list_shared_care_areas)

    def get_all_scas(self, df_shared_care_areas, df_sca_metric):
        """Return one row per distinct SCA id with its metric value.

        The distinct ``'SCA_ID'`` values of ``df_shared_care_areas`` are
        left-merged with ``df_sca_metric`` on the columns the two frames
        share (no explicit ``on``), so SCAs without a metric row are kept.
        Only the sca-id and metric-value columns are returned.
        """
        col_sca_id = MetricsSharedCareAreaDocument.sca_id.name
        col_metric_value = MetricsSharedCareAreaDocument.metric_value.name

        df_result = pd.DataFrame()
        df_result[col_sca_id] = df_shared_care_areas['SCA_ID'].unique()
        df_result = df_result.merge(df_sca_metric, how='left')
        return df_result[[col_sca_id, col_metric_value]]
class NetworkSharedCare(object):
    """Base builder of a node-in / node-out / weight edge table.

    Concrete builders are obtained through :meth:`instance`; they supply the
    grouping columns via :meth:`get_nodes` and may post-process the grouped
    table in :meth:`transform_network`.
    """

    TYPE_HU = 'HU'
    LIST_TYPES = [TYPE_HU]

    def __init__(self):
        self.discharges = BaseDAO(HospitalDischargeDocument)

    @classmethod
    def instance(cls, network_type):
        """Return the builder registered for ``network_type``.

        Raises:
            NotImplementedError: if ``network_type`` is not a known type.
        """
        if network_type == NetworkSharedCare.TYPE_HU:
            return HuNetworkSharedCare()
        # ``NotImplemented`` is a comparison sentinel and is not callable;
        # ``raise NotImplemented(...)`` surfaced as a TypeError instead.
        raise NotImplementedError(f'{network_type} not implemented')

    def get_nodes(self):
        """Return the columns to group by; the base class returns None."""
        pass

    def transform_network(self, df):
        """Hook applied to the grouped table; the base class returns it as is."""
        return df

    def create_network(self, dict_match):
        """Build the edge table from the discharges matching ``dict_match``.

        Matching discharges are grouped by :meth:`get_nodes`, the discharge
        quantity is summed per group, the table goes through
        :meth:`transform_network`, and its columns are relabelled with
        :meth:`get_columns`.
        """
        results = self.discharges.obtain_pipeline(dict_match)
        nodes = self.get_nodes()
        quantity = HospitalDischargeDocument.discharge_quantity.db_field

        df = pd.DataFrame.from_dict(results)
        # Select the quantity column before summing: same result as summing
        # every column first, without aggregating columns that are discarded.
        df = df.groupby(nodes)[quantity].sum().reset_index()
        df = self.transform_network(df)
        df.columns = self.get_columns()
        return df

    def get_columns(self):
        """Return the db_field names for node-in, node-out and weight."""
        return [
            NetworkHospitalDischargeDocument.node_in.db_field,
            NetworkHospitalDischargeDocument.node_out.db_field,
            NetworkHospitalDischargeDocument.weight.db_field,
        ]
def __init__(self, name):
    """Store the metric name and create a DAO over ``SharedCareArea``."""
    self.name = name
    shared_care_area_dao = BaseDAO(SharedCareArea)
    self.__dao_shared_care_area__ = shared_care_area_dao
def __init__(self, name):
    """Store the metric name and create a DAO over the network document."""
    self.name = name
    network_dao = BaseDAO(NetworkHospitalDischargeDocument)
    self.__dao_network__ = network_dao
def __init__(self):
    """Query the metrics document with an empty match into ``self.df``.

    The records returned by ``obtain_pipeline`` are kept as a pandas
    DataFrame on the instance.
    """
    metrics_dao = BaseDAO(MetricsSharedCareAreaDocument)
    records = metrics_dao.obtain_pipeline(dict_match={}, math_none=True)
    self.df = pd.DataFrame.from_dict(records)
# df = df.rename( # columns={ # 'HPD_DISCHARGE_YEAR': 'Year', # 'HPD_DISCHARGE_TYPE': 'Type of Discharge', # 'HPD_FACILITY_NAME': 'Number of Hospitals', # 'HPD_PATIENT_ZIPCODE': 'Number of Patient Zip Codes', # 'HPD_DISCHARGE_QUANTITY': 'Number of Discharges' # }) # df = df.set_index(['Year', 'Type of Discharge']) # formatters = [formatter_number, formatter_number, formatter_number] # # latex = str(df.to_latex(header=True, index=True, multirow=True, formatters=formatters, escape=False)) # # print(latex) dao = BaseDAO(HospitalDischargeDocument) discharges_match_dict = { # HospitalDischargeDocument.discharge_year: year, # HospitalDischargeDocument.discharge_type: type_discharge } discharges_project = [ HospitalDischargeDocument.discharge_year, HospitalDischargeDocument.discharge_type, HospitalDischargeDocument.facility_id, HospitalDischargeDocument.patient_zcta, HospitalDischargeDocument.discharge_quantity ] list_hospital_discharges = dao.obtain_pipeline(discharges_match_dict, discharges_project,
__author__ = 'diegopinheiro' __email__ = '*****@*****.**' __github__ = 'https://github.com/diegompin' from mhs.src.dao.base_dao import BaseDAO import pandas as pd from mhs.src.dao.mhs.documents_mhs import NetworkHospitalDischargeDocument from mhs.src.dao.mhs.documents_mhs import HospitalDischargeDocument, SharedCareArea mod_network = BaseDAO(NetworkHospitalDischargeDocument) mod_sca = BaseDAO(SharedCareArea) col_net_method = NetworkHospitalDischargeDocument.network_method col_net_dicharge = NetworkHospitalDischargeDocument.network_type col_net_year = NetworkHospitalDischargeDocument.network_year col_net_node_in = NetworkHospitalDischargeDocument.node_in col_net_node_out = NetworkHospitalDischargeDocument.node_out col_net_weight = NetworkHospitalDischargeDocument.weight from mhs.src.shared_care_areas.networks.network_shared_care import NetworkSharedCare from mhs.src.library.network.community_detection import CommunityDetection value_method = NetworkSharedCare.TYPE_PINHEIRO value_discharge = HospitalDischargeDocument.TYPE_DISCHARGE_ED_ONLY value_type_community_detection = CommunityDetection.TYPE_INFOMAP value_year = 2014 df = pd.DataFrame.from_dict( mod_network.obtain_pipeline( dict_match={ col_net_method: value_method,
def __init__(self):
    """Create the DAO over ``HospitalDischargeDocument`` used for queries."""
    discharges_dao = BaseDAO(HospitalDischargeDocument)
    self.discharges = discharges_dao
class DischargeBasedMetricsSharedCareArea(BaseMetricsSharedCareArea):
    """Shared-care-area metric computed from hospital discharge records.

    Subclasses implement :meth:`calculate_metric`. :meth:`get_metric` loads
    the discharges and the SCA assignment, calls it, and optionally converts
    the values to z-scores against permuted SCA assignments.
    """

    def __init__(self, name):
        super().__init__(name)
        self.__dao_discharges__ = BaseDAO(HospitalDischargeDocument)

    def get_discharges(self, type_discharge, year):
        """Return patient zcta, facility zcta and quantity for one year/type."""
        discharges_match_dict = {
            HospitalDischargeDocument.discharge_year: year,
            HospitalDischargeDocument.discharge_type: type_discharge,
        }
        discharges_project = [
            HospitalDischargeDocument.patient_zcta,
            HospitalDischargeDocument.facility_zcta,
            HospitalDischargeDocument.discharge_quantity,
        ]
        list_hospital_discharges = self.__dao_discharges__.obtain_pipeline(
            discharges_match_dict, discharges_project, math_none=False)
        return pd.DataFrame.from_dict(list_hospital_discharges)

    def calculate_metric(self, df_hospital_discharges, df_shared_care_areas):
        """Compute the metric table; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # ``NotImplemented`` is a comparison sentinel and is not callable;
        # ``raise NotImplemented()`` surfaced as a TypeError instead.
        raise NotImplementedError('calculate_metric')

    def get_metric(self, method, type_discharge, year,
                   type_community_detection, normalized=None):
        """Return the metric table, z-scored when ``normalized`` is truthy."""
        df_hospital_discharges = self.get_discharges(type_discharge, year)
        df_shared_care_areas = self.get_shared_care_areas(
            method, type_community_detection, type_discharge, year)
        df_sca = self.calculate_metric(df_hospital_discharges,
                                       df_shared_care_areas)
        if normalized:
            df_sca = self.get_normalized(df_sca, df_hospital_discharges,
                                         df_shared_care_areas)
        return df_sca

    def get_normalized(self, df_sca, df_hospital_discharges,
                       df_shared_care_areas):
        """Convert metric values to z-scores against permuted assignments.

        The ``'SCA_ID'`` column is randomly permuted 100 times; for each
        permutation the metric is recomputed and its mean recorded. The
        metric-value column of ``df_sca`` is then replaced, in place, by
        ``(value - mean) / std`` of those recorded means, and ``df_sca`` is
        returned.
        """
        repetitions = 100
        sca_r = np.zeros(repetitions)
        for i in range(repetitions):
            df_shared_care_areas_r = df_shared_care_areas.copy()
            df_shared_care_areas_r['SCA_ID'] = np.random.permutation(
                df_shared_care_areas['SCA_ID'])
            df_sca_r = self.calculate_metric(df_hospital_discharges,
                                             df_shared_care_areas_r)
            sca_r[i] = np.mean(df_sca_r['metric_value'].values)

        mean_r = np.mean(sca_r)
        # NOTE(review): std_r is 0 when every permutation yields the same
        # mean; the division below then produces inf/NaN values.
        std_r = np.std(sca_r)

        col_metric_value = MetricsSharedCareAreaDocument.metric_value.name
        df_sca[col_metric_value] = (df_sca[col_metric_value] - mean_r) / std_r
        return df_sca
def __init__(self, name):
    """Initialise the parent with ``name`` and create the network DAO."""
    super(NetworkBasedMetricsSharedCareArea, self).__init__(name)
    network_dao = BaseDAO(NetworkHospitalDischargeDocument)
    self.__dao_network__ = network_dao
def __init__(self, name):
    """Initialise the parent with ``name`` and create the discharges DAO."""
    super().__init__(name)
    discharges_dao = BaseDAO(HospitalDischargeDocument)
    self.__dao_discharges__ = discharges_dao
__author__ = 'diegopinheiro' __email__ = '*****@*****.**' __github__ = 'https://github.com/diegompin' import pandas as pd from mhs.src.dao.base_dao import BaseDAO from mhs.src.shared_care_areas.metrics.base import MetricsSharedCareAreaFactory from mhs.src.dao.mhs.documents_mhs import MetricsSharedCareAreaDocument import statsmodels.api as sm from statsmodels.formula.api import ols import statsmodels.formula.api as smf import bootstrapped.bootstrap as bs import bootstrapped.stats_functions as bs_stats mod = BaseDAO(MetricsSharedCareAreaDocument) df = pd.DataFrame.from_dict(mod.obtain_pipeline(dict_match={}, math_none=True)) df_metric = df[ (df['MSA_METRIC'] == MetricsSharedCareAreaFactory.TYPE_LOCALIZATION_INDEX) & (df['MSA_SCA_NORMALIZED'] == False) & (pd.notnull(df['MSA_SCA_YEAR'])) & (df['MSA_SCA_TYPE_COMMUNITY_DETECTION'] == 'INFOMAP') & (df['MSA_SCA_TYPE_DISCHARGE'] == 'ED Only')] cols = [ 'MSA_SCA_METHOD', 'MSA_SCA_TYPE_DISCHARGE', 'MSA_SCA_TYPE_COMMUNITY_DETECTION', 'MSA_SCA_YEAR' ] col_metric_value = 'MSA_METRIC_VALUE' boostraps = 1000 def f_bootstrap(values):