def operator_create_network(self, method, year, discharge_type):
        """Build one shared-care network, export it, and store it.

        The network produced by ``NetworkSharedCare.instance(method)`` for the
        given year and discharge type is written as ``.csv`` and ``.hdf``
        files in the ``TYPE_NETWORK`` operator folder, turned into a graph
        that is handed to ``write_graph``, and then inserted row by row into
        the ``NetworkHospitalDischargeDocument`` collection. Documents already
        stored for the same method/year/type are deleted first.

        :param method: network method name passed to ``NetworkSharedCare.instance``
        :param year: discharge year used to filter the discharges
        :param discharge_type: discharge type used to filter the discharges
        """
        out_dir = self.get_operator_folder_path(
            SharedCareAreaScript.TYPE_NETWORK)
        stem = f'{method}_{year}_{discharge_type}'
        base_path = f'{out_dir}/{stem}'

        builder = NetworkSharedCare.instance(method)
        edges = builder.create_network({
            HospitalDischargeDocument.discharge_year: year,
            HospitalDischargeDocument.discharge_type: discharge_type,
        })

        # File exports of the edge list.
        edges.to_csv(f'{base_path}.csv', index=False)
        edges.to_hdf(f'{base_path}.hdf', key='df')
        # The graph is only needed for this export, so it is never bound to a
        # local name and can be released as soon as write_graph returns.
        write_graph(build_from_dataframe(edges),
                    folder=out_dir,
                    filename=stem)

        doc = NetworkHospitalDischargeDocument
        network_key = {
            doc.network_method.name: method,
            doc.network_year.name: year,
            doc.network_type.name: discharge_type,
        }
        dao = BaseDAO(doc)
        # Drop whatever was stored for this method/year/type before reloading.
        dao.delete(**network_key)

        # Switch from storage column names (db_field) to document attribute
        # names so each row can be expanded into the document constructor.
        edges = edges.rename(
            columns={
                doc.node_in.db_field: doc.node_in.name,
                doc.node_out.db_field: doc.node_out.name,
                doc.weight.db_field: doc.weight.name,
            })
        for column, value in network_key.items():
            edges[column] = value

        dao.init_bulk(max_bulk_insert=100000)
        for _, row in edges.iterrows():
            dao.insert_bulk(doc(**row.to_dict()))
        dao.exit_bulk()
예제 #2
0
class BaseMetricsNetwork(object):
    """Base class for metrics computed on stored discharge networks.

    Holds a DAO over ``NetworkHospitalDischargeDocument`` and offers
    ``get_network`` to load one stored network as a DataFrame. Subclasses
    are expected to override ``get_metric``.
    """

    def __init__(self, name):
        """Store the metric name and create the network DAO."""
        self.name = name
        self.__dao_network__ = BaseDAO(NetworkHospitalDischargeDocument)

    def get_metric(self, method, type_discharge, year, normalized=None):
        """Compute the metric; must be overridden by subclasses.

        :raises NotImplementedError: always, in this base class
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # calling it fails with TypeError instead of signalling "abstract".
        raise NotImplementedError('get_metric')

    def get_network(self, method, type_discharge, year):
        """Load the stored network for one method, discharge type and year.

        :returns: DataFrame built from the pipeline results, restricted to
            the method, type, year, node-in, node-out and weight fields
        """
        doc = NetworkHospitalDischargeDocument
        match = {
            doc.network_method: method,
            doc.network_type: type_discharge,
            doc.network_year: year,
        }
        # A list (not a set) keeps the projection order deterministic and
        # matches how the other obtain_pipeline callers pass their fields.
        project = [
            doc.network_method,
            doc.network_type,
            doc.network_year,
            doc.node_in,
            doc.node_out,
            doc.weight,
        ]
        records = self.__dao_network__.obtain_pipeline(dict_match=match,
                                                       dict_project=project)
        return pd.DataFrame.from_dict(records)
    def operator_calculate_network_metrics(self, metric, method, year,
                                           discharge_type):
        """Compute one network metric for a stored network and persist it.

        The edges stored for the given method/year/discharge type are loaded
        into an undirected ``nx.Graph``, the metric implementation returned
        by ``NetworkMetricFactory.instance(metric)`` is applied, and the
        result is written to a ``.csv`` file in the ``TYPE_NETWORK_METRICS``
        operator folder and inserted into the
        ``MetricsNetworkSharedCareAreaDocument`` collection, replacing the
        documents previously stored for the same key.
        """
        net_doc = NetworkHospitalDischargeDocument
        metric_doc = MetricsNetworkSharedCareAreaDocument

        edge_filter = {
            net_doc.network_method: method,
            net_doc.network_type: discharge_type,
            net_doc.network_year: year,
        }
        # Each record's values are unpacked positionally as an edge tuple
        # below, so only the three edge fields are projected, in this order.
        edge_fields = [net_doc.node_in, net_doc.node_out, net_doc.weight]
        edge_records = BaseDAO(net_doc).obtain_pipeline(
            dict_match=edge_filter,
            dict_project=edge_fields,
            math_none=False)

        graph = nx.Graph()
        graph.add_weighted_edges_from(
            tuple(record.values()) for record in edge_records)

        df_metric = NetworkMetricFactory.instance(metric).get_metric(graph)
        df_metric.columns = [metric_doc.metric_value.name]

        out_dir = self.get_operator_folder_path(
            SharedCareAreaScript.TYPE_NETWORK_METRICS)
        out_stem = f'{method}_{year}_{discharge_type}_{metric}'
        df_metric.to_csv(f'{out_dir}/{out_stem}.csv', index=False)

        metrics_dict = {
            metric_doc.method.name: method,
            metric_doc.year.name: year,
            metric_doc.type_discharge.name: discharge_type,
            metric_doc.metric.name: metric,
        }
        # The helper takes a single (key dict, row) tuple per metric row.
        pairs = [(metrics_dict, row) for _, row in df_metric.iterrows()]
        list_metrics = [
            self.get_metrics_network_shared_care(pair) for pair in pairs
        ]

        dao_metrics = BaseDAO(metric_doc)
        dao_metrics.delete(**metrics_dict)
        dao_metrics.insert(list_metrics)
    def operator_calculate_metrics(self, method, year, discharge_type,
                                   type_community_detection, metric):
        """Compute one non-normalized shared-care-area metric.

        Creates a DAO for ``MetricsSharedCareAreaDocument``, resolves the
        ``TYPE_COMMUNITY_METRICS`` operator folder and delegates to
        ``calculate_metrics_by_normalization`` with ``normalized=False``.
        """
        metrics_dao = BaseDAO(MetricsSharedCareAreaDocument)
        out_dir = self.get_operator_folder_path(
            SharedCareAreaScript.TYPE_COMMUNITY_METRICS)

        # Arguments are passed positionally in the order the callee expects.
        self.calculate_metrics_by_normalization(
            metrics_dao,
            discharge_type,
            out_dir,
            method,
            metric,
            type_community_detection,
            year,
            normalized=False)
    def operator_extract_communities(self, method, year, discharge_type,
                                     type_community_detection):
        """Detect communities on a stored network and persist them.

        Reads ``<method>_<year>_<discharge_type>.pajek`` from the
        ``TYPE_NETWORK`` operator folder, runs the community detection
        selected by ``type_community_detection``, writes the result as
        ``.csv`` and ``.hdf`` in the ``TYPE_COMMUNITIES`` operator folder and
        replaces the ``SharedCareArea`` documents stored for the same
        method/year/discharge type/detection type.
        """
        folder_path_in = self.get_operator_folder_path(
            SharedCareAreaScript.TYPE_NETWORK)
        filename_in = f'{method}_{year}_{discharge_type}'
        filepath_in = f'{folder_path_in}/{filename_in}'

        folder_path_out = self.get_operator_folder_path(
            SharedCareAreaScript.TYPE_COMMUNITIES)
        filename_out = f'{method}_{year}_{discharge_type}_{type_community_detection}'
        filepath_out = f'{folder_path_out}/{filename_out}'

        G = nx.read_pajek(f'{filepath_in}.pajek')
        community_detection = CommunityDetection.build_community_detection(
            type_community_detection)
        # find_communities takes one tuple: (graph, input path, input name).
        df_communities = community_detection.find_communities(
            (G, filepath_in, filename_in))
        # NOTE(review): DataFrame.rename returns a new frame and the result is
        # discarded here, so the columns are NOT renamed. Left as-is because
        # get_shared_care and the exported files may depend on the current
        # column names; confirm the intent before assigning the result.
        df_communities.rename(
            columns={
                SharedCareArea.zcta.db_field: 'NODE_ID',
                SharedCareArea.sca_id.db_field: 'COMMUNITY_ID'
            })

        df_communities.to_csv(f'{filepath_out}.csv', index=False)
        # ``key`` must be passed by keyword: positional use is deprecated in
        # recent pandas, and the network export already uses the keyword form.
        df_communities.to_hdf(f'{filepath_out}.hdf', key='df')

        sca_dict = {
            SharedCareArea.method.name: method,
            SharedCareArea.year.name: year,
            SharedCareArea.type_discharge.name: discharge_type,
            SharedCareArea.type_community_detection.name:
            type_community_detection
        }

        dao = BaseDAO(SharedCareArea)
        # Remove the previous result for this key before inserting the new one.
        dao.delete(**sca_dict)

        rows = [(sca_dict, row) for _, row in df_communities.iterrows()]
        list_shared_care_areas = [self.get_shared_care(item) for item in rows]

        dao.insert(list_shared_care_areas)
예제 #6
0
class BaseMetricsSharedCareArea(object):
    """Base class for metrics computed on shared care areas (SCAs).

    Holds a DAO over ``SharedCareArea`` and provides helpers to load the
    zcta-to-SCA assignment and to align metric values with all SCA ids.
    Subclasses are expected to override ``get_metric``.
    """

    def __init__(self, name):
        """Store the metric name and create the shared-care-area DAO."""
        self.name = name
        self.__dao_shared_care_area__ = BaseDAO(SharedCareArea)

    def get_metric(self,
                   method,
                   type_discharge,
                   year,
                   type_community_detection,
                   normalized=None):
        """Compute the metric; must be overridden by subclasses.

        :raises NotImplementedError: always, in this base class
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # calling it fails with TypeError instead of signalling "abstract".
        raise NotImplementedError('get_metric')

    def get_shared_care_areas(self, method, type_community_detection,
                              type_discharge, year):
        """Load the zcta / SCA id pairs stored for one configuration.

        :returns: DataFrame built from the pipeline results, projected to
            the ``zcta`` and ``sca_id`` fields
        """
        sca_match_dict = {
            SharedCareArea.method: method,
            SharedCareArea.type_community_detection: type_community_detection,
            SharedCareArea.year: year,
            SharedCareArea.type_discharge: type_discharge
        }
        sca_project = [SharedCareArea.zcta, SharedCareArea.sca_id]
        list_shared_care_areas = self.__dao_shared_care_area__.obtain_pipeline(
            sca_match_dict, sca_project, math_none=True)
        return pd.DataFrame.from_dict(list_shared_care_areas)

    def get_all_scas(self, df_shared_care_areas, df_sca_metric):
        """Return one row per distinct SCA id with its metric value.

        The distinct ``SCA_ID`` values of ``df_shared_care_areas`` are
        left-merged with ``df_sca_metric`` (on their shared columns), so SCAs
        without a metric row are kept with a missing value.

        :returns: DataFrame with the SCA id and metric value columns only
        """
        col_sca_id = MetricsSharedCareAreaDocument.sca_id.name
        col_metric_value = MetricsSharedCareAreaDocument.metric_value.name

        df_result = pd.DataFrame()
        df_result[col_sca_id] = df_shared_care_areas['SCA_ID'].unique()
        df_result = df_result.merge(df_sca_metric, how='left')
        return df_result[[col_sca_id, col_metric_value]]
예제 #7
0
class NetworkSharedCare(object):
    """Base builder of shared-care networks from hospital discharges.

    ``instance`` selects the concrete builder for a network type. Subclasses
    supply the grouping columns through ``get_nodes`` and may post-process
    the aggregated frame in ``transform_network``.
    """

    TYPE_HU = 'HU'

    LIST_TYPES = [TYPE_HU]

    def __init__(self):
        """Create the DAO used to read hospital discharges."""
        self.discharges = BaseDAO(HospitalDischargeDocument)

    @classmethod
    def instance(cls, network_type):
        """Return the builder registered for ``network_type``.

        :raises NotImplementedError: if ``network_type`` is not supported
        """
        if network_type == NetworkSharedCare.TYPE_HU:
            return HuNetworkSharedCare()
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # calling it fails with TypeError and loses the message.
        raise NotImplementedError(f'{network_type} not implemented')

    def get_nodes(self):
        """Return the columns to group discharges by; None in this base class."""
        pass

    def transform_network(self, df):
        """Hook for subclasses; the base implementation returns ``df`` as is."""
        return df

    def create_network(self, dict_match):
        """Aggregate the matching discharges into a weighted edge list.

        :param dict_match: filter passed to the discharge DAO pipeline
        :returns: DataFrame whose columns are renamed to ``get_columns()``
        """
        results = self.discharges.obtain_pipeline(dict_match)
        nodes = self.get_nodes()
        quantity = HospitalDischargeDocument.discharge_quantity.db_field
        df = pd.DataFrame.from_dict(results)
        # Select the quantity column before summing so unrelated (possibly
        # non-numeric) columns are never aggregated.
        df = df.groupby(nodes)[quantity].sum().reset_index()
        df = self.transform_network(df)
        df.columns = self.get_columns()
        return df

    def get_columns(self):
        """Return the storage names of the node-in, node-out and weight fields."""
        return [
            NetworkHospitalDischargeDocument.node_in.db_field,
            NetworkHospitalDischargeDocument.node_out.db_field,
            NetworkHospitalDischargeDocument.weight.db_field,
        ]
예제 #8
0
 def __init__(self, name):
     """Store ``name`` and create a ``BaseDAO`` for ``SharedCareArea``."""
     self.name = name
     self.__dao_shared_care_area__ = BaseDAO(SharedCareArea)
예제 #9
0
 def __init__(self, name):
     """Store ``name`` and create a ``BaseDAO`` for ``NetworkHospitalDischargeDocument``."""
     self.name = name
     self.__dao_network__ = BaseDAO(NetworkHospitalDischargeDocument)
예제 #10
0
 def __init__(self):
     """Load the ``MetricsSharedCareAreaDocument`` pipeline results into ``self.df``.

     The pipeline is queried with an empty match dict and ``math_none=True``.
     """
     metrics_dao = BaseDAO(MetricsSharedCareAreaDocument)
     records = metrics_dao.obtain_pipeline(dict_match={}, math_none=True)
     self.df = pd.DataFrame.from_dict(records)
예제 #11
0
# df = df.rename(
#     columns={
#         'HPD_DISCHARGE_YEAR': 'Year',
#         'HPD_DISCHARGE_TYPE': 'Type of Discharge',
#         'HPD_FACILITY_NAME': 'Number of Hospitals',
#         'HPD_PATIENT_ZIPCODE': 'Number of Patient Zip Codes',
#         'HPD_DISCHARGE_QUANTITY': 'Number of Discharges'
#     })
# df = df.set_index(['Year', 'Type of Discharge'])
# formatters = [formatter_number, formatter_number, formatter_number]
#
# latex = str(df.to_latex(header=True, index=True, multirow=True, formatters=formatters, escape=False))
#
# print(latex)

# DAO over the hospital-discharge documents.
dao = BaseDAO(HospitalDischargeDocument)

# Match filter for the pipeline. Both candidate criteria are commented out,
# so the dict is empty and no year/type restriction is passed.
discharges_match_dict = {
    # HospitalDischargeDocument.discharge_year: year,
    # HospitalDischargeDocument.discharge_type: type_discharge
}
# Fields requested from the pipeline.
discharges_project = [
    HospitalDischargeDocument.discharge_year,
    HospitalDischargeDocument.discharge_type,
    HospitalDischargeDocument.facility_id,
    HospitalDischargeDocument.patient_zcta,
    HospitalDischargeDocument.discharge_quantity
]

list_hospital_discharges = dao.obtain_pipeline(discharges_match_dict,
                                               discharges_project,
__author__ = 'diegopinheiro'
__email__ = '*****@*****.**'
__github__ = 'https://github.com/diegompin'

from mhs.src.dao.base_dao import BaseDAO
import pandas as pd
from mhs.src.dao.mhs.documents_mhs import NetworkHospitalDischargeDocument
from mhs.src.dao.mhs.documents_mhs import HospitalDischargeDocument, SharedCareArea

# DAOs over the stored networks and the stored shared care areas.
mod_network = BaseDAO(NetworkHospitalDischargeDocument)
mod_sca = BaseDAO(SharedCareArea)

# Short aliases for the network document fields used in the queries below.
col_net_method = NetworkHospitalDischargeDocument.network_method
col_net_dicharge = NetworkHospitalDischargeDocument.network_type
col_net_year = NetworkHospitalDischargeDocument.network_year
col_net_node_in = NetworkHospitalDischargeDocument.node_in
col_net_node_out = NetworkHospitalDischargeDocument.node_out
col_net_weight = NetworkHospitalDischargeDocument.weight

from mhs.src.shared_care_areas.networks.network_shared_care import NetworkSharedCare
from mhs.src.library.network.community_detection import CommunityDetection

# Fixed configuration selected for this script run.
# NOTE(review): confirm that the imported NetworkSharedCare defines
# TYPE_PINHEIRO; it is not visible in this part of the file.
value_method = NetworkSharedCare.TYPE_PINHEIRO
value_discharge = HospitalDischargeDocument.TYPE_DISCHARGE_ED_ONLY
value_type_community_detection = CommunityDetection.TYPE_INFOMAP
value_year = 2014

df = pd.DataFrame.from_dict(
    mod_network.obtain_pipeline(
        dict_match={
            col_net_method: value_method,
예제 #13
0
 def __init__(self):
     """Create a ``BaseDAO`` for ``HospitalDischargeDocument`` as ``self.discharges``."""
     self.discharges = BaseDAO(HospitalDischargeDocument)
예제 #14
0
class DischargeBasedMetricsSharedCareArea(BaseMetricsSharedCareArea):
    """Base class for SCA metrics derived from hospital discharges.

    ``get_metric`` loads the discharges and the shared care areas, delegates
    the computation to ``calculate_metric`` (to be provided by subclasses)
    and optionally converts the values to z-scores against a null model of
    randomly permuted SCA assignments.
    """

    def __init__(self, name):
        """Initialise the base class and create the discharge DAO."""
        super().__init__(name)
        self.__dao_discharges__ = BaseDAO(HospitalDischargeDocument)

    def get_discharges(self, type_discharge, year):
        """Load the discharges for one discharge type and year.

        :returns: DataFrame built from the pipeline results, projected to
            the patient zcta, facility zcta and discharge quantity fields
        """
        discharges_match_dict = {
            HospitalDischargeDocument.discharge_year: year,
            HospitalDischargeDocument.discharge_type: type_discharge
        }
        discharges_project = [
            HospitalDischargeDocument.patient_zcta,
            HospitalDischargeDocument.facility_zcta,
            HospitalDischargeDocument.discharge_quantity
        ]
        list_hospital_discharges = self.__dao_discharges__.obtain_pipeline(
            discharges_match_dict, discharges_project, math_none=False)
        return pd.DataFrame.from_dict(list_hospital_discharges)

    def calculate_metric(self, df_hospital_discharges, df_shared_care_areas):
        """Compute the metric per SCA; must be overridden by subclasses.

        :raises NotImplementedError: always, in this base class
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # calling it fails with TypeError instead of signalling "abstract".
        raise NotImplementedError('calculate_metric')

    def get_metric(self,
                   method,
                   type_discharge,
                   year,
                   type_community_detection,
                   normalized=None):
        """Compute the metric for one configuration.

        :param normalized: when truthy, the values are passed through
            ``get_normalized`` before being returned
        :returns: whatever ``calculate_metric`` returns, optionally normalized
        """
        df_hospital_discharges = self.get_discharges(type_discharge, year)
        df_shared_care_areas = self.get_shared_care_areas(
            method, type_community_detection, type_discharge, year)
        df_sca = self.calculate_metric(df_hospital_discharges,
                                       df_shared_care_areas)

        if normalized:
            df_sca = self.get_normalized(df_sca, df_hospital_discharges,
                                         df_shared_care_areas)

        return df_sca

    def get_normalized(self, df_sca, df_hospital_discharges,
                       df_shared_care_areas, repetitions=100):
        """Replace the metric values in ``df_sca`` by z-scores.

        The null distribution is the mean metric value obtained after
        randomly permuting the ``SCA_ID`` column of ``df_shared_care_areas``,
        repeated ``repetitions`` times. ``df_sca`` is modified in place and
        also returned.

        :param repetitions: number of random permutations (default 100)
        """
        null_means = np.zeros(repetitions)
        for i in range(repetitions):
            df_shuffled = df_shared_care_areas.copy()
            df_shuffled['SCA_ID'] = np.random.permutation(
                df_shared_care_areas['SCA_ID'])
            df_sca_r = self.calculate_metric(df_hospital_discharges,
                                             df_shuffled)
            # NOTE(review): this column is addressed by the literal
            # 'metric_value' while the estimates below use the document
            # field name; confirm both always refer to the same column.
            null_means[i] = np.mean(df_sca_r['metric_value'].values)

        mean_r = np.mean(null_means)
        std_r = np.std(null_means)
        col_metric_value = MetricsSharedCareAreaDocument.metric_value.name
        # NOTE(review): std_r is 0 when every permutation yields the same
        # mean; the division then produces inf/NaN rather than raising.
        df_sca[col_metric_value] = (df_sca[col_metric_value] -
                                    mean_r) / std_r
        return df_sca
예제 #15
0
 def __init__(self, name):
     """Initialise the parent with ``name`` and create the network DAO."""
     super().__init__(name)
     self.__dao_network__ = BaseDAO(NetworkHospitalDischargeDocument)
예제 #16
0
 def __init__(self, name):
     """Initialise the parent with ``name`` and create the discharge DAO."""
     super().__init__(name)
     self.__dao_discharges__ = BaseDAO(HospitalDischargeDocument)
예제 #17
0
__author__ = 'diegopinheiro'
__email__ = '*****@*****.**'
__github__ = 'https://github.com/diegompin'
import pandas as pd

from mhs.src.dao.base_dao import BaseDAO
from mhs.src.shared_care_areas.metrics.base import MetricsSharedCareAreaFactory
from mhs.src.dao.mhs.documents_mhs import MetricsSharedCareAreaDocument
import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf
import bootstrapped.bootstrap as bs
import bootstrapped.stats_functions as bs_stats

# Load the stored shared-care-area metrics (empty match dict) into a frame.
mod = BaseDAO(MetricsSharedCareAreaDocument)
df = pd.DataFrame.from_dict(mod.obtain_pipeline(dict_match={}, math_none=True))
# Keep only the non-normalized localization-index rows that have a year,
# for INFOMAP communities and 'ED Only' discharges.
df_metric = df[
    (df['MSA_METRIC'] == MetricsSharedCareAreaFactory.TYPE_LOCALIZATION_INDEX)
    & (df['MSA_SCA_NORMALIZED'] == False)
    & (pd.notnull(df['MSA_SCA_YEAR']))
    & (df['MSA_SCA_TYPE_COMMUNITY_DETECTION'] == 'INFOMAP')
    & (df['MSA_SCA_TYPE_DISCHARGE'] == 'ED Only')]
# Columns identifying one configuration (method, discharge type, detection, year).
cols = [
    'MSA_SCA_METHOD', 'MSA_SCA_TYPE_DISCHARGE',
    'MSA_SCA_TYPE_COMMUNITY_DETECTION', 'MSA_SCA_YEAR'
]
# Column holding the metric values.
col_metric_value = 'MSA_METRIC_VALUE'
# Presumably the number of bootstrap resamples used further down - TODO confirm.
boostraps = 1000


def f_bootstrap(values):