Ejemplo n.º 1
0
    def __init__(self, app_config=None):
        """Create the managers, cache the static data and list the candidate tools.

        ``app_config`` is accepted but not used by this constructor.
        ``self.generic_tools`` maps ``'nn'`` / ``'svm'`` to lists of
        ``(display name, constructor keyword arguments)`` pairs.
        """
        super(Prediction_Manager, self).__init__()
        self.resources_manager = Resources_Manager()
        self.statistics_manager = Statistics_Manager()
        self.load_unchanged_data()

        # Neural networks: every layer layout is paired with every activation.
        nn_layouts = [
            ('5 x 30', (30, 30, 30, 30, 30)),
            ('3 x 100', (100, 100, 100)),
        ]
        nn_activations = ('identity', 'tanh', 'relu')
        nn_tools = [
            ('NN %s neurons %s' % (label, activation),
             {'solver': 'sgd',
              'hidden_layer_sizes': layers,
              'activation': activation})
            for label, layers in nn_layouts
            for activation in nn_activations
        ]

        # Support vector machines: kernel / degree / C / coef0 variations.
        svm_tools = [
            ('SVM poly d1 C-10^-4', dict(kernel='poly', degree=1, C=0.0001)),
            ('SVM poly d1 C-100', dict(kernel='poly', degree=1, C=100)),
            ('SVM poly d2', dict(kernel='poly', degree=2)),
            ('SVM poly d2 coef0-2', dict(kernel='poly', degree=2, coef0=2)),
            ('SVM poly d3', dict(kernel='poly', degree=3)),
            ('SVM poly d3 coef0-2', dict(kernel='poly', degree=3, coef0=2)),
            ('SVM rbf C-10^-5', dict(kernel='rbf', C=0.00001)),
            ('SVM rbf C-10^-4', dict(kernel='rbf', C=0.0001)),
            ('SVM rbf C-100', dict(kernel='rbf', C=100)),
            ('SVM linear', dict(kernel='linear')),
        ]

        self.generic_tools = {'nn': nn_tools, 'svm': svm_tools}
Ejemplo n.º 2
0
def create_data_summary():
    """Register a position for every parameter and build each county's summary.

    Fills the module-level ``PARAMETERS_POSITION`` mapping with
    ``parameter index -> (position, parameter name)``, positions starting
    at 4, then calls ``create_summary_for_county`` once per county name.
    """
    manager = Resources_Manager()

    county_names = [entry['name'] for entry in manager.get_counties()]
    parameters = manager.get_all_parameters()

    # Positions are handed out sequentially, the first parameter getting 4.
    for position, parameter in enumerate(parameters, 4):
        PARAMETERS_POSITION[parameter['index']] = (position, parameter['name'])

    for county_name in county_names:
        create_summary_for_county(county_name, parameters, manager)
Ejemplo n.º 3
0
def import_data(app_config):
    """Import the disease data through freshly created managers.

    :param app_config: configuration object forwarded to ``import_diseases``.
    """
    # Creating the managers first ensures the db connection (per the
    # original author's note).
    res_manager = Resources_Manager()
    stats_manager = Statistics_Manager()
    # The station imports are currently disabled:
    # import_stations(res_manager, app_config)
    # import_stations_measurements(res_manager, app_config)
    import_diseases(res_manager, stats_manager, app_config)
Ejemplo n.º 4
0
def create_disease_summary():
    """Write ``summaries/diseases_summary.xlsx`` ranking diseases by data volume.

    Each data row holds a disease code, its name and the number of entries
    in its ``'statistics'`` list; rows are sorted by that count, largest
    first.  Headers and rows are also echoed to stdout.
    """
    manager = Resources_Manager()
    summary_rows = [
        (disease['code'], disease['name'], len(disease['statistics']))
        for disease in manager.get_all_diseases()
    ]
    summary_rows.sort(key=lambda entry: entry[2], reverse=True)

    workbook = xlsxwriter.Workbook('summaries/diseases_summary.xlsx')
    worksheet = workbook.add_worksheet()

    # Header row
    for column, header in enumerate(DISEASE_SUMMARY_HEADERS):
        print('%s %s' % (column, header))
        worksheet.write(0, column, header)

    # One row per disease, starting right below the headers.
    for row, entry in enumerate(summary_rows, 1):
        print(entry)
        for column in (0, 1, 2):
            worksheet.write(row, column, entry[column])

    workbook.close()
Ejemplo n.º 5
0
from flask import Flask, jsonify, request, Response, send_from_directory
from flask_cors import CORS, cross_origin
from flask_cache import Cache


from managers.resources_manager import Resources_Manager
from managers.statistics_manager import Statistics_Manager


if __name__ == '__main__':
    # Application objects shared by the route handlers defined below.
    app = Flask(__name__)
    CORS(app)
    cache = Cache(app, config={'CACHE_TYPE': 'simple'})

    statistics_manager = Statistics_Manager()
    resources_manager = Resources_Manager()

    # timeout=2592000 corresponds to 30 days if the unit is seconds.
    @app.route('/api/diseases/disease_statistics')
    @cache.cached(timeout=2592000)
    def get_statistics_for_used_diseases():
        """Return the disease county statistics and their boundaries as JSON."""
        statistics, boundaries = (
            statistics_manager.get_disease_county_statistics())
        print(len(statistics[0]))

        payload = jsonify({
            'statistics': statistics,
            'boundaries': boundaries,
        })
        payload.status_code = 200
        return payload
Ejemplo n.º 6
0
from managers.resources_manager import Resources_Manager
from config_utils.config import ConfigYaml

if __name__ == '__main__':
    # Cleanup script: load the application configuration, then remove the
    # stored stations first and the stored parameters second.
    config_loader = ConfigYaml()
    app_config = config_loader.get_config()
    resources_manager = Resources_Manager(app_config)
    resources_manager.remove_stations()
    resources_manager.remove_parameters()
 def __init__(self, app_config):
     """Create the managers and cache the list of all parameters.

     :param app_config: configuration handed to ``Resources_Manager``.
     """
     super(Data_Transformer, self).__init__()
     manager = Resources_Manager(app_config)
     self.resources_manager = manager
     self.statistics_manager = Statistics_Manager()
     self.parameters = manager.get_all_parameters()
class Data_Transformer(object):
    """Pick the parameters and stations that are used and persist that choice.

    A parameter is kept when at least ``_MIN_PARAMETER_COUNT`` stations
    report it; a station is kept when it reports every kept parameter.
    """

    def __init__(self, app_config):
        """Create the managers and cache the list of all parameters.

        :param app_config: configuration handed to ``Resources_Manager``.
        """
        super(Data_Transformer, self).__init__()
        self.resources_manager = Resources_Manager(app_config)
        self.statistics_manager = Statistics_Manager()
        self.parameters = self.resources_manager.get_all_parameters()

    def transform_data(self):
        """Select the used parameters/stations, store them and refresh statistics."""
        stations = self.resources_manager.get_stations()
        common_parameters = self.find_common_parameters(stations)
        # Computed while NOx and NO are still part of the mapping, so the
        # selected stations also report those two parameters.
        common_stations = self.find_common_stations(common_parameters)

        # Remove NOx and NO and add NO2_INDEX and AQI.  ``pop`` with a
        # default is used because either index may already have been
        # dropped by the minimum-count filter; ``del`` raised KeyError then.
        common_parameters.pop(NOx_INDEX, None)
        common_parameters.pop(NO_INDEX, None)
        used_parameters = [
            parameter for parameter in self.parameters
            if parameter['index'] in common_parameters
        ]
        used_parameters.append({
            'name': 'Dioxid de azot',
            'formula': 'NO2',
            'index': NO2_INDEX
        })
        used_parameters.append({
            'name': 'Air Quality Index',
            'formula': 'AQI',
            'index': 2000
        })
        used_stations = [
            station for station in stations
            if station['internationalCode'] in common_stations
        ]
        used_stations_statistics = create_summary_for_used_stations(
            used_stations, used_parameters, self.resources_manager)

        self.save_used_parameters_and_stations(used_parameters,
                                               used_stations_statistics)
        self.statistics_manager.update_disease_average_cases()
        self.statistics_manager.create_statistics_for_diseases()

    def find_common_parameters(self, stations):
        """Map each sufficiently common parameter index to its station codes.

        :param stations: iterable of station dicts with ``'parameters'`` and
            ``'internationalCode'`` entries; it is traversed once per
            parameter.
        :return: dict ``parameter index -> set of station codes`` holding
            only parameters reported by at least ``_MIN_PARAMETER_COUNT``
            stations.  Each kept parameter is printed with its station count.
        """
        stations_by_parameter = {}
        parameter_names = {}
        for parameter in self.parameters:
            index = parameter['index']
            parameter_names[index] = parameter['name']
            stations_by_parameter[index] = set(
                station['internationalCode'] for station in stations
                if index in station['parameters'])

        # Building a filtered dict (instead of deleting entries) also copes
        # with a parameter index that appears more than once.
        common_parameters = dict(
            (index, codes) for index, codes in stations_by_parameter.items()
            if len(codes) >= _MIN_PARAMETER_COUNT)

        for parameter_index in common_parameters:
            print('%s -> %d' % (parameter_names[parameter_index],
                                len(common_parameters[parameter_index])))

        return common_parameters

    def find_common_stations(self, common_parameters):
        """Return the set of station codes present under every parameter.

        :param common_parameters: dict ``parameter index -> set of codes``.
        """
        candidates = set()
        for codes in common_parameters.values():
            candidates.update(codes)

        return set(
            station for station in candidates
            if self.check_valid_station(station, common_parameters))

    def check_valid_station(self, station, common_parameters):
        """Return True when ``station`` is listed under every parameter.

        An empty ``common_parameters`` mapping yields True.
        """
        return all(station in codes for codes in common_parameters.values())

    def save_used_parameters_and_stations(self, used_parameters,
                                          used_stations):
        """Mark the parameters as used (and viewed) and store station statistics.

        Parameters whose index is in ``_UNVIEWED_PARAMETERS`` are marked as
        used only.
        """
        for parameter in used_parameters:
            self.resources_manager.mark_parameter_used(parameter)
            if parameter['index'] not in _UNVIEWED_PARAMETERS:
                self.resources_manager.mark_parameter_viewed(parameter)

        for station in used_stations:
            self.resources_manager.update_insert_station_statistics(station)
Ejemplo n.º 9
0
from config_utils.config import ConfigYaml
from downloader.downloader import Donwloader
from data_importer import import_data

from managers.transform_data_manager import Data_Transformer
from managers.resources_manager import Resources_Manager

if __name__ == '__main__':
    # Pipeline entry point: import the data, transform it, then register
    # the 'romania' county.
    app_config = ConfigYaml().get_config()
    downloader = Donwloader(app_config)
    resources_manager = Resources_Manager(app_config)
    # The download steps are currently disabled:
    # downloader.prepare_environment()
    # downloader.download_stations()
    # downloader.download_diseases()
    import_data(app_config)
    Data_Transformer(app_config).transform_data()
    resources_manager.add_county('romania')


Ejemplo n.º 10
0
class Prediction_Manager(object):
    """ Prediction Manager """
    def __init__(self, app_config=None):
        """Create the managers, cache the static data and list the candidate tools.

        ``app_config`` is accepted but not used by this constructor.
        ``self.generic_tools`` maps ``'nn'`` / ``'svm'`` to lists of
        ``(display name, constructor keyword arguments)`` pairs.
        """
        super(Prediction_Manager, self).__init__()
        self.resources_manager = Resources_Manager()
        self.statistics_manager = Statistics_Manager()
        self.load_unchanged_data()

        # Neural networks: every layer layout is paired with every activation.
        nn_layouts = [
            ('5 x 30', (30, 30, 30, 30, 30)),
            ('3 x 100', (100, 100, 100)),
        ]
        nn_activations = ('identity', 'tanh', 'relu')
        nn_tools = [
            ('NN %s neurons %s' % (label, activation),
             {'solver': 'sgd',
              'hidden_layer_sizes': layers,
              'activation': activation})
            for label, layers in nn_layouts
            for activation in nn_activations
        ]

        # Support vector machines: kernel / degree / C / coef0 variations.
        svm_tools = [
            ('SVM poly d1 C-10^-4', dict(kernel='poly', degree=1, C=0.0001)),
            ('SVM poly d1 C-100', dict(kernel='poly', degree=1, C=100)),
            ('SVM poly d2', dict(kernel='poly', degree=2)),
            ('SVM poly d2 coef0-2', dict(kernel='poly', degree=2, coef0=2)),
            ('SVM poly d3', dict(kernel='poly', degree=3)),
            ('SVM poly d3 coef0-2', dict(kernel='poly', degree=3, coef0=2)),
            ('SVM rbf C-10^-5', dict(kernel='rbf', C=0.00001)),
            ('SVM rbf C-10^-4', dict(kernel='rbf', C=0.0001)),
            ('SVM rbf C-100', dict(kernel='rbf', C=100)),
            ('SVM linear', dict(kernel='linear')),
        ]

        self.generic_tools = {'nn': nn_tools, 'svm': svm_tools}



    # Dataset inputs: AQI index, wind speed, pressure, rainfall, temperature, disease
    # Dataset output: disease class {0, 1, 2}
    def load_unchanged_data(self):
        self.sets = []

        self.stations_counties, self.air_pollution = self.statistics_manager.air_pollution_county_statistics()
        self.diseases_statistics, self.boundaries = self.statistics_manager.get_disease_county_statistics()

        counties = [x['name'] for x in self.resources_manager.get_counties()]
        self.counties = sorted(counties)

        parameters = [x['index'] for x in self.resources_manager.get_viewed_parameters()]
        self.parameters = sorted(parameters)

        self.counties_indexes = self.statistics_manager.compute_element_index_codification(
            self.counties)
        self.parameter_indexes = self.statistics_manager.compute_element_index_codification(
            self.parameters)
        self.diseases_indexes = self.statistics_manager.get_diseases_codification()
        self.diseases = map(lambda h: h['name'], self.resources_manager.get_used_diseases())

    def create_datasets(self, disease_name, months_ago):
        """Build the ``(inputs, disease_class)`` samples for one disease.

        For every county that has stations and every (year, month) with the
        year offset starting at 2, a sample is produced when both the
        disease value of that month and the value ``months_ago`` months
        earlier are positive.  The weather and air inputs are averaged over
        the window running from the earlier month through the current
        month, both included.

        :param disease_name: key of ``self.diseases_indexes``.
        :param months_ago: number of months the window reaches back.
        :return: list of ``([mean AQI, wind speed class, pressure class,
            rainfall class, temperature class, earlier disease class],
            disease class)`` tuples.
        """
        # Positions of the inputs inside one month's reading vector; the
        # keys are parameter indexes (2000 being the AQI entry).
        AQI_index = self.parameter_indexes[2000]
        wind_speed_index = self.parameter_indexes[19]
        pressure_index = self.parameter_indexes[22]
        rainfall_index = self.parameter_indexes[24]
        temp_index = self.parameter_indexes[20]
        disease_index = self.diseases_indexes[disease_name]

        datasets = []
        for county in self.counties:
            countyIndex = self.counties_indexes[county]
            if not self.stations_counties[countyIndex]:
                continue
            county_diseases = self.diseases_statistics[countyIndex]
            county_pollution = self.air_pollution[countyIndex]

            for year in xrange(2, _LAST_YEAR - _START_YEAR):
                for month in xrange(0, 12):
                    # Step back ``months_ago`` months; divmod carries the
                    # borrow into the year for any number of months.
                    year_shift, new_month = divmod(month - months_ago, 12)
                    new_year = year + year_shift

                    disease_value = county_diseases[year][month][disease_index]
                    last_disease_value = (
                        county_diseases[new_year][new_month][disease_index])
                    if not (disease_value > 0 and last_disease_value > 0):
                        continue

                    disease_class = compute_disease_class(
                        self.boundaries[disease_index], disease_value)
                    last_disease_class = compute_disease_class(
                        self.boundaries[disease_index], last_disease_value)

                    # Collect the readings of every month in the window.
                    AQI_avg = []
                    wind_speed_avg = []
                    pressure_avg = []
                    rainfall_avg = []
                    temp_avg = []
                    zero_skipping = (
                        (wind_speed_index, wind_speed_avg),
                        (pressure_index, pressure_avg),
                        (rainfall_index, rainfall_avg),
                        # The temperature is now read from the window month
                        # like the other inputs; it used to be read from
                        # the current month on every iteration.
                        (temp_index, temp_avg),
                    )
                    while (new_year, new_month) <= (year, month):
                        readings = county_pollution[new_year][new_month]
                        AQI_avg.append(readings[AQI_index])
                        # Zero readings are left out of these averages
                        # (presumably zero marks a missing measurement).
                        for position, bucket in zero_skipping:
                            value = readings[position]
                            if value != 0:
                                bucket.append(value)

                        new_month += 1
                        if new_month > 11:
                            new_month = 0
                            new_year += 1

                    # The AQI input is the plain mean; the others are
                    # mapped to classes, 0 standing for "no reading".
                    aqi_class = mean(AQI_avg)

                    if wind_speed_avg:
                        wind_speed_class = compute_input_class(
                            19, mean(wind_speed_avg))
                    else:
                        wind_speed_class = 0

                    if pressure_avg:
                        pressure_class = compute_input_class(
                            22, mean(pressure_avg))
                    else:
                        pressure_class = 0

                    if rainfall_avg:
                        rainfall_class = compute_input_class(
                            24, mean(rainfall_avg), month)
                    else:
                        rainfall_class = 0

                    if temp_avg:
                        temp_class = compute_input_class(
                            20, mean(temp_avg), month)
                    else:
                        temp_class = 0

                    datasets.append(
                        ([aqi_class, wind_speed_class, pressure_class,
                          rainfall_class, temp_class, last_disease_class],
                         disease_class))
        return datasets

    def create_train_test_from_datasets(self, datasets):
        """Shuffle ``datasets`` in place and split it into train and test parts.

        The first ``int(len(datasets) * DISEASE_TRAIN_SET_PERCENTAGE)``
        samples form the train part.  The dataset size is printed.

        :param datasets: list of ``(inputs, class)`` samples, class in 0..2.
        :return: ``(train samples, test inputs, test classes,
            train class counts, test class counts)``.
        """
        shuffle(datasets)
        split_at = int(len(datasets) * DISEASE_TRAIN_SET_PERCENTAGE)
        print(len(datasets))

        train_part = datasets[:split_at]
        test_part = datasets[split_at:]

        train_classes_counter = [0, 0, 0]
        for sample in train_part:
            train_classes_counter[sample[1]] += 1

        test_classes_counter = [0, 0, 0]
        for sample in test_part:
            test_classes_counter[sample[1]] += 1

        test_input = [sample[0] for sample in test_part]
        test_output = [sample[1] for sample in test_part]
        return (train_part, test_input, test_output,
                train_classes_counter, test_classes_counter)

    def predict_nn(self, train_x, train_t, test_x, test_t):
        """Fit a 3 x 100 ``MLPClassifier`` and return its score on the test data."""
        # The scaler is only fitted; applying it to train_x / test_x is
        # disabled, so the classifier sees the raw inputs.
        StandardScaler().fit(train_x)
        classifier = MLPClassifier(hidden_layer_sizes=(100, 100, 100))
        classifier.fit(train_x, train_t)
        return classifier.score(test_x, test_t)

    def predict_svm(self, train_x, train_t, test_x, test_t):
        """Fit a degree-2 polynomial ``SVC`` and return its score on the test data."""
        classifier = svm.SVC(
            decision_function_shape='ovo', kernel='poly', degree=2)
        classifier.fit(train_x, train_t)
        return classifier.score(test_x, test_t)

    def init_prediction_tools(self):
        """Load the trained tools from ``SCORES_FILENAME`` or train and save them.

        The result, stored in ``self.prediction_tools``, is shaped as
        ``{month: {disease: [(model, score, mean fit seconds), ...]}}`` with
        the NN tools first and the SVM tools after them, both in
        ``self.generic_tools`` order.
        """
        if os.path.isfile(SCORES_FILENAME):
            # NOTE(review): pickle.load can execute arbitrary code; this is
            # only safe while SCORES_FILENAME is a file written by this
            # method and never comes from an untrusted source.
            with open(SCORES_FILENAME, 'rb') as scores_file:
                prediction_tools = pickle.load(scores_file)
        else:
            # Nothing saved yet: train every tool for every month/disease.
            prediction_tools = {}
            for month in xrange(1, INTERVAL_LENGHT):
                prediction_tools[month] = {}
                for disease in self.diseases:
                    dataset = self.create_datasets(disease, month)
                    trained = []
                    for tool_name, nn_params in self.generic_tools['nn']:
                        trained.append(self._train_best_model(
                            lambda params=nn_params: MLPClassifier(
                                warm_start=True, **params),
                            dataset, month, disease, tool_name))
                    for tool_name, svm_params in self.generic_tools['svm']:
                        trained.append(self._train_best_model(
                            lambda params=svm_params: svm.SVC(
                                decision_function_shape='ovo', cache_size=2,
                                **params),
                            dataset, month, disease, tool_name))
                    prediction_tools[month][disease] = trained

            with open(SCORES_FILENAME, 'wb') as scores_file:
                pickle.dump(prediction_tools, scores_file,
                            pickle.HIGHEST_PROTOCOL)

        self.prediction_tools = prediction_tools

    def _train_best_model(self, build_model, dataset, month, disease,
                          tool_name, runs=10):
        """Train ``runs`` fresh models on reshuffled splits and keep the best.

        :param build_model: zero-argument callable returning a new model.
        :param dataset: samples as returned by ``create_datasets``.
        :param month: printed with each score.
        :param disease: printed with each score.
        :param tool_name: printed with each score.
        :param runs: number of train/test repetitions.
        :return: ``(best model, best score, mean fit time in seconds)``.
        """
        best_model = None
        best_score = 0.0
        time_total = 0.0
        for _run in xrange(runs):
            split = self.create_train_test_from_datasets(dataset)
            train_set, test_x, test_t = split[0], split[1], split[2]
            shuffle(train_set)
            train_x = [sample[0] for sample in train_set]
            train_t = [sample[1] for sample in train_set]

            model = build_model()
            started = time()
            model.fit(train_x, train_t)
            time_total += time() - started

            score = model.score(test_x, test_t)
            print('%s %s %s %s' % (month, disease, tool_name, score))
            # The first model is always kept, so a model is stored even
            # when every run scores 0.0 (previously None was stored).
            if best_model is None or score > best_score:
                best_score = score
                best_model = copy.deepcopy(model)

        mean_fit_time = time_total / runs
        print('Saving %f in time %f' % (best_score, mean_fit_time))
        return best_model, best_score, mean_fit_time


    def get_prediction_tools_names(self):
        index = 0
        tools = {}
        for tool in self.generic_tools['nn']:
            tools[tool[0]] = index
            index += 1

        for tool in self.generic_tools['svm']:
            tools[tool[0]] = index
            index += 1
        return tools


    def init_pred_tools_mock(self):
        """Fill ``self.prediction_tools_mock`` with random placeholder results.

        The structure mirrors the real tools: for each month and disease
        there is one ``(None, random score, random time)`` entry per NN
        tool followed by one per SVM tool.
        """
        tool_count = (len(self.generic_tools['nn'])
                      + len(self.generic_tools['svm']))
        mock_tools = {}
        for month in xrange(1, INTERVAL_LENGHT):
            per_disease = {}
            for disease in self.diseases:
                per_disease[disease] = [
                    (None, random.random(), 5 * random.random())
                    for _ in xrange(tool_count)
                ]
            mock_tools[month] = per_disease
        self.prediction_tools_mock = mock_tools

    def predict(self, month, disease, tool_index, predict_data):
        tool = self.prediction_tools[month][disease][tool_index][0]
        predict_data = array(predict_data).reshape(1, -1)
        predction_result = tool.predict(predict_data)[0]
        print predction_result
        return predction_result


    def create_statistics_NN(self):
        """Write ``NN_table.xlsx`` with the NN scores per disease and month.

        One row per (disease, month 1..5) holds each NN tool's score as a
        percentage rounded to two decimals; the highest score(s) of a row
        are rewritten with a highlighted background.  The disease name is
        written only on the first of its rows.
        """
        nn_tools = self.generic_tools['nn']
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook('NN_table.xlsx')
        worksheet = workbook.add_worksheet()

        highlight = workbook.add_format()
        highlight.set_bg_color('#FFE599')

        # Header row: disease name, months used, then one column per tool.
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for column, tool in enumerate(nn_tools, 2):
            worksheet.write(0, column, tool[0])

        # Content
        row = 1
        for disease in diseases:
            name = disease['name']
            worksheet.write(row, 0, name)
            for month in xrange(1, 6):
                worksheet.write(row, 1, month)
                best_value = 0
                best_offsets = []
                for offset in xrange(len(nn_tools)):
                    value = round(
                        self.prediction_tools[month][name][offset][1] * 100,
                        2)
                    if value > best_value:
                        best_value = value
                        best_offsets = [offset]
                    elif value == best_value:
                        best_offsets.append(offset)
                    worksheet.write(row, 2 + offset, value)
                # Overwrite the best cells with the highlighted format.
                for offset in best_offsets:
                    worksheet.write(row, 2 + offset, best_value, highlight)
                row += 1
        workbook.close()

    def create_statistics_SVM(self):
        """Write ``SVM_table.xlsx`` with the SVM scores per disease and month.

        One row per (disease, month 1..5) holds each SVM tool's score as a
        percentage rounded to two decimals; the highest score(s) of a row
        are rewritten with a highlighted background.  The disease name is
        written only on the first of its rows.
        """
        svm_tools = self.generic_tools['svm']
        # The SVM results are stored after the NN results in each list.
        first_svm = len(self.generic_tools['nn'])
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook('SVM_table.xlsx')
        worksheet = workbook.add_worksheet()

        highlight = workbook.add_format()
        highlight.set_bg_color('#FFE599')

        # Header row: disease name, months used, then one column per tool.
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for column, tool in enumerate(svm_tools, 2):
            worksheet.write(0, column, tool[0])

        # Content
        row = 1
        for disease in diseases:
            name = disease['name']
            worksheet.write(row, 0, name)
            for month in xrange(1, 6):
                worksheet.write(row, 1, month)
                best_value = 0
                best_offsets = []
                for offset in xrange(len(svm_tools)):
                    stored = self.prediction_tools[month][name][
                        first_svm + offset]
                    value = round(stored[1] * 100, 2)
                    if value > best_value:
                        best_value = value
                        best_offsets = [offset]
                    elif value == best_value:
                        best_offsets.append(offset)
                    worksheet.write(row, 2 + offset, value)
                # Overwrite the best cells with the highlighted format.
                for offset in best_offsets:
                    worksheet.write(row, 2 + offset, best_value, highlight)
                row += 1
        workbook.close()

    def best_NN(self):
        """Write ``NN_best.xlsx`` with the best disease/month rows for the NN tools.

        Every (disease, month 1..5) row holds each NN tool's score as a
        percentage rounded to two decimals.  Rows are ranked by their
        highest score and at most 15 are written, the best score(s) of each
        row being highlighted.
        """
        nn_tools = self.generic_tools['nn']
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook('NN_best.xlsx')
        worksheet = workbook.add_worksheet()

        highlight = workbook.add_format()
        highlight.set_bg_color('#FFE599')

        # Content: one (negated best score, best columns, cells) per row.
        ranked = []
        for disease in diseases:
            for month in xrange(1, 6):
                line = [disease['name'], month]
                my_max = 0
                cols_max = []
                for index in xrange(len(nn_tools)):
                    value = round(
                        self.prediction_tools[month][disease['name']][index][1]
                        * 100, 2)
                    line.append(value)
                    if value > my_max:
                        my_max = value
                        cols_max = [index]
                    elif value == my_max:
                        cols_max.append(index)
                # The score is negated so an ascending sort lists the best
                # rows first.
                ranked.append((-1 * my_max, cols_max, line))
        ranked.sort()

        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for column, tool in enumerate(nn_tools, 2):
            worksheet.write(0, column, tool[0])

        # Slicing caps the output at 15 rows without failing when fewer
        # rows exist (popping 15 entries unconditionally raised IndexError).
        for row, entry in enumerate(ranked[:15], 1):
            best_columns, cells = entry[1], entry[2]
            for column, value in enumerate(cells):
                # Tool columns start at 2, hence the offset.
                if column - 2 in best_columns:
                    worksheet.write(row, column, value, highlight)
                else:
                    worksheet.write(row, column, value)

        workbook.close()


    def best_SVM(self):
        """Write ``SVM_best.xlsx`` with the best disease/month rows for the SVM tools.

        Every (disease, month 1..5) row holds each SVM tool's score as a
        percentage rounded to two decimals.  Rows are ranked by their
        highest score and at most 15 are written, the best score(s) of each
        row being highlighted.
        """
        svm_tools = self.generic_tools['svm']
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook('SVM_best.xlsx')
        worksheet = workbook.add_worksheet()

        # The SVM results are stored after the NN results in each list.
        step = len(self.generic_tools['nn'])

        highlight = workbook.add_format()
        highlight.set_bg_color('#FFE599')

        # Content: one (negated best score, best columns, cells) per row.
        ranked = []
        for disease in diseases:
            for month in xrange(1, 6):
                line = [disease['name'], month]
                my_max = 0
                cols_max = []
                for index in xrange(len(svm_tools)):
                    stored = self.prediction_tools[month][disease['name']][
                        step + index]
                    value = round(stored[1] * 100, 2)
                    line.append(value)
                    if value > my_max:
                        my_max = value
                        cols_max = [index]
                    elif value == my_max:
                        cols_max.append(index)
                # The score is negated so an ascending sort lists the best
                # rows first.
                ranked.append((-1 * my_max, cols_max, line))
        ranked.sort()

        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for column, tool in enumerate(svm_tools, 2):
            worksheet.write(0, column, tool[0])

        # Slicing caps the output at 15 rows without failing when fewer
        # rows exist (popping 15 entries unconditionally raised IndexError).
        for row, entry in enumerate(ranked[:15], 1):
            best_columns, cells = entry[1], entry[2]
            for column, value in enumerate(cells):
                # Tool columns start at 2, hence the offset.
                if column - 2 in best_columns:
                    worksheet.write(row, column, value, highlight)
                else:
                    worksheet.write(row, column, value)

        workbook.close()



    def sigmoid_effects(self):
        """Write ``Sigmoid_table.xlsx`` with scores of three sigmoid-based models.

        For a fixed set of diseases and months, two logistic-activation
        ``MLPClassifier`` layouts and one sigmoid-kernel ``SVC`` are trained
        on a fresh split; their test scores, as percentages rounded to two
        decimals, go to columns F1, F2 and F3.  The disease name is written
        only on the first of its rows.
        """
        months_by_disease = {
            'Boala interstitiala pulmonara': [1],
            'Accident vascular cerebral': [1, 2, 3, 4],
            'Neoplasm pulmonar': [2],
            'Tulburari vasculare': [4, 3]
        }

        # (model class, constructor arguments), in column order F1, F2, F3.
        model_specs = [
            (MLPClassifier, {'solver': 'sgd',
                             'hidden_layer_sizes': (30, 30, 30, 30, 30),
                             'activation': 'logistic'}),
            (MLPClassifier, {'solver': 'sgd',
                             'hidden_layer_sizes': (100, 100, 100),
                             'activation': 'logistic'}),
            (svm.SVC, {'kernel': 'sigmoid'}),
        ]

        workbook = xlsxwriter.Workbook('Sigmoid_table.xlsx')
        worksheet = workbook.add_worksheet()

        # Headers
        headers = ('Denumire Boala', 'Luni folosite', 'F1', 'F2', 'F3')
        for column, header in enumerate(headers):
            worksheet.write(0, column, header)

        row = 1
        for disease in months_by_disease:
            worksheet.write(row, 0, disease)
            for month in months_by_disease[disease]:
                worksheet.write(row, 1, month)
                dataset = self.create_datasets(disease, month)
                split = self.create_train_test_from_datasets(dataset)
                train_set, test_x, test_t = split[0], split[1], split[2]
                shuffle(train_set)
                train_x = [sample[0] for sample in train_set]
                train_t = [sample[1] for sample in train_set]

                # Score columns start at 2, right after name and month.
                for column, spec in enumerate(model_specs, 2):
                    model = spec[0](**spec[1])
                    model.fit(train_x, train_t)
                    score = model.score(test_x, test_t)
                    worksheet.write(row, column, round(score * 100, 2))
                row += 1
        workbook.close()



    def create_plots_SVM(self):
        """Plot the stored training time per month for two tools of one disease.

        The third element of a stored tool entry is plotted for months
        1..5 of 'Accident vascular cerebral', using the tools at positions
        12 and 14.
        """
        pylab.axis([0, 6, 0, 1])

        # Cost1 vs cost2 plot
        disease = 'Accident vascular cerebral'
        months = [1, 2, 3, 4, 5]
        # NOTE(review): with 6 NN tools stored before the SVM tools,
        # positions 12 and 14 would be the rbf C=0.00001 and rbf C=100
        # entries, while the first legend label says 0.0001 - confirm.
        low_cost_times = []
        high_cost_times = []
        for month in months:
            stored_tools = self.prediction_tools[month][disease]
            low_cost_times.append(stored_tools[12][2])
            high_cost_times.append(stored_tools[14][2])

        pylab.plot(months, low_cost_times, label='Cost 0.0001')
        pylab.plot(months, high_cost_times, label='Cost 100')

        pylab.legend(loc='upper right')
        pylab.xlabel('Luna')
        pylab.ylabel('Timp de antrenare (s)')

        pylab.show()
Ejemplo n.º 11
0
 def __init__(self):
     """Create the resources manager and cache the counties it returns."""
     super(Statistics_Manager, self).__init__()
     manager = Resources_Manager()
     self.resources_manager = manager
     self.counties = manager.get_counties()
Ejemplo n.º 12
0
class Statistics_Manager(object):
    """Statistics Manager for Stations.

    Builds per-county, per-year and per-month aggregates from the station
    measurements and disease records served by a ``Resources_Manager``.
    """

    def __init__(self):
        """Create the resources manager and cache the county list it returns."""
        super(Statistics_Manager, self).__init__()
        self.resources_manager = Resources_Manager()
        self.counties = self.resources_manager.get_counties()

    @staticmethod
    def compute_air_quality_index(params_codif, params_values):
        """Return the worst air-quality class over the AQI parameters.

        Args:
            params_codif: mapping from parameter identifier to its position
                in ``params_values``; must contain every key of
                ``AQI_INDEXES``.
            params_values: indexable collection of parameter values.

        Returns:
            ``1 + k`` where ``k`` is the largest, over all parameters with a
            positive value, count of leading thresholds in
            ``AQI_INDEXES[param]`` that the value is not below. Returns 0
            when every parameter value is <= 0.
        """
        max_index = -1
        for param_identifier in AQI_INDEXES:
            param_value = params_values[params_codif[param_identifier]]
            # Values <= 0 are skipped; missing measurements are stored as 0
            # by compute_statistics_for_station.
            if param_value <= 0:
                continue
            index = 0
            for threshold in AQI_INDEXES[param_identifier]:
                if param_value < threshold:
                    break
                index += 1
            max_index = max(max_index, index)
        return max_index + 1

    @staticmethod
    def compute_element_index_codification(elements):
        """Map each element to its position in ``elements``.

        If an element occurs more than once, its last position wins.
        """
        return {element: index for index, element in enumerate(elements)}

    # NOTE: the original author flagged that the following function does not
    # belong here and should be placed in statistics_manager.
    def compute_statistics_for_station(self, station, parameters, resources_manager):
        """Compute monthly parameter values for one station.

        Args:
            station: dict with at least 'internationalCode' and 'county'.
            parameters: iterable of dicts, each with an 'index' key.
            resources_manager: object exposing
                ``get_measurements_for_station(code, year, month, index)``.

        Returns:
            dict with 'internationalCode', 'county' and 'statistics'; the
            latter is a list of ``{'year', 'month', 'values'}`` dicts, one per
            (year, month) in ``range(_START_YEAR, _LAST_YEAR)`` x ``MONTHS``.
            'values' maps ``str(parameter index)`` to the aggregated value,
            or 0 when no measurements were returned.
        """
        station_code = station['internationalCode']
        statistics = []
        for year in range(_START_YEAR, _LAST_YEAR):
            for month in MONTHS:
                parameter_value_dict = {}
                for parameter in parameters:
                    parameter_index = parameter['index']
                    if parameter_index == NO2_INDEX:
                        # NO2 is derived as (NOx average - NO average).
                        measurements_NOx = resources_manager.get_measurements_for_station(
                            station_code, year, month, NOx_INDEX)
                        measurements_NO = resources_manager.get_measurements_for_station(
                            station_code, year, month, NO_INDEX)

                        NO2_value = 0
                        if measurements_NO and measurements_NOx:
                            NOx_avg = compute_average(measurements_NOx[0]['measurements'])
                            NO_avg = compute_average(measurements_NO[0]['measurements'])
                            NO2_avg = NOx_avg - NO_avg
                            # Only accept the difference when all three
                            # quantities are strictly positive.
                            if NO2_avg > 0 and NOx_avg > 0 and NO_avg > 0:
                                NO2_value = NO2_avg
                        parameter_value_dict[str(NO2_INDEX)] = NO2_value
                        continue

                    measurements = resources_manager.get_measurements_for_station(
                        station_code, year, month, parameter_index)
                    if not measurements:
                        value = 0
                    elif parameter_index == RAINFALL_INDEX:
                        # Rainfall is accumulated, every other parameter is averaged.
                        value = compute_sum(measurements[0]['measurements'])
                    else:
                        value = compute_average(measurements[0]['measurements'])
                    parameter_value_dict[str(parameter_index)] = value
                statistics.append({'year': year, 'month': month, 'values': parameter_value_dict})

        return {
            'internationalCode': station_code,
            'county': station['county'],
            'statistics': statistics,
        }

    def create_statistics_for_diseases(self):
        """Aggregate raw disease records into per-county monthly totals.

        For every disease name in ``DISEASE_CONFIGURATION`` the records of all
        its disease codes are merged into
        ``statistics[county][str(year)][str(month0)]`` (``month0`` is the
        zero-based month) and stored through
        ``resources_manager.insert_update_disease_statistics``.

        Raises:
            KeyError: if a record refers to a county or year that is not part
                of the pre-built table.
        """
        counties = self.resources_manager.get_counties()

        for disease_category in DISEASE_CONFIGURATION:
            for disease_name in DISEASE_CONFIGURATION[disease_category]:
                used_disease_object = {
                    'name': disease_name,
                    'category': disease_category,
                    'avg_cases': 0.0,
                }

                disease_codes = DISEASE_CONFIGURATION[disease_category][disease_name]
                initial_disease_statistics = [
                    self.resources_manager.get_disease_by_code(disease_code)
                    for disease_code in disease_codes]

                # Zero-filled table: county name -> year -> zero-based month.
                statistics = {}
                for county in counties:
                    statistics[county['name']] = {
                        str(year): {str(month): 0 for month in range(12)}
                        for year in range(_START_YEAR, _LAST_YEAR)}

                for disease in initial_disease_statistics:
                    used_disease_object['avg_cases'] += disease['avg_cases']
                    for statistic in disease['statistics']:
                        date = datetime.strptime(statistic['start_date'], '%d.%m.%Y')
                        raw_county = statistic['county']
                        statistic_county = CANONIC_COUNTY_NAMES.get(raw_county, raw_county)
                        statistic_year = str(date.year)
                        statistic_month = str(date.month - 1)
                        statistics[statistic_county][statistic_year][statistic_month] += (
                            statistic['total_number_cases'])

                used_disease_object['statistics'] = statistics
                self.resources_manager.insert_update_disease_statistics(used_disease_object)

    def air_pollution_county_statistics(self):
        """Return statistics between air pollution and counties.

        Station values are summed per county/year/month/parameter and divided
        by the number of stations that reported a positive value. The
        air-quality index of each month is appended as an extra last element.

        Returns:
            stations_in_counties: list (indexed like the sorted county names)
                of sets of station international codes.
            statistics: 4-dimensional matrix.
                statistics[county][year][month][parameter] = value
        """
        counties = sorted(x['name'] for x in self.resources_manager.get_counties())

        parameters = sorted(
            x['index'] for x in self.resources_manager.get_viewed_parameters())
        # AQI is computed below, it is not an input parameter.
        parameters.remove(AQI_INDEX)

        counties_indexes = self.compute_element_index_codification(counties)
        parameter_indexes = self.compute_element_index_codification(parameters)

        stations_statistics = self.resources_manager.get_stations_statistics()

        # Size the year axis from the same constants used to index it below
        # (the original hard-coded range(2010, 2017)).
        years_count = _LAST_YEAR - _START_YEAR
        statistics = [[[[0 for _ in parameters] for _ in range(12)]
                       for _ in range(years_count)] for _ in counties]
        # Number of stations with a positive value, same layout as above.
        statistics_stations = [[[[0 for _ in parameters] for _ in range(12)]
                                for _ in range(years_count)] for _ in counties]

        stations_in_counties = [set() for _ in counties]

        print(parameter_indexes)
        print(parameters)

        # Create 4d matrix of statistics for each county
        for station in stations_statistics:
            county_index = counties_indexes[station['county']]
            stations_in_counties[county_index].add(station['internationalCode'])
            for statistic in station['statistics']:
                statistic_year = int(statistic['year'])
                if statistic_year < _START_YEAR:
                    continue
                year_index = statistic_year - _START_YEAR
                month_index = int(statistic['month']) - 1
                month_sums = statistics[county_index][year_index][month_index]
                month_counts = statistics_stations[county_index][year_index][month_index]
                for parameter, value in statistic['values'].items():
                    if int(parameter) not in parameter_indexes:
                        continue
                    parameter_index = parameter_indexes[int(parameter)]
                    month_sums[parameter_index] += value
                    if value > 0:
                        month_counts[parameter_index] += 1

        for county in counties:
            county_index = counties_indexes[county]
            for year_index in range(years_count):
                for month_index in range(12):
                    month_values = statistics[county_index][year_index][month_index]
                    month_counts = statistics_stations[county_index][year_index][month_index]
                    # NOTE(review): the "- 1" leaves the last parameter as a
                    # plain sum. AQI was already removed from `parameters`,
                    # so this may be a leftover -- confirm before changing.
                    for param_index in range(len(parameters) - 1):
                        # Fixed: the count is read for the current month and
                        # parameter, not for stale variables of the loop above.
                        stations_no = month_counts[param_index]
                        if stations_no > 0:
                            month_values[param_index] /= stations_no
                    aqi_index = self.compute_air_quality_index(
                        parameter_indexes, month_values)
                    month_values.append(aqi_index)

        return stations_in_counties, statistics

    def get_disease_county_statistics(self):
        """Return statistics between diseases and counties.

        Returns:
            statistics: 4-dimensional matrix.
                statistics[county][year][month][disease] = value
            diseases_boundaries: dict mapping a disease index to
                ``[int(x * n) for x in DISEASE_BOUNDARIES[name]]`` where ``n``
                is the number of distinct values recorded for that disease
                from ``_START_YEAR`` onwards.
        """
        counties = sorted(x['name'] for x in self.resources_manager.get_counties())
        counties_indexes = self.compute_element_index_codification(counties)

        used_diseases = self.resources_manager.get_used_diseases()
        sorted_used_diseases_names = sorted(disease['name'] for disease in used_diseases)
        used_diseases_indexes = self.compute_element_index_codification(
            sorted_used_diseases_names)
        statistics = [[[[0 for _ in used_diseases] for _ in range(12)] for _ in
                       range(_START_YEAR, _LAST_YEAR)] for _ in counties]

        for disease in used_diseases:
            disease_index = used_diseases_indexes[disease['name']]
            for county, years in disease['statistics'].items():
                county_index = counties_indexes[county]
                for year, months in years.items():
                    statistic_year = int(year)
                    if statistic_year < _START_YEAR:
                        continue
                    year_index = statistic_year - _START_YEAR
                    for month, cases in months.items():
                        statistics[county_index][year_index][int(month)][disease_index] = cases

        diseases_boundaries = {}
        for disease in used_diseases:
            disease_index = used_diseases_indexes[disease['name']]
            disease_values = set()
            for years in disease['statistics'].values():
                for year, months in years.items():
                    # Fixed: filter on the year being visited, not on the
                    # value left over from the loop above.
                    if int(year) >= _START_YEAR:
                        disease_values.update(months.values())
            disease_values_len = len(disease_values)
            diseases_boundaries[disease_index] = [
                int(x * disease_values_len) for x in DISEASE_BOUNDARIES[disease['name']]]
        return statistics, diseases_boundaries

    def update_disease_average_cases(self):
        """Recompute and store 'avg_cases' for every disease.

        The average is the mean of 'total_number_cases' over the disease's
        statistics; a disease without statistics gets 0.0 instead of raising
        ZeroDivisionError.
        """
        for disease in self.resources_manager.get_all_diseases():
            records = disease['statistics']
            if records:
                total = sum((record['total_number_cases'] for record in records), 0.0)
                disease['avg_cases'] = total / len(records)
            else:
                disease['avg_cases'] = 0.0
            self.resources_manager.update_disease_metadata(disease)

    def get_diseases_codification(self):
        """Return a mapping from used disease name to its index in sorted order."""
        used_diseases = self.resources_manager.get_used_diseases()
        sorted_used_diseases_names = sorted(disease['name'] for disease in used_diseases)
        return self.compute_element_index_codification(sorted_used_diseases_names)
Ejemplo n.º 13
0
            disease_values = sorted(disease_values)
            disease_values_len = len(disease_values)
            diseases_boundaries[disease_index] = [int(x * disease_values_len) for x
                                                  in DISEASE_BOUNDARIES[disease['name']]]
        return statistics, diseases_boundaries

    def update_disease_average_cases(self):
        """Recompute and store 'avg_cases' for every disease.

        For each disease returned by ``resources_manager.get_all_diseases()``
        the mean of 'total_number_cases' over its 'statistics' list is written
        to ``disease['avg_cases']`` and the disease is passed to
        ``resources_manager.update_disease_metadata``. A disease with no
        statistics gets 0.0 instead of raising ZeroDivisionError.
        """
        for disease in self.resources_manager.get_all_diseases():
            records = disease['statistics']
            if records:
                total = sum((record['total_number_cases'] for record in records), 0.0)
                disease['avg_cases'] = total / len(records)
            else:
                # Nothing to average: avoid dividing by zero.
                disease['avg_cases'] = 0.0
            self.resources_manager.update_disease_metadata(disease)

    def get_diseases_codification(self):
        """Return a mapping from used disease name to its index.

        The names of the diseases returned by
        ``resources_manager.get_used_diseases()`` are sorted and handed to
        ``compute_element_index_codification``.
        """
        names = [entry['name'] for entry in self.resources_manager.get_used_diseases()]
        names.sort()
        return self.compute_element_index_codification(names)


if __name__ == '__main__':
    # Manual maintenance entry point. It only builds the two managers and
    # prints progress markers; the actual jobs below are commented out and
    # have to be re-enabled by hand when they need to run.
    sm = Statistics_Manager()
    # `rm` is not used by anything visible in this block.
    rm = Resources_Manager()
    print 'Computing average'
    # sm.update_disease_average_cases()
    print 'Done'
    # sm.create_statistics_for_diseases()
Ejemplo n.º 14
0
from flask import Flask, jsonify, request, json, Response, send_from_directory
from flask_cors import CORS, cross_origin
import json
from flask_cache import Cache
import numpy

from managers.prediction_manager import Prediction_Manager
from managers.resources_manager import Resources_Manager

if __name__ == '__main__':
    app = Flask(__name__)
    CORS(app)
    cache = Cache(app, config={'CACHE_TYPE': 'simple'})

    prediction_manager = Prediction_Manager()
    resources_manager = Resources_Manager()

    diseases_names = [x['name'] for x in resources_manager.get_used_diseases()]
    prediction_manager.init_prediction_tools()

    @app.route('/api/prediction/info')
    @cache.cached(timeout=2592000)
    def get_prediction_info():
        prediction_tools = prediction_manager.get_prediction_tools_names()
        scores = {}
        for month in xrange(1, 6):
            scores[month] = {}
            for disease_name in diseases_names:
                scores[month][disease_name] = []
                for index in xrange(len(prediction_tools)):
                    scores[month][disease_name].append(