def __init__(self, app_config=None):
    """Set up the prediction manager.

    Wires up the resource/statistics managers, preloads the static
    data, and declares the candidate-model grid.  ``generic_tools``
    maps a tool family ('nn' / 'svm') to an ordered list of
    ``(display_name, estimator_kwargs)`` pairs.
    """
    super(Prediction_Manager, self).__init__()
    self.resources_manager = Resources_Manager()
    self.statistics_manager = Statistics_Manager()
    self.load_unchanged_data()
    # Neural-network candidates: two layouts x three activations,
    # all trained with plain SGD.
    nn_candidates = []
    for layout_label, layer_sizes in (('5 x 30', (30,) * 5),
                                      ('3 x 100', (100,) * 3)):
        for activation in ('identity', 'tanh', 'relu'):
            nn_candidates.append((
                'NN %s neurons %s' % (layout_label, activation),
                {'solver': 'sgd',
                 'hidden_layer_sizes': layer_sizes,
                 'activation': activation}))
    # SVM candidates: polynomial kernels of increasing degree, RBF at
    # several regularisation strengths, and a plain linear kernel.
    svm_candidates = [
        ('SVM poly d1 C-10^-4', {'kernel': 'poly', 'degree': 1, 'C': 0.0001}),
        ('SVM poly d1 C-100', {'kernel': 'poly', 'degree': 1, 'C': 100}),
        ('SVM poly d2', {'kernel': 'poly', 'degree': 2}),
        ('SVM poly d2 coef0-2', {'kernel': 'poly', 'degree': 2, 'coef0': 2}),
        ('SVM poly d3', {'kernel': 'poly', 'degree': 3}),
        ('SVM poly d3 coef0-2', {'kernel': 'poly', 'degree': 3, 'coef0': 2}),
        ('SVM rbf C-10^-5', {'kernel': 'rbf', 'C': 0.00001}),
        ('SVM rbf C-10^-4', {'kernel': 'rbf', 'C': 0.0001}),
        ('SVM rbf C-100', {'kernel': 'rbf', 'C': 100}),
        ('SVM linear', {'kernel': 'linear'}),
    ]
    self.generic_tools = {'nn': nn_candidates, 'svm': svm_candidates}
def create_data_summary():
    """Build per-county summaries for every parameter.

    Fills the module-level PARAMETERS_POSITION table (parameter index
    -> (output column, parameter name), columns starting at 4) and then
    generates one summary per county.
    """
    res_manager = Resources_Manager()
    county_names = [county['name'] for county in res_manager.get_counties()]
    all_parameters = res_manager.get_all_parameters()
    # Columns 0-3 are reserved; parameter columns start at 4.
    for column, parameter in enumerate(all_parameters, start=4):
        PARAMETERS_POSITION[parameter['index']] = (column, parameter['name'])
    for county_name in county_names:
        create_summary_for_county(county_name, all_parameters, res_manager)
def import_data(app_config):
    """Import disease data into the database.

    Instantiating the managers also ensures the DB connection is up.
    Station and measurement imports are currently disabled.
    """
    res_mgr = Resources_Manager()
    stats_mgr = Statistics_Manager()
    # Disabled import steps, kept for reference:
    # import_stations(res_mgr, app_config)
    # import_stations_measurements(res_mgr, app_config)
    import_diseases(res_mgr, stats_mgr, app_config)
def create_disease_summary(): resources_manager = Resources_Manager() most_frequently_diseases = [] for disease in resources_manager.get_all_diseases(): most_frequently_diseases.append((disease['code'], disease['name'], len(disease['statistics']))) most_frequently_diseases = sorted(most_frequently_diseases, key=lambda d: d[2], reverse=True) workbook = xlsxwriter.Workbook('summaries/diseases_summary.xlsx') worksheet = workbook.add_worksheet() # Headers for index, header in enumerate(DISEASE_SUMMARY_HEADERS): print index, header worksheet.write(0, index, header) row = 1; for disease in most_frequently_diseases: print disease worksheet.write(row, 0, disease[0]) worksheet.write(row, 1, disease[1]) worksheet.write(row, 2, disease[2]) row += 1 workbook.close()
# Small Flask service exposing the disease/county statistics matrices.
from flask import Flask, jsonify, request, Response, send_from_directory
from flask_cors import CORS, cross_origin
from flask_cache import Cache
from managers.resources_manager import Resources_Manager
from managers.statistics_manager import Statistics_Manager

if __name__ == '__main__':
    app = Flask(__name__)
    CORS(app)
    # Simple in-process cache; 2592000 s = 30 days per cached response.
    cache = Cache(app, config={'CACHE_TYPE': 'simple'})
    statistics_manager = Statistics_Manager()
    resources_manager = Resources_Manager()

    @app.route('/api/diseases/disease_statistics')
    @cache.cached(timeout=2592000)
    def get_statistics_for_used_diseases():
        """Return the full disease statistics matrix plus the per-disease
        class boundaries as JSON."""
        statistics, boundaries = statistics_manager.get_disease_county_statistics()
        # Debug trace (Python 2 print): number of year slots for county 0.
        print len(statistics[0])
        response = {
            'statistics': statistics,
            'boundaries': boundaries
        }
        response = jsonify(response)
        response.status_code = 200
        return response
    # NOTE(review): no app.run() is visible in this fragment — presumably
    # it follows later in the original file; confirm before relying on it.
# Maintenance script: wipe all station and parameter records from the
# database via the Resources_Manager.
from managers.resources_manager import Resources_Manager
from config_utils.config import ConfigYaml

if __name__ == '__main__':
    app_config = ConfigYaml().get_config()
    resources_manager = Resources_Manager(app_config)
    # Destructive: removes stored stations, then stored parameters.
    resources_manager.remove_stations()
    resources_manager.remove_parameters()
def __init__(self, app_config):
    """Initialise the transformer with its data-access managers.

    Args:
        app_config: application configuration forwarded to
            Resources_Manager (presumably DB settings — confirm).
    """
    super(Data_Transformer, self).__init__()
    self.resources_manager = Resources_Manager(app_config)
    self.statistics_manager = Statistics_Manager()
    # Full parameter list, cached once for all later passes.
    self.parameters = self.resources_manager.get_all_parameters()
class Data_Transformer(object):
    """Selects the parameters/stations with sufficient coverage and
    rebuilds the derived statistics (NO2, AQI, disease stats) from them."""

    def __init__(self, app_config):
        super(Data_Transformer, self).__init__()
        self.resources_manager = Resources_Manager(app_config)
        self.statistics_manager = Statistics_Manager()
        # Full parameter list, cached once for all passes below.
        self.parameters = self.resources_manager.get_all_parameters()

    def transform_data(self):
        """Run the full transformation pipeline.

        Steps: keep parameters measured by enough stations, keep the
        stations measuring all of those parameters, replace NOx/NO with
        a derived NO2 entry plus a synthetic AQI parameter, persist the
        station summaries, then refresh disease statistics.
        """
        stations = self.resources_manager.get_stations()
        common_parameters = self.find_common_parameters(stations)
        common_stations = self.find_common_stations(common_parameters)
        # Remove NOx and NO and add NO2_INDEX and AQI
        del common_parameters[NOx_INDEX]
        del common_parameters[NO_INDEX]
        used_parameters = [
            parameter for parameter in self.parameters
            if parameter['index'] in common_parameters
        ]
        # NO2 ("Dioxid de azot") is a derived parameter; AQI is synthetic.
        # 2000 is the hard-coded AQI parameter index (AQI_INDEX elsewhere
        # in the project — confirm they stay in sync).
        used_parameters.append({
            'name': 'Dioxid de azot',
            'formula': 'NO2',
            'index': NO2_INDEX
        })
        used_parameters.append({
            'name': 'Air Quality Index',
            'formula': 'AQI',
            'index': 2000
        })
        used_stations = [
            station for station in stations
            if station['internationalCode'] in common_stations
        ]
        used_stations_statistics = create_summary_for_used_stations(
            used_stations, used_parameters, self.resources_manager)
        self.save_used_parameters_and_stations(used_parameters,
                                               used_stations_statistics)
        self.statistics_manager.update_disease_average_cases()
        self.statistics_manager.create_statistics_for_diseases()

    def find_common_parameters(self, stations):
        """Map parameter index -> set of station codes measuring it,
        dropping parameters measured by fewer than _MIN_PARAMETER_COUNT
        stations.  Returns the filtered dict."""
        common_parameters = {}
        parameters_codification = {}
        for parameter in self.parameters:
            common_parameters[parameter['index']] = set()
            parameters_codification[parameter['index']] = parameter['name']
        for parameter in self.parameters:
            for station in stations:
                if parameter['index'] in station['parameters']:
                    common_parameters[parameter['index']].add(
                        station['internationalCode'])
        for parameter in self.parameters:
            if len(common_parameters[
                    parameter['index']]) < _MIN_PARAMETER_COUNT:
                del common_parameters[parameter['index']]
        # Debug trace of the surviving parameters (Python 2 print).
        for parameter_index in common_parameters:
            print '%s -> %d' % (parameters_codification[parameter_index],
                                len(common_parameters[parameter_index]))
        return common_parameters

    def find_common_stations(self, common_parameters):
        """Return the set of station codes that appear in every
        parameter's station set."""
        common_stations = set()
        all_possible_stations = set()
        for parameter_index in common_parameters:
            for station in common_parameters[parameter_index]:
                all_possible_stations.add(station)
        for station in all_possible_stations:
            if (station not in common_stations) and (self.check_valid_station(
                    station, common_parameters)):
                common_stations.add(station)
        return common_stations

    def check_valid_station(self, station, common_parameters):
        """True iff `station` measures every parameter in
        `common_parameters` (i.e. appears in each station set)."""
        ok_flag = True
        for parameter_index in common_parameters:
            if station not in common_parameters[parameter_index]:
                ok_flag = False
        return ok_flag

    def save_used_parameters_and_stations(self, used_parameters,
                                          used_stations):
        """Persist the chosen parameters (marking each as used, and as
        viewed unless listed in _UNVIEWED_PARAMETERS) and upsert the
        per-station statistics."""
        for parameter in used_parameters:
            self.resources_manager.mark_parameter_used(parameter)
            # _UNVIEWED_PARAMETERS are kept for computation but not
            # flagged as viewed — presumably hidden from display; confirm.
            if parameter['index'] not in _UNVIEWED_PARAMETERS:
                self.resources_manager.mark_parameter_viewed(parameter)
        for station in used_stations:
            self.resources_manager.update_insert_station_statistics(station)
# Pipeline entry point: (optionally) download raw data, import it into
# the database, run the transformation step, and register 'romania'.
from config_utils.config import ConfigYaml
# NOTE(review): 'Donwloader' is how the class is actually named in the
# downloader module (the import works as-is); fixing the typo would
# require changing that module too.
from downloader.downloader import Donwloader
from data_importer import import_data
from managers.transform_data_manager import Data_Transformer
from managers.resources_manager import Resources_Manager

if __name__ == '__main__':
    app_config = ConfigYaml().get_config()
    downloader = Donwloader(app_config)
    resources_manager = Resources_Manager(app_config)
    # Download steps are disabled — raw data is presumably already
    # available locally; confirm before re-enabling.
    # downloader.prepare_environment()
    # downloader.download_stations()
    # downloader.download_diseases()
    import_data(app_config)
    data_transformer = Data_Transformer(app_config)
    data_transformer.transform_data()
    # 'romania' looks like a whole-country pseudo-county used for
    # aggregate statistics — confirm against Resources_Manager.add_county.
    resources_manager.add_county('romania')
class Prediction_Manager(object):
    """Trains/loads per-(month, disease) NN and SVM classifiers and
    exposes prediction plus XLSX/plot reporting over their scores.

    Model input vector: [AQI class, wind-speed class, pressure class,
    rainfall class, temperature class, previous disease class];
    output: a disease class in {0, 1, 2}.
    """

    def __init__(self, app_config=None):
        super(Prediction_Manager, self).__init__()
        self.resources_manager = Resources_Manager()
        self.statistics_manager = Statistics_Manager()
        self.load_unchanged_data()
        # (display name, estimator kwargs) grids; list order defines the
        # tool indexes stored in self.prediction_tools[month][disease]
        # (NN tools first, then SVM tools — see get_prediction_tools_names).
        self.generic_tools = {
            'nn': [
                # 5 x 30
                ('NN 5 x 30 neurons identity', {'solver': 'sgd', 'hidden_layer_sizes': (30, 30, 30, 30, 30), 'activation': 'identity'}),
                ('NN 5 x 30 neurons tanh', {'solver': 'sgd', 'hidden_layer_sizes': (30, 30, 30, 30, 30), 'activation': 'tanh'}),
                ('NN 5 x 30 neurons relu', {'solver': 'sgd', 'hidden_layer_sizes': (30, 30, 30, 30, 30), 'activation': 'relu'}),
                # 3 x 100
                ('NN 3 x 100 neurons identity', {'solver': 'sgd', 'hidden_layer_sizes': (100, 100, 100), 'activation': 'identity'}),
                ('NN 3 x 100 neurons tanh', {'solver': 'sgd', 'hidden_layer_sizes': (100, 100, 100), 'activation': 'tanh'}),
                ('NN 3 x 100 neurons relu', {'solver': 'sgd', 'hidden_layer_sizes': (100, 100, 100), 'activation': 'relu'})
            ],
            'svm': [
                ('SVM poly d1 C-10^-4', {'kernel': 'poly', 'degree': 1, 'C': 0.0001}),
                ('SVM poly d1 C-100', {'kernel': 'poly', 'degree': 1, 'C': 100}),
                ('SVM poly d2', {'kernel': 'poly', 'degree': 2}),
                ('SVM poly d2 coef0-2', {'kernel': 'poly', 'degree': 2, 'coef0': 2}),
                ('SVM poly d3', {'kernel': 'poly', 'degree': 3}),
                ('SVM poly d3 coef0-2', {'kernel': 'poly', 'degree': 3, 'coef0': 2}),
                ('SVM rbf C-10^-5', {'kernel': 'rbf', 'C': 0.00001}),
                ('SVM rbf C-10^-4', {'kernel': 'rbf', 'C': 0.0001}),
                ('SVM rbf C-100', {'kernel': 'rbf', 'C': 100}),
                ('SVM linear', {'kernel': 'linear'})
            ]
        }

    # inputs: AQI index, wind speed, pressure, rainfall, temperature, disease
    # output: disease class {0, 1, 2}
    def load_unchanged_data(self):
        """Cache data that does not change during a run: county and
        parameter codifications plus the pollution/disease matrices."""
        self.sets = []
        # stations_counties[county_index]: set of station codes per county
        # (per air_pollution_county_statistics); used as a truthiness test.
        self.stations_counties, self.air_pollution = self.statistics_manager.air_pollution_county_statistics()
        self.diseases_statistics, self.boundaries = self.statistics_manager.get_disease_county_statistics()
        counties = [x['name'] for x in self.resources_manager.get_counties()]
        self.counties = sorted(counties)
        parameters = [x['index'] for x in self.resources_manager.get_viewed_parameters()]
        self.parameters = sorted(parameters)
        self.counties_indexes = self.statistics_manager.compute_element_index_codification(
            self.counties)
        self.parameter_indexes = self.statistics_manager.compute_element_index_codification(
            self.parameters)
        self.diseases_indexes = self.statistics_manager.get_diseases_codification()
        # Python 2 map -> plain list of used-disease display names.
        self.diseases = map(lambda h: h['name'],
                            self.resources_manager.get_used_diseases())

    def create_datasets(self, disease_name, months_ago):
        """Build (input, output) pairs for one disease.

        For every county with stations and every (year, month) with both
        a current and a `months_ago`-old disease value, averages the
        weather/pollution parameters over the window and emits one pair.

        Args:
            disease_name: key into the disease codification.
            months_ago: look-back window length in months.
        Returns:
            list of ([aqi, wind, pressure, rainfall, temp, prev_class],
            disease_class) pairs.
        """
        datasets = []
        # Hard-coded parameter ids -> matrix column indexes.
        AQI_index = self.parameter_indexes[2000]
        wind_speed_index = self.parameter_indexes[19]
        pressure_index = self.parameter_indexes[22]
        rainfall_index = self.parameter_indexes[24]
        temp_index = self.parameter_indexes[20]
        # The four below are looked up but never used in this method.
        dox_sulf_index = self.parameter_indexes[1]
        ozone_index = self.parameter_indexes[9]
        pm_aut_index = self.parameter_indexes[4]
        pm_grv_index = self.parameter_indexes[5]
        disease_index = self.diseases_indexes[disease_name]
        last_disease = None  # NOTE(review): assigned but never used.
        for county in self.counties:
            countyIndex = self.counties_indexes[county]
            # last_disease = None
            if self.stations_counties[countyIndex]:
                # Start at year offset 2 so the look-back cannot underflow.
                for year in xrange(2, _LAST_YEAR - _START_YEAR):
                    for month in xrange(0, 12):
                        # (new_year, new_month) = current month minus months_ago.
                        new_month = month - months_ago
                        new_year = year
                        if new_month < 0:
                            new_year -= 1
                            new_month += 12
                        disease_value = (
                            self.diseases_statistics[countyIndex][year][month][disease_index])
                        last_disease_value = (
                            self.diseases_statistics[countyIndex][new_year][new_month][disease_index])
                        # Only emit samples where both endpoints have data.
                        if disease_value > 0 and last_disease_value > 0:
                            disease_class = compute_disease_class(
                                self.boundaries[disease_index], disease_value)
                            last_disease_class = compute_disease_class(
                                self.boundaries[disease_index], last_disease_value)
                            # Average each parameter over the look-back window.
                            AQI_avg = []
                            wind_speed_avg = []
                            pressure_avg = []
                            rainfall_avg = []
                            temp_avg = []
                            while (new_year == year and new_month <= month) or new_year < year:
                                AQI_value = (
                                    self.air_pollution[countyIndex][new_year][new_month][AQI_index])
                                AQI_avg.append(AQI_value)
                                # Zero readings are treated as "no data"
                                # and excluded from the averages.
                                wind_speed_value = (
                                    self.air_pollution[countyIndex][new_year][new_month][wind_speed_index])
                                if wind_speed_value != 0:
                                    wind_speed_avg.append(wind_speed_value)
                                pressure_value = (
                                    self.air_pollution[countyIndex][new_year][new_month][pressure_index])
                                if pressure_value != 0:
                                    pressure_avg.append(pressure_value)
                                rainfall_value = (
                                    self.air_pollution[countyIndex][new_year][new_month][rainfall_index])
                                if rainfall_value != 0:
                                    rainfall_avg.append(rainfall_value)
                                # NOTE(review): indexes [year][month] while every
                                # sibling uses [new_year][new_month] — the
                                # "average" is the same value repeated. Likely a
                                # bug; confirm before fixing.
                                temp_value = (
                                    self.air_pollution[countyIndex][year][month][temp_index])
                                if temp_value != 0:
                                    temp_avg.append(temp_value)
                                new_month += 1
                                if new_month > 11:
                                    new_month %= 12
                                    new_year += 1
                            # print AQI_avg, monoxid_avg, wind_speed_avg, pressure_avg,
                            # rainfall_avg, temp_avg
                            # Experiment: feed class labels instead of raw
                            # values for the windowed inputs.
                            aqi_class = mean(AQI_avg)
                            if wind_speed_avg:
                                wind_speed_class = compute_input_class(
                                    19, mean(wind_speed_avg))
                            else:
                                wind_speed_class = 0
                            if pressure_avg:
                                pressure_class = compute_input_class(
                                    22, mean(pressure_avg))
                            else:
                                pressure_class = 0
                            if rainfall_avg:
                                rainfall_class = compute_input_class(
                                    24, mean(rainfall_avg), month)
                            else:
                                rainfall_class = 0
                            if temp_avg:
                                temp_class = compute_input_class(20, mean(temp_avg), month)
                            else:
                                temp_class = 0
                            datasets.append(
                                ([aqi_class, wind_speed_class, pressure_class,
                                  rainfall_class, temp_class, last_disease_class],
                                 disease_class))
        return datasets

    def create_train_test_from_datasets(self, datasets):
        """Shuffle `datasets` IN PLACE and split it into train/test.

        Returns (train_pairs, test_input, test_output,
        train_classes_counter, test_classes_counter).  Note the first
        element is the raw (input, output) pairs slice; the
        train_input/train_output lists built below are never returned
        (dead code — kept as-is).
        """
        shuffle(datasets)
        train_input = []
        train_output = []
        test_input = []
        test_output = []
        test_classes_counter = [0] * 3
        train_classes_counter = [0] * 3
        train_set_len = int(len(datasets) * DISEASE_TRAIN_SET_PERCENTAGE)
        # Debug trace (Python 2 print).
        print len(datasets)
        for index in xrange(train_set_len):
            train_input.append(datasets[index][0])
            train_output.append(datasets[index][1])
            train_classes_counter[datasets[index][1]] += 1
        for index in xrange(train_set_len, len(datasets)):
            test_input.append(datasets[index][0])
            test_output.append(datasets[index][1])
            test_classes_counter[datasets[index][1]] += 1
        return datasets[:train_set_len], test_input, test_output, train_classes_counter, test_classes_counter

    def predict_nn(self, train_x, train_t, test_x, test_t):
        """Baseline: fit a fixed 3x100 MLP and return its test score."""
        # print train_x
        scaler = StandardScaler()
        scaler.fit(train_x)
        # Scaling is currently disabled: the scaler is fitted but unused.
        # train_x = scaler.transform(train_x)
        # test_x = scaler.transform(test_x)
        nn = MLPClassifier(hidden_layer_sizes=(100, 100, 100))
        nn.fit(train_x, train_t)
        return nn.score(test_x, test_t)

    def predict_svm(self, train_x, train_t, test_x, test_t):
        """Baseline: fit a fixed poly-degree-2 SVC and return its test score."""
        clf = svm.SVC(decision_function_shape='ovo',
                      **{'kernel': 'poly', 'degree': 2})
        clf.fit(train_x, train_t)
        return clf.score(test_x, test_t)

    def init_prediction_tools(self):
        """Load prediction tools from SCORES_FILENAME if present,
        otherwise train the full grid and persist it.

        self.prediction_tools[month][disease] is a list of
        (best_model, best_score, avg_fit_seconds) tuples, NN tools
        first then SVM tools, in generic_tools order.  Each candidate
        is trained 10 times on fresh splits and the best score kept.
        """
        prediction_tools = {}
        # load conf
        if os.path.isfile(SCORES_FILENAME):
            with open(SCORES_FILENAME, 'rb') as scores_file:
                prediction_tools = pickle.load(scores_file)
        else:
            # generate new prediction tools
            for month in xrange(1, INTERVAL_LENGHT):
                prediction_tools[month] = {}
                for disease in self.diseases:
                    prediction_tools[month][disease] = []
                    # NN params
                    dataset = self.create_datasets(disease, month)
                    for nn_params in self.generic_tools['nn']:
                        max_nn = None
                        max_score = 0.0
                        time_total = 0.0
                        for _ in xrange(10):
                            train_set, test_x, test_t, _, _ = self.create_train_test_from_datasets(dataset)
                            new_nn = MLPClassifier(warm_start=True, **nn_params[1])
                            train_x = []
                            train_t = []
                            shuffle(train_set)
                            for index in xrange(len(train_set)):
                                train_x.append(train_set[index][0])
                                train_t.append(train_set[index][1])
                            t0 = time()
                            new_nn.fit(train_x, train_t)
                            time_total += time() - t0
                            new_score = new_nn.score(test_x, test_t)
                            print month, disease, nn_params[0], new_score
                            if new_score > max_score:
                                max_score = new_score
                                # Deep-copy so later fits don't mutate the winner.
                                max_nn = copy.deepcopy(new_nn)
                        print 'Saving %f in time %f' % (max_score, time_total/10)
                        prediction_tools[month][disease].append(
                            (max_nn, max_score, time_total/10))
                    for svm_params in self.generic_tools['svm']:
                        max_svm = None
                        max_score = 0.0
                        time_total = 0.0
                        # NOTE(review): this `index` is immediately shadowed by
                        # the inner loop below — harmless but confusing.
                        for index in xrange(10):
                            train_set, test_x, test_t, _, _ = self.create_train_test_from_datasets(dataset)
                            new_svm = svm.SVC(decision_function_shape='ovo',
                                              cache_size=2, **svm_params[1])
                            train_x = []
                            train_t = []
                            shuffle(train_set)
                            for index in xrange(len(train_set)):
                                train_x.append(train_set[index][0])
                                train_t.append(train_set[index][1])
                            t0 = time()
                            new_svm.fit(train_x, train_t)
                            time_total += time() - t0
                            new_score = new_svm.score(test_x, test_t)
                            if new_score > max_score:
                                max_score = new_score
                                max_svm = copy.deepcopy(new_svm)
                            print month, disease, svm_params[0], new_score
                        print 'Saving %f in time %f' % (max_score, time_total/10)
                        prediction_tools[month][disease].append(
                            (max_svm, max_score, time_total/10))
            # save conf
            with open(SCORES_FILENAME, 'wb') as scores_file:
                pickle.dump(prediction_tools, scores_file, pickle.HIGHEST_PROTOCOL)
        self.prediction_tools = prediction_tools

    def get_prediction_tools_names(self):
        """Map tool display name -> its index in the per-disease tool
        list (NN tools first, then SVM, matching init_prediction_tools)."""
        index = 0
        tools = {}
        for tool in self.generic_tools['nn']:
            tools[tool[0]] = index
            index += 1
        for tool in self.generic_tools['svm']:
            tools[tool[0]] = index
            index += 1
        return tools

    def init_pred_tools_mock(self):
        """Fill self.prediction_tools_mock with random scores/times in
        the same shape as init_prediction_tools — lets the UI be tested
        without training any model."""
        prediction_tools = {}
        for month in xrange(1, INTERVAL_LENGHT):
            prediction_tools[month] = {}
            for disease in self.diseases:
                prediction_tools[month][disease] = []
                # NN params
                for nn_params in self.generic_tools['nn']:
                    prediction_tools[month][disease].append(
                        (None, random.random(), 5*random.random()))
                for svm_params in self.generic_tools['svm']:
                    prediction_tools[month][disease].append(
                        (None, random.random(), 5*random.random()))
        self.prediction_tools_mock = prediction_tools

    def predict(self, month, disease, tool_index, predict_data):
        """Run one stored model on a single sample; returns its class."""
        tool = self.prediction_tools[month][disease][tool_index][0]
        # Reshape to the (1, n_features) shape sklearn expects.
        predict_data = array(predict_data).reshape(1, -1)
        predction_result = tool.predict(predict_data)[0]
        print predction_result
        return predction_result

    def create_statistics_NN(self):
        """Write NN_table.xlsx: per (disease, month) the score of every
        NN tool, best cell(s) highlighted."""
        XLS_NAME = 'NN_table.xlsx'
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook(XLS_NAME)
        worksheet = workbook.add_worksheet()
        # `format` shadows the builtin — kept as-is.
        format = workbook.add_format()
        format.set_bg_color('#FFE599')
        row = 0
        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for index in xrange(len(self.generic_tools['nn'])):
            worksheet.write(0, 2 + index, self.generic_tools['nn'][index][0])
        row += 1
        # Content: disease name written only on the first of its 5 rows.
        for disease in diseases:
            worksheet.write(row, 0, disease['name'])
            for month in xrange(1, 6):
                worksheet.write(row, 1, month)
                my_max = 0
                cols_max = []
                for index in xrange(len(self.generic_tools['nn'])):
                    # Stored score -> percentage, 2 decimals.
                    value = round(self.prediction_tools[month][disease['name']][index][1] * 100, 2)
                    if value > my_max:
                        my_max = value
                        cols_max = [index]
                    elif value == my_max:
                        cols_max.append(index)
                    worksheet.write(row, 2+index, value)
                # Re-write the winning cell(s) with the highlight format.
                for index in cols_max:
                    worksheet.write(row, 2+index, my_max, format)
                row += 1
        workbook.close()

    def create_statistics_SVM(self):
        """Write SVM_table.xlsx: per (disease, month) the score of every
        SVM tool (stored after the NN tools, hence `step`), best cell(s)
        highlighted."""
        XLS_NAME = 'SVM_table.xlsx'
        # SVM tools live at offset len(nn) in the per-disease lists.
        step = len(self.generic_tools['nn'])
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook(XLS_NAME)
        worksheet = workbook.add_worksheet()
        format = workbook.add_format()
        format.set_bg_color('#FFE599')
        row = 0
        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for index in xrange(len(self.generic_tools['svm'])):
            worksheet.write(0, 2 + index, self.generic_tools['svm'][index][0])
        row += 1
        # Content
        for disease in diseases:
            worksheet.write(row, 0, disease['name'])
            for month in xrange(1, 6):
                worksheet.write(row, 1, month)
                my_max = 0
                cols_max = []
                for index in xrange(len(self.generic_tools['svm'])):
                    value = round(self.prediction_tools[month][disease['name']][step + index][1] * 100, 2)
                    if value > my_max:
                        my_max = value
                        cols_max = [index]
                    elif value == my_max:
                        cols_max.append(index)
                    worksheet.write(row, 2+index, value)
                for index in cols_max:
                    worksheet.write(row, 2+index, my_max, format)
                row += 1
        workbook.close()

    def best_NN(self):
        """Write NN_best.xlsx: the 15 (disease, month) rows with the
        highest best-NN score, winners highlighted."""
        XLS_NAME = 'NN_best.xlsx'
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook(XLS_NAME)
        worksheet = workbook.add_worksheet()
        format = workbook.add_format()
        format.set_bg_color('#FFE599')
        # Content
        # NOTE(review): local `heapq` shadows the heapq module name;
        # heappush/heappop must be imported directly for this to work.
        heapq = []
        for disease in diseases:
            # worksheet.write(row, 0, disease['name'])
            for month in xrange(1, 6):
                # worksheet.write(row, 1, month)
                line = [disease['name'], month]
                my_max = 0
                cols_max = []
                for index in xrange(len(self.generic_tools['nn'])):
                    value = round(self.prediction_tools[month][disease['name']][index][1] * 100, 2)
                    line.append(value)
                    if value > my_max:
                        my_max = value
                        cols_max = [index]
                    elif value == my_max:
                        cols_max.append(index)
                    # worksheet.write(row, 2+index, value)
                # for index in cols_max:
                #     worksheet.write(row, 2+index, my_max, format)
                # row += 1
                # Negate so the min-heap pops the highest score first.
                heappush(heapq, (-1 * my_max, cols_max, line))
        row = 0
        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for index in xrange(len(self.generic_tools['nn'])):
            worksheet.write(0, 2 + index, self.generic_tools['nn'][index][0])
        row += 1
        for i in xrange(15):
            line = heappop(heapq)
            for index, value in enumerate(line[2]):
                # cols_max holds tool indexes; columns are offset by the
                # two leading cells (name, month), hence index-2.
                if index-2 in line[1]:
                    worksheet.write(row, index, value, format)
                else:
                    worksheet.write(row, index, value)
            row += 1
        workbook.close()

    def best_SVM(self):
        """Write SVM_best.xlsx: the 15 (disease, month) rows with the
        highest best-SVM score, winners highlighted."""
        XLS_NAME = 'SVM_best.xlsx'
        diseases = self.resources_manager.get_used_diseases()
        workbook = xlsxwriter.Workbook(XLS_NAME)
        worksheet = workbook.add_worksheet()
        # SVM tools live at offset len(nn) in the per-disease lists.
        step = len(self.generic_tools['nn'])
        format = workbook.add_format()
        format.set_bg_color('#FFE599')
        # Content (same shadowed-`heapq` caveat as best_NN).
        heapq = []
        for disease in diseases:
            # worksheet.write(row, 0, disease['name'])
            for month in xrange(1, 6):
                # worksheet.write(row, 1, month)
                line = [disease['name'], month]
                my_max = 0
                cols_max = []
                for index in xrange(len(self.generic_tools['svm'])):
                    value = round(self.prediction_tools[month][disease['name']][step + index][1] * 100, 2)
                    line.append(value)
                    if value > my_max:
                        my_max = value
                        cols_max = [index]
                    elif value == my_max:
                        cols_max.append(index)
                    # worksheet.write(row, 2+index, value)
                # for index in cols_max:
                #     worksheet.write(row, 2+index, my_max, format)
                # row += 1
                heappush(heapq, (-1 * my_max, cols_max, line))
        row = 0
        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        for index in xrange(len(self.generic_tools['svm'])):
            worksheet.write(0, 2 + index, self.generic_tools['svm'][index][0])
        row += 1
        for i in xrange(15):
            line = heappop(heapq)
            for index, value in enumerate(line[2]):
                if index-2 in line[1]:
                    worksheet.write(row, index, value, format)
                else:
                    worksheet.write(row, index, value)
            row += 1
        workbook.close()

    def sigmoid_effects(self):
        """Write Sigmoid_table.xlsx: scores of three sigmoid/logistic
        models (two logistic MLPs, one sigmoid-kernel SVC) on a
        hand-picked set of (disease, months) combinations."""
        conf = {
            'Boala interstitiala pulmonara': [1],
            'Accident vascular cerebral': [1, 2, 3, 4],
            'Neoplasm pulmonar': [2],
            'Tulburari vasculare': [4, 3]
        }
        tools = [
            {'solver': 'sgd', 'hidden_layer_sizes': (30, 30, 30, 30, 30), 'activation': 'logistic'},
            {'solver': 'sgd', 'hidden_layer_sizes': (100, 100, 100), 'activation': 'logistic'},
            {'kernel': 'sigmoid'}
        ]
        # headers
        XLS_NAME = 'Sigmoid_table.xlsx'
        workbook = xlsxwriter.Workbook(XLS_NAME)
        worksheet = workbook.add_worksheet()
        # format = workbook.add_format()
        # format.set_bg_color('#FFE599')
        # Headers
        worksheet.write(0, 0, 'Denumire Boala')
        worksheet.write(0, 1, 'Luni folosite')
        worksheet.write(0, 2, 'F1')
        worksheet.write(0, 3, 'F2')
        worksheet.write(0, 4, 'F3')
        row = 1
        # Disease name is written only on the first row of its group.
        for disease in conf:
            worksheet.write(row, 0, disease)
            for month in conf[disease]:
                worksheet.write(row, 1, month)
                dataset = self.create_datasets(disease, month)
                train_set, test_x, test_t, _, _ = self.create_train_test_from_datasets(dataset)
                train_x = []
                train_t = []
                shuffle(train_set)
                for index in xrange(len(train_set)):
                    train_x.append(train_set[index][0])
                    train_t.append(train_set[index][1])
                model = MLPClassifier(**tools[0])
                model.fit(train_x, train_t)
                score = model.score(test_x, test_t)
                worksheet.write(row, 2, round(score * 100, 2))
                model = MLPClassifier(**tools[1])
                model.fit(train_x, train_t)
                score = model.score(test_x, test_t)
                worksheet.write(row, 3, round(score * 100, 2))
                model = svm.SVC(**tools[2])
                model.fit(train_x, train_t)
                score = model.score(test_x, test_t)
                worksheet.write(row, 4, round(score * 100, 2))
                row += 1
        workbook.close()

    def create_plots_SVM(self):
        """Plot average training time (tuple slot [2]) vs month for two
        SVM cost settings of one disease.

        NOTE(review): with NN tools occupying indexes 0-5, index 12 maps
        to 'SVM rbf C-10^-5' (C=1e-5) and 14 to 'SVM rbf C-100', yet the
        first label reads 'Cost 0.0001' — confirm which is intended.
        """
        pylab.axis([0, 6, 0, 1])
        # Cost1 vs cost2 plot
        disease = 'Accident vascular cerebral'
        x = [1, 2, 3, 4, 5]
        c1 = []
        c2 = []
        for i in x:
            c1.append(self.prediction_tools[i][disease][12][2])
            c2.append(self.prediction_tools[i][disease][14][2])
        pylab.plot(x, c1, label='Cost 0.0001')
        pylab.plot(x, c2, label='Cost 100')
        pylab.legend(loc='upper right')
        pylab.xlabel('Luna')
        pylab.ylabel('Timp de antrenare (s)')
        pylab.show()
def __init__(self):
    """Initialise the statistics manager and cache the county list."""
    super(Statistics_Manager, self).__init__()
    self.resources_manager = Resources_Manager()
    # Cached once; county data is read-only for this manager.
    self.counties = self.resources_manager.get_counties()
class Statistics_Manager(object):
    """Statistics Manager for Stations.

    Builds per-station, per-county and per-disease statistics matrices
    from the raw measurements stored via Resources_Manager.
    """

    def __init__(self):
        super(Statistics_Manager, self).__init__()
        self.resources_manager = Resources_Manager()
        self.counties = self.resources_manager.get_counties()

    @staticmethod
    def compute_air_quality_index(params_codif, params_values):
        """Compute the AQI for one month of parameter values.

        For each parameter with AQI thresholds (AQI_INDEXES), count how
        many thresholds its value exceeds; the AQI is the worst such
        count + 1.  Parameters with value <= 0 (missing) are skipped;
        returns 0 if none had data.
        """
        max_index = -1
        for param_indentifier in AQI_INDEXES:
            index = 0
            param_value = params_values[params_codif[param_indentifier]]
            if param_value <= 0:
                continue
            for val in AQI_INDEXES[param_indentifier]:
                if param_value < val:
                    break
                index += 1
            max_index = max(max_index, index)
        return max_index + 1

    @staticmethod
    def compute_element_index_codification(elements):
        """Map each element to its position: {element: 0, 1, 2, ...}."""
        elements_indexes = {}
        index = 0
        for element in elements:
            elements_indexes[element] = index
            index += 1
        return elements_indexes

    """ This function must not be here. It will be placed in statistics_manager. """
    # NOTE(review): the bare string above is a leftover note (this IS the
    # statistics manager now) — safe to delete once confirmed.
    def compute_statistics_for_station(self, station, parameters,
                                       resources_manager):
        """Build monthly per-parameter statistics for one station.

        Returns a dict with the station's code/county and a 'statistics'
        list of {'year', 'month', 'values'} entries, where values maps
        str(parameter index) -> monthly aggregate (0 when no data).
        NO2 is derived as avg(NOx) - avg(NO); rainfall is summed while
        every other parameter is averaged.
        """
        station_statistics = {}
        station_statistics['internationalCode'] = station['internationalCode']
        station_statistics['county'] = station['county']
        statistics = []
        for year in xrange(_START_YEAR, _LAST_YEAR):
            for month in MONTHS:
                parameter_value_dict = {}
                for parameter in parameters:
                    if parameter['index'] == NO2_INDEX:
                        # NO2 is not measured directly: derive it from
                        # NOx and NO monthly averages.
                        measurements_NOx = resources_manager.get_measurements_for_station(
                            station['internationalCode'], year, month, NOx_INDEX)
                        measurements_NO = resources_manager.get_measurements_for_station(
                            station['internationalCode'], year, month, NO_INDEX)
                        if measurements_NO and measurements_NOx:
                            NOx_avg = compute_average(measurements_NOx[0]['measurements'])
                            NO_avg = compute_average(measurements_NO[0]['measurements'])
                            NO2_avg = NOx_avg - NO_avg
                            # Only keep physically sensible (positive) values.
                            if NO2_avg > 0 and NOx_avg > 0 and NO_avg > 0:
                                parameter_value_dict[str(NO2_INDEX)] = NO2_avg
                            else:
                                parameter_value_dict[str(NO2_INDEX)] = 0
                        else:
                            parameter_value_dict[str(NO2_INDEX)] = 0
                    else:
                        measurements = resources_manager.get_measurements_for_station(
                            station['internationalCode'], year, month,
                            parameter['index'])
                        if measurements:
                            # Rainfall accumulates; everything else averages.
                            if parameter['index'] == RAINFALL_INDEX:
                                parameter_value_dict[str(parameter['index'])] = compute_sum(
                                    measurements[0]['measurements'])
                            else:
                                parameter_value_dict[str(parameter['index'])] = compute_average(
                                    measurements[0]['measurements'])
                        else:
                            parameter_value_dict[str(parameter['index'])] = 0
                statistics.append({'year': year, 'month': month,
                                   'values': parameter_value_dict})
        station_statistics['statistics'] = statistics
        return station_statistics

    def create_statistics_for_diseases(self):
        """Aggregate raw per-code disease records into per-name 'used
        disease' documents with county/year/month case counts, and
        persist them.

        DISEASE_CONFIGURATION maps category -> name -> list of disease
        codes merged into that name.
        """
        counties = self.resources_manager.get_counties()
        for disease_category in DISEASE_CONFIGURATION:
            for disease_name in DISEASE_CONFIGURATION[disease_category]:
                initial_disease_statistics = []
                used_disease_object = {}
                used_disease_object['name'] = disease_name
                used_disease_object['category'] = disease_category
                used_disease_object['avg_cases'] = 0.0
                statistics = {}
                for disease_code in DISEASE_CONFIGURATION[disease_category][disease_name]:
                    disease = self.resources_manager.get_disease_by_code(disease_code)
                    initial_disease_statistics.append(disease)
                # Zero-filled county/year/month grid (string keys).
                for county in counties:
                    county_statistic_obj = {}
                    for year in xrange(_START_YEAR, _LAST_YEAR):
                        county_statistic_obj[str(year)] = {}
                        for month in range(12):
                            county_statistic_obj[str(year)][str(month)] = 0
                    statistics[county['name']] = county_statistic_obj
                for disease in initial_disease_statistics:
                    # NOTE(review): avg_cases sums the per-code averages,
                    # not a recomputed overall average — confirm intended.
                    used_disease_object['avg_cases'] += disease['avg_cases']
                    for statistic in disease['statistics']:
                        date = datetime.strptime(statistic['start_date'], '%d.%m.%Y')
                        # Normalise county spelling via CANONIC_COUNTY_NAMES.
                        statistic_county = (CANONIC_COUNTY_NAMES[statistic['county']]
                                            if statistic['county'] in CANONIC_COUNTY_NAMES
                                            else statistic['county'])
                        statistic_year = date.strftime('%Y')
                        # Months are stored 0-based.
                        statistic_month = str(int(date.strftime('%m'))-1)
                        statistics[statistic_county][statistic_year][statistic_month] += (
                            statistic['total_number_cases'])
                used_disease_object['statistics'] = statistics
                self.resources_manager.insert_update_disease_statistics(used_disease_object)

    def air_pollution_county_statistics(self):
        """Return statistics between air_pollution and counties.

        Returns:
            stations_in_counties: list of station-code sets per county.
            statistics: 4-dimensional matrix;
                statistics[county][year][month][parameter] = averaged
                value, with a computed AQI appended as the last entry.
        """
        counties = [x['name'] for x in self.resources_manager.get_counties()]
        counties = sorted(counties)
        parameters = [x['index'] for x in self.resources_manager.get_viewed_parameters()]
        parameters = sorted(parameters)
        # remove AQI: it is recomputed below, not averaged from stations.
        parameters.remove(AQI_INDEX)
        counties_indexes = self.compute_element_index_codification(counties)
        parameter_indexes = self.compute_element_index_codification(parameters)
        stations_statistics = self.resources_manager.get_stations_statistics()
        # NOTE(review): the literal 2010..2017 range presumably mirrors
        # _START_YEAR/_LAST_YEAR — confirm and unify.
        statistics = [[[[0 for _ in parameters] for _ in range(12)]
                       for _ in range(2010, 2017)] for _ in counties]
        statistics_stations = [[[[0 for _ in parameters] for _ in range(12)]
                                for _ in range(2010, 2017)] for _ in counties]
        stations_in_counties = [set() for county in counties]
        # Debug traces (Python 2 print).
        print parameter_indexes
        print parameters
        # Create 4d matrix of statistics for each county: sum values and
        # count contributing stations per cell.
        for station in stations_statistics:
            county_index = counties_indexes[station['county']]
            stations_in_counties[county_index].add(station['internationalCode'])
            for statistic in station['statistics']:
                statistic_year = int(statistic['year'])
                if statistic_year >= _START_YEAR:
                    year_index = statistic_year - _START_YEAR
                    month = int(statistic['month']) - 1
                    for parameter in statistic['values']:
                        if int(parameter) in parameter_indexes:
                            parameter_index = parameter_indexes[int(parameter)]
                            statistics[county_index][year_index][month][parameter_index] += (
                                statistic['values'][parameter])
                            if statistic['values'][parameter] > 0:
                                statistics_stations[county_index][year_index][month][parameter_index] += 1
        # Average each cell by its station count, then append the AQI.
        for county in counties:
            county_index = counties_indexes[county]
            for year_index in range(_LAST_YEAR - _START_YEAR):
                for month_index in range(12):
                    # NOTE(review): range(len(parameters)-1) skips the last
                    # parameter — confirm whether that is intentional.
                    for param_index in range(len(parameters)-1):
                        # NOTE(review): `month` and `parameter_index` here are
                        # stale leftovers from the accumulation loop above;
                        # this likely should read
                        # [...][month_index][param_index] — confirm, likely bug.
                        stations_no = (
                            statistics_stations[county_index][year_index][month][parameter_index])
                        if stations_no > 0:
                            statistics[county_index][year_index][month_index][param_index] /= (
                                stations_no)
                    aqi_index = self.compute_air_quality_index(
                        parameter_indexes, statistics[county_index][year_index][month_index])
                    statistics[county_index][year_index][month_index].append(aqi_index)
        return stations_in_counties, statistics

    def get_disease_county_statistics(self):
        """Return statistics between diseases and counties.

        Returns:
            statistics: 4-dimensional matrix;
                statistics[county][year][month][disease] = case count.
            diseases_boundaries: disease index -> class-boundary list
                derived from DISEASE_BOUNDARIES fractions.
        """
        counties = [x['name'] for x in self.resources_manager.get_counties()]
        counties = sorted(counties)
        counties_indexes = self.compute_element_index_codification(counties)
        used_diseases = self.resources_manager.get_used_diseases()
        sorted_used_diseases_names = sorted([disease['name'] for disease in used_diseases])
        used_diseases_indexes = self.compute_element_index_codification(sorted_used_diseases_names)
        statistics = [[[[0 for _ in used_diseases] for _ in range(12)]
                       for _ in range(_START_YEAR, _LAST_YEAR)] for _ in counties]
        for disease in used_diseases:
            disease_index = used_diseases_indexes[disease['name']]
            for county in disease['statistics']:
                county_index = counties_indexes[county]
                for year in disease['statistics'][county]:
                    statistic_year = int(year)
                    if statistic_year >= _START_YEAR:
                        year_index = statistic_year - _START_YEAR
                        for month in disease['statistics'][county][year]:
                            month_index = int(month)
                            statistics[county_index][year_index][month_index][disease_index] = (
                                disease['statistics'][county][year][month])
        diseases_boundaries = {}
        for disease in used_diseases:
            disease_index = used_diseases_indexes[disease['name']]
            disease_values = set()
            for county in disease['statistics']:
                for year in disease['statistics'][county]:
                    # NOTE(review): `statistic_year` is stale from the loop
                    # above — this was presumably meant to test int(year);
                    # confirm, likely bug.
                    if statistic_year >= _START_YEAR:
                        for month in disease['statistics'][county][year]:
                            disease_values.add(disease['statistics'][county][year][month])
            disease_values = sorted(disease_values)
            disease_values_len = len(disease_values)
            # NOTE(review): boundaries are positions into the sorted value
            # list (fraction * count), but the values themselves are never
            # read — confirm downstream compute_disease_class expects
            # positions rather than case counts.
            diseases_boundaries[disease_index] = [int(x * disease_values_len)
                                                  for x in DISEASE_BOUNDARIES[disease['name']]]
        return statistics, diseases_boundaries

    def update_disease_average_cases(self):
        """Recompute and persist avg_cases (mean of total_number_cases
        over all statistics entries) for every disease.

        NOTE(review): divides by len(disease['statistics']) — raises
        ZeroDivisionError for a disease with no statistics; confirm that
        cannot occur upstream.
        """
        for disease in self.resources_manager.get_all_diseases():
            avg = 0.0
            for statistic in disease['statistics']:
                avg += statistic['total_number_cases']
            disease['avg_cases'] = avg / len(disease['statistics'])
            self.resources_manager.update_disease_metadata(disease)

    def get_diseases_codification(self):
        """Map used-disease name -> stable index (alphabetical order)."""
        used_diseases = self.resources_manager.get_used_diseases()
        sorted_used_diseases_names = sorted([disease['name'] for disease in used_diseases])
        used_diseases_indexes = self.compute_element_index_codification(
            sorted_used_diseases_names)
        return used_diseases_indexes
disease_values = sorted(disease_values) disease_values_len = len(disease_values) diseases_boundaries[disease_index] = [int(x * disease_values_len) for x in DISEASE_BOUNDARIES[disease['name']]] return statistics, diseases_boundaries def update_disease_average_cases(self): for disease in self.resources_manager.get_all_diseases(): avg = 0.0 for statistic in disease['statistics']: avg += statistic['total_number_cases'] disease['avg_cases'] = avg / len(disease['statistics']) self.resources_manager.update_disease_metadata(disease) def get_diseases_codification(self): used_diseases = self.resources_manager.get_used_diseases() sorted_used_diseases_names = sorted([disease['name'] for disease in used_diseases]) used_diseases_indexes = self.compute_element_index_codification( sorted_used_diseases_names) return used_diseases_indexes if __name__ == '__main__': sm = Statistics_Manager() rm = Resources_Manager() print 'Computing average' # sm.update_disease_average_cases() print 'Done' # sm.create_statistics_for_diseases()
from flask import Flask, jsonify, request, json, Response, send_from_directory from flask_cors import CORS, cross_origin import json from flask_cache import Cache import numpy from managers.prediction_manager import Prediction_Manager from managers.resources_manager import Resources_Manager if __name__ == '__main__': app = Flask(__name__) CORS(app) cache = Cache(app, config={'CACHE_TYPE': 'simple'}) prediction_manager = Prediction_Manager() resources_manager = Resources_Manager() diseases_names = [x['name'] for x in resources_manager.get_used_diseases()] prediction_manager.init_prediction_tools() @app.route('/api/prediction/info') @cache.cached(timeout=2592000) def get_prediction_info(): prediction_tools = prediction_manager.get_prediction_tools_names() scores = {} for month in xrange(1, 6): scores[month] = {} for disease_name in diseases_names: scores[month][disease_name] = [] for index in xrange(len(prediction_tools)): scores[month][disease_name].append(