def modelselect(ap, trainSize, testSize, skipSize=0):
    larclasPred = {}
    totalBias = 0
    totalCount = 0
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(trainSize, testSize, skipSize)
    # middle class
    while True:
        midclass, trD, trL, teD, teL = loader.getNextMidClass()
        if midclass == 0:
            break
        (model, bias, teP) = trainAndCompare(ap, midclass, trD, trL, teD, teL,
                                             zeros(testSize))
        larclass = int(midclass / 100)
        totalCount += testSize
        totalBias += bias
        bias = math.sqrt(bias / testSize)  # per-class RMSE of the winning model
        print("(Midclass %d select model %d, accuracy: %f)" % (midclass, model, bias))
        setModel(midclass, model)
        # accumulate midclass predictions as a candidate for the large class
        if larclass in larclasPred:
            larclasPred[larclass] += teP
        else:
            larclasPred[larclass] = teP
    # large class
    while True:
        larclass, trD, trL, teD, teL = loader.getNextLarClass()
        if larclass == 0:
            break
        if larclass in larclasPred:
            (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL,
                                                 larclasPred[larclass])
        else:
            (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL,
                                                 zeros(testSize))
        totalCount += testSize
        totalBias += bias
        bias = math.sqrt(bias / testSize)
        print("(Larclass %d select model %d, accuracy: %f)" % (larclass, model, bias))
        setModel(larclass, model)
    totalBias = math.sqrt(totalBias / totalCount)  # overall RMSE
    print("(Predict finished, accuracy: %f)" % totalBias)
    loader.closeFiles()
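# The helper below is a minimal sketch of my own, not the original
# trainAndCompare(): from the call sites above one can only infer its
# contract — score candidate predictions against the held-out labels and
# return the winning model id, that model's summed squared error (so that
# math.sqrt(bias / testSize) yields an RMSE), and its predictions. Only two
# trivial candidates are compared here; the real helper presumably also
# trains the per-class models.
def trainAndCompareSketch(ap, classId, trD, trL, teD, teL, extraPred):
    label = array(teL)
    # candidate predictions keyed by an illustrative model id
    candidates = {3: zeros(len(teL)), 4: extraPred}
    biases = dict((m, sum((p - label) ** 2)) for m, p in candidates.items())
    best = min(biases, key=biases.get)   # smallest summed squared error wins
    return best, biases[best], candidates[best]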
def submit(trainSize, cvSize):
    larclasPred = {}
    f1 = open("example.csv", "r")
    submit_csv = csv.reader(f1)
    row = submit_csv.next()          # copy the header row
    f2 = open('submit.csv', 'wb')
    writer = csv.writer(f2)
    writer.writerow(row)
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(trainSize)
    preDate = range(0, 9) + range(10, 59)   # prediction days, day 9 excluded
    # middle class
    goal = xgp.createFeature(dt.datetime(2015, 9, 1), 59, 2,
                             range(31, 38), [30], [39, 40])
    while True:
        midclass, trD, trL, teD, teL = loader.getNextMidClass()
        if midclass == 0:
            break
        teP = predictClass(midclass, cvSize, trD, trL, goal, zeros(59))
        writeClass(midclass, teP, preDate, submit_csv, writer)
        # count larclass
        larclass = int(midclass / 100)
        if larclass in larclasPred:
            larclasPred[larclass] += teP
        else:
            larclasPred[larclass] = teP
    # large class
    goal = xgp.createFeature(dt.datetime(2015, 9, 1), 59, 1,
                             range(31, 38), [30], [39, 40])
    while True:
        larclass, trD, trL, teD, teL = loader.getNextLarClass()
        if larclass == 0:
            break
        if larclass in larclasPred:
            teP = predictClass(larclass, cvSize, trD, trL, goal,
                               larclasPred[larclass])
        else:
            teP = predictClass(larclass, cvSize, trD, trL, goal, zeros(59))
        writeClass(larclass, teP, preDate, submit_csv, writer)
    f1.close()
    f2.close()
    loader.closeFiles()
def modelselect(trainSize, testSize, skipSize=0):
    global larclasPred, totalBias, totalCount, modelChoose, lcModelChoose, ap
    larclasPred = {}
    totalBias = 0
    totalCount = 0
    modelChoose = []
    lcModelChoose = []
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(trainSize, testSize, skipSize)
    # middle class
    while True:
        midclass, trD, trL, _, teL = loader.getNextMidClass()
        if midclass == 0:
            break
        # sarima model
        try:
            model = ap.sarimaTrain(midclass, trL, teL)
            teP1 = ap.sarimaPredict(model, testSize)
        except:
            teP1 = zeros(testSize)
        # kNN model
        try:
            teP2 = KNN_interface.knn(trL, testSize)
        except:
            print("Warning: kNN train fail")
            teP2 = zeros(testSize)
        # just zero
        teP3 = zeros(testSize)
        # count bias of midclass, pick the winner, and update larclass candidates
        label = array(teL)
        larclass = int(midclass / 100)
        totalCount += testSize
        bias1 = sum((teP1 - label) * (teP1 - label))
        bias2 = sum((teP2 - label) * (teP2 - label))
        bias3 = sum((teP3 - label) * (teP3 - label))
        if bias3 <= bias1 and bias3 <= bias2:
            totalBias += bias3
            bias3 = math.sqrt(bias3 / testSize)
            print("(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3))
            modelChoose.append(3)
            teP = teP3
        elif bias1 <= bias2:
            totalBias += bias1
            bias1 = math.sqrt(bias1 / testSize)
            print("(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1))
            modelChoose.append(1)
            teP = teP1
        else:
            totalBias += bias2
            bias2 = math.sqrt(bias2 / testSize)
            print("(Midclass %d select kNN, accuracy: %f)" % (midclass, bias2))
            modelChoose.append(2)
            teP = teP2
        if larclass in larclasPred:
            larclasPred[larclass] += teP
        else:
            larclasPred[larclass] = teP
    # large class
    while True:
        larclass, trD, trL, _, teL = loader.getNextLarClass()
        if larclass == 0:
            break
        # sarima model
        try:
            model = ap.sarimaTrain(larclass, trL, teL)
            teP1 = ap.sarimaPredict(model, testSize)
        except:
            teP1 = zeros(testSize)
        # kNN model
        try:
            teP2 = KNN_interface.knn(trL, testSize)
        except:
            print("Warning: kNN train fail")
            teP2 = zeros(testSize)
        # sum of midclasses
        teP3 = larclasPred[larclass]
        # count bias of larclass and pick the winner
        label = array(teL)
        totalCount += testSize
        bias1 = sum((teP1 - label) * (teP1 - label))
        bias2 = sum((teP2 - label) * (teP2 - label))
        bias3 = sum((teP3 - label) * (teP3 - label))
        if bias3 <= bias1 and bias3 <= bias2:
            totalBias += bias3
            bias3 = math.sqrt(bias3 / testSize)
            print("(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3))
            lcModelChoose.append(3)
        elif bias1 <= bias2:
            totalBias += bias1
            bias1 = math.sqrt(bias1 / testSize)
            print("(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1))
            lcModelChoose.append(1)
        else:
            totalBias += bias2
            bias2 = math.sqrt(bias2 / testSize)
            print("(Larclass %d select kNN, accuracy: %f)" % (larclass, bias2))
            lcModelChoose.append(2)
    totalBias = math.sqrt(totalBias / totalCount)
    print("(Predict finished, accuracy: %f)" % totalBias)
    loader.closeFiles()
def submit(trainSize):
    global larclasPred, ap
    larclasPred = {}
    f1 = open("submit.csv", "r")
    submit_csv = csv.reader(f1)
    submit_csv.next()
    f2 = open('submit1.csv', 'wb')
    writer = csv.writer(f2)
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(trainSize)
    # middle class
    current = 0
    while True:
        midclass, trD, trL, teD, teL = loader.getNextMidClass()
        if midclass == 0:
            break
        if modelChoose[current] == 1:
            try:
                model = ap.sarimaTrain(midclass, trL)
                teP = ap.sarimaPredict(model, 30)
            except:
                print("%d: failed to use arima, use kNN instead" % midclass)
                teP = KNN_interface.knn(trL, 30)
        elif modelChoose[current] == 2:
            teP = KNN_interface.knn(trL, 30)
        else:
            teP = zeros(30)
        current += 1
        # write file - midclass
        for x in teP:
            x_int = round(x)
            row = submit_csv.next()
            if int(row[0]) != midclass:
                raise KeyError
            writer.writerow([row[0], row[1], x_int])
        # count larclass
        larclass = int(midclass / 100)
        if larclass in larclasPred:
            larclasPred[larclass] += teP
        else:
            larclasPred[larclass] = teP
    # large class
    current = 0
    while True:
        larclass, trD, trL, teD, teL = loader.getNextLarClass()
        if larclass == 0:
            break
        if lcModelChoose[current] == 1:
            try:
                model = ap.sarimaTrain(larclass, trL)
                teP = ap.sarimaPredict(model, 30)
            except:
                print("%d: failed to use arima, use kNN instead" % larclass)
                teP = KNN_interface.knn(trL, 30)
        elif lcModelChoose[current] == 2:
            teP = KNN_interface.knn(trL, 30)
        else:
            teP = larclasPred[larclass]
        current += 1
        # write file - larclass
        for x in teP:
            x_int = round(x)
            row = submit_csv.next()
            if int(row[0]) != larclass:
                raise KeyError
            writer.writerow([row[0], row[1], x_int])
    f1.close()
    f2.close()
    loader.closeFiles()
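# Usage sketch (assumed wiring, not from the original sources): modelselect()
# above fills the module-level modelChoose / lcModelChoose lists in loader
# order, and submit() replays them with the same running counter, so the two
# must be called back to back over the same class sequence. The sizes below
# are illustrative placeholders.
import arimaPredicter

ap = arimaPredicter.predicter()   # module-level global used by both functions
modelselect(200, 30)              # pick SARIMA / kNN / zero per class on a held-out window
submit(230)                       # retrain on the longer history and write submit1.csv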
@author: wangjun
"""
import numpy as np
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import dataLoader
import matplotlib.pyplot as plt
import xgboostPredicter

loader = dataLoader.loader("datam.csv")
loader.setSize(200, 43, 0)
midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
loader.closeFiles()

seq_length = 0
data_max = 35
dataX = []
dataY = []
trainLabelN = []
# scale labels into [0, 1] by the assumed maximum value
for i in range(0, len(trainLabel)):
    trainLabelN.append(trainLabel[i] / data_max)
# build samples: static features plus the preceding seq_length scaled labels
for i in range(0, len(trainLabelN) - seq_length):
    dataX.append(trainData[i + seq_length] + trainLabelN[i:i + seq_length])
import numpy as np
import pandas as pd

import dataLoader as dl

"""Clean and convert pandas DataFrame data of municipality infection cases
distribution, and save it as .csv.
"""

file_id = '1Gt8Rn8Md4FJRJ7f44h53v1uvCCpYh-qmZVe5mayedCA'
file_url = 'https://docs.google.com/spreadsheets/d/{file_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'
sheets = ['munic']
loaded = {}
for sheet_name in sheets:
    loaded[sheet_name] = dl.loader(file_id, file_url, sheet_name)

# table data preparing
data = loaded['munic']

# transform data: one row per (date, region) -> one column per region
data.drop('ID', axis=1, inplace=True)
data = data.pivot(index='Дата', columns='Регион', values='Выявлено')
data.loc['2020-05-19'] = 0
data.index = pd.to_datetime(data.index, dayfirst=True)
data.sort_index(inplace=True)
data.fillna(method='ffill', inplace=True)
# cumulative totals -> daily increments
data = data.diff()
data.fillna(method='ffill', inplace=True)
data.fillna(0, inplace=True)
data = data.astype(np.int16)
data.reset_index(inplace=True)
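# Minimal self-contained illustration (synthetic data, hypothetical region
# names) of the long -> wide -> daily-increment transform used above: pivot
# turns one row per (date, region) into one column per region, ffill carries
# the last cumulative total over missed reports, and diff() recovers the
# daily new cases.
demo = pd.DataFrame({
    'Дата': ['01.06.2020', '01.06.2020', '02.06.2020', '03.06.2020', '03.06.2020'],
    'Регион': ['A', 'B', 'A', 'A', 'B'],
    'Выявлено': [10, 3, 12, 15, 7],
})
demo = demo.pivot(index='Дата', columns='Регион', values='Выявлено')
demo.index = pd.to_datetime(demo.index, dayfirst=True)
demo = demo.sort_index().fillna(method='ffill').diff().fillna(0).astype(np.int16)
print(demo)   # daily increments: A -> [0, 2, 3], B -> [0, 0, 4]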
def modelselect(trainSize, testSize, skipSize=0):
    larclasPred = {}
    totalBias = 0
    totalCount = 0
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(trainSize, testSize, skipSize)
    # middle class
    while True:
        midclass, trD, trL, teD, teL = loader.getNextMidClass()
        if midclass == 0:
            break
        # sarima model
        try:
            model = ap.sarimaTrain(midclass, trL, teL)
            teP1 = ap.sarimaPredict(model, testSize)
        except:
            teP1 = zeros(testSize)
        # xgboost model
        simulateFeature(teD, [-2, -1])
        try:
            teP2 = xgboostPredict(array(trD), array(trL), array(teD))
        except:
            teP2 = zeros(testSize)
        # just zero
        teP3 = zeros(testSize)
        # count bias of midclass, pick the winner, and update larclass candidates
        label = array(teL)
        larclass = int(midclass / 100)
        totalCount += testSize
        bias1 = sum((teP1 - label) * (teP1 - label))
        bias2 = sum((teP2 - label) * (teP2 - label))
        bias3 = sum((teP3 - label) * (teP3 - label))
        if bias3 <= bias1 and bias3 <= bias2:
            totalBias += bias3
            bias3 = math.sqrt(bias3 / testSize)
            print("(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3))
            setModel(midclass, 3)
            teP = teP3
        elif bias1 <= bias2:
            totalBias += bias1
            bias1 = math.sqrt(bias1 / testSize)
            print("(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1))
            setModel(midclass, 1)
            teP = teP1
        else:
            totalBias += bias2
            bias2 = math.sqrt(bias2 / testSize)
            print("(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2))
            setModel(midclass, 2)
            teP = teP2
        if larclass in larclasPred:
            larclasPred[larclass] += teP
        else:
            larclasPred[larclass] = teP
    # large class
    while True:
        larclass, trD, trL, teD, teL = loader.getNextLarClass()
        if larclass == 0:
            break
        # sarima model
        try:
            model = ap.sarimaTrain(larclass, trL, teL)
            teP1 = ap.sarimaPredict(model, testSize)
        except:
            teP1 = zeros(testSize)
        # xgboost model
        simulateFeature(teD, [-2, -1])
        try:
            teP2 = xgboostPredict(array(trD), array(trL), array(teD))
        except:
            teP2 = zeros(testSize)
        # sum of midclasses
        try:
            teP3 = larclasPred[larclass]
        except:
            teP3 = zeros(testSize)
        # count bias of larclass and pick the winner
        label = array(teL)
        totalCount += testSize
        bias1 = sum((teP1 - label) * (teP1 - label))
        bias2 = sum((teP2 - label) * (teP2 - label))
        bias3 = sum((teP3 - label) * (teP3 - label))
        if bias3 <= bias1 and bias3 <= bias2:
            totalBias += bias3
            bias3 = math.sqrt(bias3 / testSize)
            print("(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3))
            setModel(larclass, 3)
        elif bias1 <= bias2:
            totalBias += bias1
            bias1 = math.sqrt(bias1 / testSize)
            print("(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1))
            setModel(larclass, 1)
        else:
            totalBias += bias2
            bias2 = math.sqrt(bias2 / testSize)
            print("(Larclass %d select XGBOOST, accuracy: %f)" % (larclass, bias2))
            setModel(larclass, 2)
    totalBias = math.sqrt(totalBias / totalCount)
    print("(Predict finished, accuracy: %f)" % totalBias)
    loader.closeFiles()
def submit(trainSize):
    global larclasPred
    larclasPred = {}
    f1 = open("example.csv", "r")
    submit_csv = csv.reader(f1)
    row = submit_csv.next()
    f2 = open('submit.csv', 'wb')
    writer = csv.writer(f2)
    writer.writerow(row)
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(trainSize)
    preDate = range(0, 9) + range(10, 59)
    # middle class
    goal = createFeature(dt.datetime(2015, 9, 1), 59, 2, range(31, 38), [30], [39, 40])
    while True:
        midclass, trD, trL, teD, teL = loader.getNextMidClass()
        if midclass == 0:
            break
        if modelChoose[midclass] == 1:
            try:
                model = ap.sarimaTrain(midclass, trL)
                teP = ap.sarimaPredict(model, 59)
            except:
                print("%d: failed to use arima, use xgboost instead" % midclass)
                teP = xgboostPredict(array(trD), array(trL), array(goal))
        elif modelChoose[midclass] == 2:
            teP = xgboostPredict(array(trD), array(trL), array(goal))
        else:
            teP = zeros(59)
        # write file - midclass
        for i in preDate:
            x_int = round(teP[i])
            if x_int < 0:
                x_int = 0
            row = submit_csv.next()
            if int(row[0]) != midclass:
                raise KeyError
            writer.writerow([row[0], row[1], x_int])
        # count larclass
        larclass = int(midclass / 100)
        if larclass in larclasPred:
            larclasPred[larclass] += teP
        else:
            larclasPred[larclass] = teP
    # large class
    goal = createFeature(dt.datetime(2015, 9, 1), 59, 1, range(31, 38), [30], [39, 40])
    while True:
        larclass, trD, trL, teD, teL = loader.getNextLarClass()
        if larclass == 0:
            break
        if modelChoose[larclass] == 1:
            try:
                model = ap.sarimaTrain(larclass, trL)
                teP = ap.sarimaPredict(model, 59)
            except:
                print("%d: failed to use arima, use xgboost instead" % larclass)
                teP = xgboostPredict(array(trD), array(trL), array(goal))
        elif modelChoose[larclass] == 2:
            teP = xgboostPredict(array(trD), array(trL), array(goal))
        else:
            try:
                teP = larclasPred[larclass]
            except:
                teP = zeros(59)
        # write file - larclass
        for i in preDate:
            x_int = round(teP[i])
            if x_int < 0:
                x_int = 0
            row = submit_csv.next()
            if int(row[0]) != larclass:
                raise KeyError
            writer.writerow([row[0], row[1], x_int])
    f1.close()
    f2.close()
    loader.closeFiles()
def dataLoader(self):
    loader = dataLoader.loader(self.rText.text, self.path)
    return loader.urlLoader()
def sariamOutput():
    loader = dataLoader.loader("datam.csv", "lcdatam.csv")
    loader.setSize(120, 0, 0)
    f1 = open("result01.csv", "wb")
    writer1 = csv.writer(f1)
    f2 = open("result11.csv", "wb")
    writer2 = csv.writer(f2)
    f3 = open("result12.csv", "wb")
    writer3 = csv.writer(f3)
    ap = arimaPredicter.predicter()
    ap.setIndex(index)
    # one output file per SARIMA parameter setting
    paraWriters = [((0, 1), writer1), ((1, 1), writer2), ((1, 2), writer3)]
    while True:
        midclass, _, trainData, _, _ = loader.getNextMidClass()
        if midclass == 0:
            break
        for para, writer in paraWriters:
            ap.setPara(midclass, para)
            try:
                model = ap.sarimaTrain(midclass, trainData)
                result = ap.sarimaPredict(model, 30)
            except:
                result = np.zeros(30)
            for i in range(0, 30):
                writer.writerow([midclass, "201505%02d" % (i + 1), result[i]])
    while True:
        larclass, _, trainData, _, _ = loader.getNextLarClass()
        if larclass == 0:
            break
        for para, writer in paraWriters:
            ap.setPara(larclass, para)
            try:
                model = ap.sarimaTrain(larclass, trainData)
                result = ap.sarimaPredict(model, 30)
            except:
                result = np.zeros(30)
            for i in range(0, 30):
                writer.writerow([larclass, "201505%02d" % (i + 1), result[i]])
    f1.close()
    f2.close()
    f3.close()
    loader.closeFiles()
def main():
    """Clean and convert pandas DataFrame main data, and save it as .csv.

    Function is used in github action. For details look at
    .github/workflows/dataloader.yml
    """
    file_id = '1iAgNVDOUa-g22_VcuEAedR2tcfTlUcbFnXV5fMiqCR8'
    file_url = 'https://docs.google.com/spreadsheets/d/{file_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'
    sheets = ['data', 'destrib', 'rosstat']
    loaded = {}
    for sheet_name in sheets:
        loaded[sheet_name] = dl.loader(file_id, file_url, sheet_name)

    # table data preparing
    data = loaded['data']
    # replace NaN with zeros
    data.fillna(0, inplace=True)
    # replace ',' with '.' in float numerics
    data['infection rate'] = data['infection rate'].apply(lambda x: str(x))
    data['IR7'] = data['IR7'].apply(lambda x: str(x))
    data['infection rate'] = data['infection rate'].apply(
        lambda x: x.replace(',', '.'))
    data['IR7'] = data['IR7'].apply(lambda x: x.replace(',', '.'))
    # calculate cumulative metrics
    data['кумул. случаи'] = data['всего'].cumsum()
    data['кумул.умерли'] = data['умерли от ковид'].cumsum()
    data['кумул.выписаны'] = data['выписали'].cumsum()
    data['кумул.активные'] = data['кумул. случаи'].sub(
        data['кумул.выписаны']).sub(data['кумул.умерли'])
    # scaling for tests
    data['кол-во тестов / 10'] = data['кол-во тестов'] / 10
    # region columns
    data['все кроме Калининграда'] = data.filter(regex='округ').sum(axis=1)
    # drop textual data
    data.drop(['учебные учреждения'], axis=1, inplace=True)
    # calculate the running ratio for infection rate
    data['infection rate'] = data['infection rate'].astype(np.float16)
    data['plus'] = data[data['infection rate'] >= 1]['infection rate']
    data['minus'] = data[data['infection rate'] < 1]['infection rate']
    data['plus'] = data['plus'].mask(data['plus'] >= 0, 1)
    data['minus'] = data['minus'].mask(data['minus'] >= 0, 1)
    data['plus'] = data['plus'].cumsum()
    data['minus'] = data['minus'].cumsum()
    data[['plus', 'minus']] = data[['plus', 'minus']].astype("object").fillna(method='ffill')
    data['отношение'] = data['plus'] / data['minus']
    data.drop(['plus', 'minus'], axis=1, inplace=True)
    # minimize numerics memory sizes
    data['IR7'] = data['IR7'].astype(np.float16)
    data['отношение'] = data['отношение'].astype(np.float16)
    data['отношение'] = data['отношение'].apply(lambda x: round(x, 2))
    data['кол-во тестов кумул'] = data['кол-во тестов кумул'].astype(np.int32)
    data['поступило кумулятивно'] = data['поступило кумулятивно'].astype(
        np.int32)
    data['компонент 1'] = data['компонент 1'].astype(np.int32)
    data['компонент 2'] = data['компонент 2'].astype(np.int32)
    for i in data.columns.difference([
            'дата', 'infection rate', 'IR7', 'отношение',
            'кол-во тестов / 10', 'кол-во тестов кумул',
            'поступило кумулятивно', 'компонент 1', 'компонент 2',
    ]):
        data[i] = data[i].astype(np.int16)
    # flush
    data.to_csv(dl.pathMaker('data'), index=False)

    # table destrib preparing
    destrib = loaded['destrib']
    destrib.fillna(0, inplace=True)
    for i in destrib.columns.difference(['дата']):
        destrib[i] = destrib[i].astype(np.int8)
    destrib.to_csv(dl.pathMaker('destrib'), index=False)

    # table rosstat preparing
    rosstat = loaded['rosstat']
    rosstat.fillna(0, inplace=True)
    for i in rosstat.columns.difference(['Месяц']):
        rosstat[i] = rosstat[i].astype(np.int16)
    rosstat.to_csv(dl.pathMaker('rosstat'), index=False)
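# Small synthetic check (hypothetical numbers, not project data) of the
# plus/minus bookkeeping inside main() above: days with infection rate >= 1
# are counted in 'plus', days below 1 in 'minus', and 'отношение' is the
# running ratio of the two counts.
demo = pd.DataFrame({'infection rate': [1.2, 0.9, 1.0, 0.8]})
demo['plus'] = demo[demo['infection rate'] >= 1]['infection rate']
demo['minus'] = demo[demo['infection rate'] < 1]['infection rate']
demo['plus'] = demo['plus'].mask(demo['plus'] >= 0, 1).cumsum()
demo['minus'] = demo['minus'].mask(demo['minus'] >= 0, 1).cumsum()
demo[['plus', 'minus']] = demo[['plus', 'minus']].fillna(method='ffill')
print(demo['plus'] / demo['minus'])   # running ratio: NaN, 1.0, 2.0, 1.0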
import game_of_life as game
import dataLoader as Loader

# # set = [5, 6, 7, 26, 27, 45, 47]
# g = game.GameMap(5, [5, 12, 17, 22, 14, 6])
# # g = game.GameMap(20, set)
# for i in range(20):
#     print(g)
#     # print(g.living_neighbors_list())
#     g = g.map_update()
#     print('..............................')

loader = Loader.loader()
# print(loader.loadertype)
# print(loader.size)
# print(loader.initdata)
g = game.GameMap(loader.size, loader.initdata)
for i in range(20):
    print(g)
    # print(g.living_neighbors_list())
    g = g.map_update()
    print('..............................')