def error_analyze(make_model, train_data, test_data, featurizer):
    matrices = Features.make_experiment_matrices(train_data, test_data, featurizer)
    model = make_model()
    model.fit(matrices['train_X'], matrices['train_Y'])
    bins = [v / 100.0 for v in range(50, 110, 5)]
    ext_preds = Models.extended_predict(model, matrices['test_X'], matrices['test_Y'])
    return Models.error_analysis(ext_preds, bins=bins)
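# Minimal usage sketch for error_analyze (not part of the original file). It assumes
# Models.LogisticRegression can act as the zero-argument model factory, and the names
# blog_train / blog_test / feat4 are illustrative placeholders for the experiment's
# data splits and featurizer, not definitions taken from this project.
analysis = error_analyze(Models.LogisticRegression, blog_train, blog_test, feat4)
print(analysis)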
def main():
    if len(sys.argv) < 4:
        print('not enough arguments')
        sys.exit()
    else:
        purpose = sys.argv[1]
        model_name = sys.argv[2]
        # use sample data by default
        data_file = config.data_dir + "/" + sys.argv[3]
        if purpose == "train":
            model = Models.get_instance(model_name)
            train(data_file, model, model_name)
        elif purpose == "feature_selection":
            model = Models.get_instance(model_name)
            feature_selection(data_file, model)
        else:
            if len(sys.argv) < 5:
                print('not enough arguments')
                sys.exit()
            else:
                weight_file = sys.argv[4]
                model = joblib.load(weight_file)
                df = predict(data_file, model)
                df.to_csv(config.data_dir + "/prediction_result_" + get_time_str() + ".csv")
def log_results(out_file, msg, tw_cv, blog_cv, featurizer):
    if blog_cv:
        print(msg + " blog(80%) -> blog(80%) CV-10")
        results_b = Models.model_cv(Models.LogisticRegression, blog_cv, featurizer, n_folds=10)
        write_cv(out_file, msg + " b", results_b)
    if tw_cv and blog_cv:
        print(msg + " twitter+wiki -> blog(80%)")
        results_twb = experiment_maxent(tw_cv, blog_cv, featurizer)
        write_detailed(out_file, msg + " twb", results_twb)
    if tw_cv:
        print(msg + " twitter+wiki -> twitter+wiki CV-5")
        results_tw = Models.model_cv(Models.LogisticRegression, tw_cv, featurizer, n_folds=5)
        write_cv(out_file, msg + " tw", results_tw)
def post(self):
    self.response.out.write("added <br />")
    user = users.get_current_user()
    # See if the user chose to upload a payslip or other
    if self.request.POST['type'] == "payslip":
        self.response.out.write("payslip <br />")
        payslip = Models.Payslip(parent=Models.payslip_key(user.user_id()))
        # Set the model attributes
        payslip.ownerId = user.user_id()
        payslip.beginning = datetime.datetime.strptime(self.request.POST['beginning'], '%Y-%m-%d').date()
        payslip.ending = datetime.datetime.strptime(self.request.POST['ending'], '%Y-%m-%d').date()
        payslip.income = float(self.request.POST['income'])
        payslip.tax = float(self.request.POST['tax'])
        payslip.net = float(self.request.POST['income']) - float(self.request.POST['tax'])
        payslip.company = self.request.POST['company']
        # Output the given form to the confirmation page
        self.response.out.write(payslip.ownerId + "<br/>")
        self.response.out.write(str(payslip.upload_date) + "<br/>")
        self.response.out.write(str(payslip.beginning) + "<br/>")
        self.response.out.write(str(payslip.ending) + "<br/>")
        self.response.out.write(str(payslip.income) + "<br/>")
        self.response.out.write(str(payslip.tax) + "<br/>")
        self.response.out.write(str(payslip.net) + "<br/>")
        self.response.out.write(str(payslip.company) + "<br/>")
        # Add the model to the data store
        payslip.put()
    else:
        self.response.out.write("other <br />")
        file = Models.File(parent=Models.file_key(user.user_id()))
        # Set the model attributes
        file.ownerId = user.user_id()
        file.title = self.request.POST['title']
        file.description = self.request.POST['description']
        # Output the given form to the confirmation page
        self.response.out.write(file.ownerId + "<br/>")
        self.response.out.write(file.title + "<br/>")
        self.response.out.write(file.description + "<br/>")
        # Add the model to the data store
        file.put()
def generate_files_html(self):
    files = db.GqlQuery("SELECT * "
                        "FROM File "
                        "WHERE ANCESTOR IS :1 ",
                        Models.file_key(users.get_current_user().user_id()))
    html = ""
    for file in files:
        if file.file_key:
            html += """
            <tr>
                <td>""" + str(file.upload_date) + """</td>
                <td>""" + file.title + """</td>
                <td>""" + file.description + """</td>
                <td><a href="/view_file/""" + str(file.file_key.key()) + """">View</a></td>
            </tr>
            """
        else:
            html += """
            <tr>
                <td>""" + str(file.upload_date) + """</td>
                <td>""" + file.title + """</td>
                <td>""" + file.description + """</td>
                <td>View</td>
            </tr>
            """
    return html
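# Because title and description are user-supplied, interpolating them straight into the
# HTML invites markup injection. A small hedged variant (assuming the same File model
# fields and the Python 2 / App Engine runtime used above) escapes them first; the
# helper below is illustrative, not part of the original handler.
import cgi  # cgi.escape is available on the Python 2 App Engine runtime

row_template = """
<tr>
    <td>%s</td>
    <td>%s</td>
    <td>%s</td>
    <td>%s</td>
</tr>
"""

def render_row(file, link_cell):
    # Escape user-controlled fields before building the table row.
    return row_template % (cgi.escape(str(file.upload_date)),
                           cgi.escape(file.title),
                           cgi.escape(file.description),
                           link_cell)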
def literature_comp(out_file):
    # compare performance vs the way it was done in literature
    # Blog 10-fold CV
    blog_results_me = Models.model_cv(Models.LogisticRegression, blog, feat4, stratified=True)
    blog_results_svm = Models.model_cv(Models.LinearSVC, blog, feat4, stratified=True)
    blog_results_svm_tfidf = Models.model_cv(Models.LinearSVC, blog, feat2, stratified=True)
    write_cv(out_file, "blog stratified ME feature 4 cv-10", blog_results_me)
    write_cv(out_file, "blog stratified SVM feature 4 cv-10", blog_results_svm)
    write_cv(out_file, "blog stratified SVM tf-idf cv-10", blog_results_svm_tfidf)

    # twitter trained on solely twitter and evaluated on positive/negative classification
    t_training_granular = to_utf8(prepareTwitterDataWithPNLabel(Globals.TWITTER_TRAIN, splitwords=False))
    t_test_granular = to_utf8(prepareTwitterDataWithPNLabel(Globals.TWITTER_TEST, splitwords=False))
    # twitter granular, ignore neutral
    t_test_no_neutral = filter(lambda x: x[1] != '2', t_test_granular)
    t_maxent = experiment_maxent(t_training_granular, t_test_no_neutral, feat4)
    # just bigram counts... cannot get the performance reported in the presentation
    t_extra = experiment_maxent(t_training_granular, t_test_no_neutral, extra_features)
    write_detailed(out_file, "twitter ME feature 4 test results", t_maxent)
    write_detailed(out_file, "twitter ME extra (uni+bi+valence+punct) test results", t_extra)
def __init__(self, options):
    self.do = options['data_options']
    self.mo = options['model_options']
    self.oo = options['optimization_options']
    self.lo = options['log_options']

    data_path = self.do['data_path']
    task_num = self.do['task_number']
    lang = self.do.get('language', 'en')  # defaults to use small Eng set
    self.qa_train, self.qa_test = read_dataset(
        data_path, task_num, lang, options['data_options']['reader'],
        {'threshold': 0,
         'context_length': self.mo['context_length'],
         'context_length_percentage': self.mo.get('context_length_percentage', 1),
         'sentence_length': self.mo['sentence_length']})
    self.data_size = len(self.qa_train.stories)
    self.mo['context_length'] = self.qa_train.context_length
    # self.options['model_options']['context_length'] = self.qa_train.context_length

    tokens = self.qa_train.specialWords
    self.NULL = tokens['<NULL>']
    self.EOS = tokens['<EOS>']
    self.UNKNOWN = tokens['<UNKNOWN>']

    if self.oo['dump_params']:
        weight_dir = Path(self.oo['weight_path'])
        if not weight_dir.exists():
            weight_dir.mkdir()
    self.batch_size_train = self.oo['batch_size_train']
    self.batch_size_test = self.oo['batch_size_test']
    self.verbose = self.oo['verbose']
    self.log = self.logger_factory()
    self.lo['dump_epoch'] = self.oo['max_epoch'] \
        if self.lo['dump_epoch'] < 0 \
        else self.lo['dump_epoch']

    vocab_size = len(self.qa_train.index_to_word)
    options['model_options']['vocab_size'] = vocab_size
    model_name = self.mo['model_name']
    self.model = Models.model(model_name)(options)
    self.log("context length: %d" % self.mo['context_length'])
'''Find predicted SADs for the world bird populations.
Well, it was a nice try, but I ran out of memory.'''

SADModels = ['SimBrokenStick', 'SimLogNormInt', 'SimpleRandomFraction', 'SimParetoInt']

N = 40000
S = 100
sample_size = 100

fig = plt.figure()
for i, model in enumerate(SADModels):
    fig.add_subplot(2, 2, i + 1)
    if model == 'SimBrokenStick':
        prdSADs = Models.SimBrokenStick(N, S, sample_size)
        HeatMap.RACHeatMap(fig, prdSADs)
        plt.plot(np.log(AverageShape.AvgShape(prdSADs)), color='lime', label='Predicted', lw=2)
        print('BS')
    elif model == 'SimLogNormInt':
        prdSADs = Models.SimLogNormInt(N, S, sample_size)
        HeatMap.RACHeatMap(fig, prdSADs)
        plt.plot(np.log(AverageShape.AvgShape(prdSADs)), color='lime', label='Predicted', lw=2)
        print('SLN')
    elif model == 'SimpleRandomFraction':
        prdSADs = Models.SimpleRandomFraction(N, S, sample_size)
        HeatMap.RACHeatMap(fig, prdSADs)
        plt.plot(np.log(AverageShape.AvgShape(prdSADs)), color='lime', label='Predicted', lw=2)
        print('RandFrac')
def NeuralNetwork(encode_type="LabelEncode"):
    df = Base_Process(encode_type)
    # Add a geolocation clustering feature
    df = pd.merge(df, _F_Clsuter_Geo(), on=pri_id, how='left')
    # Add the user's top-N most active province / city / district
    temp = _F_GeoCode(n=1)
    df = pd.merge(df, temp, on=pri_id, how='left')
    # Add distinct-count statistics
    temp = _F_nunique(3)
    df = pd.merge(df, temp, on=pri_id, how='left')
    # Add ratio statistics
    temp = _F_nunique_ratio(3)
    df = pd.merge(df, temp, on=pri_id, how='left')

    _Train = pd.merge(_train, df, on=pri_id, how='left').fillna(0)
    _Test = pd.merge(_test, df, on=pri_id, how='left').fillna(0)
    features = [col for col in _Train.columns if col != pri_id and col != 'y']
    _Label = _Train['y']

    # Build the network
    from keras.models import Sequential
    model = Sequential()
    from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout
    from keras import backend as K
    import tensorflow as tf
    import itertools

    shape = _Train.shape
    # Convolutional variant (kept for reference):
    # model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(shape[0], shape[1], 1)))  # conv layer
    # model.add(MaxPooling2D(pool_size=(2, 2)))       # pooling layer
    # model.add(Dense(256, activation='relu'))        # fully connected layer (sets the output dimension)
    # model.add(Dropout(0.5))                         # dropout layer
    # model.add(Dense(1, activation='sigmoid'))       # final fully connected layer, outputs a probability

    # MLP
    # print(shape)
    model.add(Dense(64, input_dim=402, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # Compile (the metric should later be replaced with the custom scoring function below)
    # Competition scoring function
    def tpr_weight_funtion(y_true, y_pred):
        # batch_size, n_elems = y_pred.shape[0], y_pred.shape[1]
        # idxs = list(itertools.permutations(range(n_elems)))
        # permutations = tf.gather(y_pred, idxs, axis=-1)  # Shape=(batch_size, n_permutations, n_elems)
        d = pd.DataFrame()
        # sess = tf.Session()
        # sess.run(tf.global_variables_initializer())
        # d['prob'] = permutations.eval(session=sess)
        d['prob'] = list(K.eval(y_pred))
        d['y'] = list(y_true)
        d = d.sort_values(['prob'], ascending=[0])
        y = d.y
        PosAll = pd.Series(y).value_counts()[1]
        NegAll = pd.Series(y).value_counts()[0]
        pCumsum = d['y'].cumsum()
        nCumsum = np.arange(len(y)) - pCumsum + 1
        pCumsumPer = pCumsum / PosAll
        nCumsumPer = nCumsum / NegAll
        TR1 = pCumsumPer[abs(nCumsumPer - 0.001).idxmin()]
        TR2 = pCumsumPer[abs(nCumsumPer - 0.005).idxmin()]
        TR3 = pCumsumPer[abs(nCumsumPer - 0.01).idxmin()]
        return 0.4 * TR1 + 0.3 * TR2 + 0.3 * TR3

    def AUC(y_true, y_pred):
        not_y_pred = np.logical_not(y_pred)
        y_int1 = y_true * y_pred
        y_int0 = np.logical_not(y_true) * not_y_pred
        TP = np.sum(y_pred * y_int1)
        FP = np.sum(y_pred) - TP
        TN = np.sum(not_y_pred * y_int0)
        FN = np.sum(not_y_pred) - TN
        TPR = np.float(TP) / (TP + FN)
        FPR = np.float(FP) / (FP + TN)
        return (1 + TPR - FPR) / 2

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    # model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[AUC])

    # Training (batch_size = number of samples used per update)
    res = pd.DataFrame()
    res[pri_id] = _Test[pri_id]
    _K_Train = pd.DataFrame()
    _KTrain = pd.DataFrame()
    _KTrain[pri_id] = _Train[pri_id]
    # The inputs need to be normalized
    _Train, _Test = _M.Normalize(_Train[features], _Test[features])

    from sklearn.model_selection import StratifiedKFold
    # Split _Train into 5 folds and average the predictions
    skf = StratifiedKFold(n_splits=5)
    pred = np.zeros((_Test.shape[0], 1))
    for train, test in skf.split(_Train, _Label):
        model.fit(_Train.iloc[train], _Label.iloc[train], epochs=50, batch_size=128)
        # Predict the held-out fold and the test set
        temp = model.predict(_Test)
        pred += np.asarray(temp)
        _K_T = pd.DataFrame()
        _K_T[pri_id] = _KTrain.iloc[test][pri_id]
        _K_T['mlp'] = model.predict(_Train.iloc[test])
        _K_Train = pd.concat((_K_Train, _K_T))
    pred /= 5

    # Fully-connected output
    res['mlp'] = pred
    res = pd.concat((_K_Train, res))
    res.to_csv(data_path + "data/_F_mlp_features.csv", index=False)
'''
Load the dataset
'''
train_dataset, test_dataset = Data_Reader.Mnist.Mnist_dataset().get_dataset()
loader_train = Data_Reader.get_dataloader(dataset=train_dataset, batch_size=param['batch_size'])
loader_test = Data_Reader.get_dataloader(dataset=test_dataset, batch_size=param['test_batch_size'])

'''
Build the model.
The model is defined in model.py; it is simply instantiated here.
'''
modelpath = './train4_AdvT_InputZero.pkl'
net = Models.Lenet5.Lenet5()
# Load the saved weights
net = Models.load_state_dict(net, modelpath)
base.enable_cuda(net)  # use CUDA

# Evaluate the initial accuracy
num_correct, num_samples, acc = Optimizer.test(net, loader_test)
print('[Start] right predict:(%d/%d) ,pre test_acc=%.4f%%' % (num_correct, num_samples, acc))

'''
Train the model
'''
net.train()  # training mode
criterion = nn.CrossEntropyLoss()  # loss function
# Optimizer: controls how parameters are updated (learning rate, weight decay, ...)
optimizer = torch.optim.RMSprop(
    net.parameters(),
    lr=param['learning_rate'],
    weight_decay=param['weight_decay'])
adversary = Adversary.LinfPGD.LinfPGDAttack(net, param['epsilon'])
import Models
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in recent scikit-learn versions
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# step1 - Load all the DB data into a dataframe
# step2 - Remove the fields that are not useful
# step3 - Fill empty fields with the mean value and fill missing descriptions with the title
# step4 - Build the term-document matrix from the long descriptions
# step5 - Join the matrix with the other features
# step6 - Create the training set and the test set
# step7 - Train the predictive models

books = Models.GetAllBooks()
books_df = pd.DataFrame(books)
books_df = books_df.set_index('bid')

drop_columns = ['thumbnailUrl', 'isbn', '_id']
books_df.drop(drop_columns, inplace=True, axis=1)
books_df['longDescription'].fillna(books_df['title'], inplace=True)
books_df['pageCount'].fillna(books_df['pageCount'].median(), inplace=True)
books_df['categories'].fillna('none', inplace=True)

# From each list, keep only the first value
books_df['cat'] = books_df['categories'].map(lambda x: x[0] if len(x) > 0 else 'default')
books_df['aut'] = books_df['authors'].map(lambda x: x[0] if len(x) > 0 else 'default')
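# Steps 4-7 from the comment block above are not shown in this snippet. The following is a
# minimal hedged sketch of how they might look: it assumes the term-document matrix is built
# from longDescription and that 'cat' (the first category) is used as the prediction target;
# the feature choice and hyperparameters are illustrative, not taken from the original code.
from scipy.sparse import hstack

# step4 - term-document matrix from the long descriptions
vectorizer = CountVectorizer(stop_words='english', max_features=2000)
term_doc = vectorizer.fit_transform(books_df['longDescription'])

# step5 - join the text matrix with a numeric feature (illustrative choice)
X = hstack([term_doc, books_df[['pageCount']].values])

# step6 - training set and test set
y = books_df['cat']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# step7 - train a predictive model and evaluate it
clf = MultinomialNB()
clf.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, clf.predict(X_test)))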
    # (tail of a reshape call whose beginning is not included in this snippet)
    (X_train.shape[0], X_train.shape[1], stock.shape[1]))
lbls_train = lbls_train.iloc[:-n_past]

# 1 hidden layer network with input: n_past x num_features, hidden 120x5, output 2x1
template = [[n_past, stock.shape[1]], [120, 5], [2, 1]]

# get Bilinear model
projection_regularizer = None
projection_constraint = keras.constraints.max_norm(3.0, axis=0)
attention_regularizer = None
attention_constraint = keras.constraints.max_norm(5.0, axis=1)
dropout = 0.1

model = Models.TABL(template, dropout, projection_regularizer, projection_constraint,
                    attention_regularizer, attention_constraint)
model.summary()

# create class weight
class_weight = {0: 1e6 / 300.0, 1: 1e6 / 400.0, 2: 1e6 / 300.0}

# training
# remove .iloc[2:] for single day
model.fit(X_train, lbls_train.iloc[2:], batch_size=256, epochs=100, class_weight=class_weight)
model.save('model.h5')
def searchresults():
    words = []
    info = []
    if request.method == 'POST':
        search_word = request.form['search']
        value = request.form['options']

        if 'username' in session:
            message = "you are logged in"
            currentuser = session['username']
            logState = True
            # Record the search for logged-in users
            new_search = Models.Search(search_word, currentuser)
            Models.db.session.add(new_search)
            Models.db.session.commit()

            # WIKI API
            if value == 'wiki':
                info = []
                words = Results.getWikipediaList(search_word)
                if not words:
                    error = "There are no matches. Search again"
                    return render_template("results.html", results=info, checked=value,
                                           searched_word=search_word, logState=logState, words=error)
                for w in words:
                    info.append(Results.getWikiInfo(w))
                return render_template("results.html", results=info, checked=value,
                                       searched_word=search_word, logState=logState)

            # STARWARS API
            elif value == 'sw':
                try:
                    info = Results.getStarWarsList(search_word)
                    if info[0] == 'NA':
                        error = "There are no results for that search. Please try searching again"
                        return render_template("swerror.html", error=error, checked=value,
                                               searched_word=search_word, logState=logState)
                    return render_template("starwars.html", person=info, error="", checked=value,
                                           searched_word=search_word, logState=logState)
                except Exception:
                    return "too many requests using the StarWarsAPI! Try using something else."

            # IMAGE API
            elif value == 'pic':
                pictures = Results.getPicture(search_word)
                return render_template("picture.html", pictures=pictures, checked=value,
                                       searched_word=search_word, logState=logState)

            # TWITTER API
            elif value == 'twit':
                tweets = TwitterAPI.TwitterAPI()
                tweet_list = tweets.getTweets(search_word)
                if not tweet_list:
                    return render_template("twitterError.html")
                return render_template("twitter.html", checked=value, searched_word=search_word,
                                       tweetlist=tweet_list, logState=logState)

            # YOUTUBE API
            elif value == 'youtube':
                videos = YoutubeAPI.youtube_search(search_word)
                if not videos:
                    uerror = "No videos found. Try searching something else"
                    return render_template("youtube.html", uerror=uerror, checked=value,
                                           searched_word=search_word, logState=logState)
                return render_template("youtube.html", videos=videos, checked=value,
                                       searched_word=search_word, logState=logState)

            # ALL APIS
            elif value == 'all':
                results = []
                words = Results.getWikipediaList(search_word)
                for w in words:
                    results.append(Results.getWikiInfo(w))
                pictures = Results.getPicture(search_word)
                picture = pictures[0]
                info = Results.getStarWarsList(search_word)
                tweets = TwitterAPI.TwitterAPI()
                tweet_list = tweets.getTweets(search_word)
                videos = YoutubeAPI.youtube_search(search_word)

                # Collect the per-API error messages and keyword arguments for the combined
                # results page instead of repeating a render_template branch per combination.
                kwargs = {'results': results, 'picture': picture, 'checked': value,
                          'searched_word': search_word, 'logState': logState}
                if info[0] == 'NA':
                    kwargs['person'] = ""
                    kwargs['error'] = "STARWARS API: Nothing found! Try searching again."
                else:
                    kwargs['person'] = info
                    kwargs['error'] = ""
                if not words:
                    kwargs['wikierror'] = "There are no matches. Search again"
                if not tweet_list:
                    kwargs['terror'] = "Sorry there are no tweets. Try searching again"
                else:
                    kwargs['tweetlist'] = tweet_list[:3]
                if not videos:
                    kwargs['uerror'] = "No videos found. Try searching something else"
                else:
                    kwargs['videos'] = videos
                return render_template("allresults.html", **kwargs)

        # REGULAR (not logged in) USER
        else:
            logState = False
            message = "you are not logged in"

            # WIKI API
            if value == 'wiki':
                info = []
                words = Results.getWikipediaList(search_word)
                if not words:
                    error = "There are no matches. Search again"
                    return render_template("results.html", results=info, checked=value,
                                           searched_word=search_word, logState=logState, words=error)
                for w in words:
                    info.append(Results.getWikiInfo(w))
                return render_template("results.html", results=info, checked=value,
                                       searched_word=search_word, logState=logState)

            # STARWARS API
            elif value == 'sw':
                try:
                    info = Results.getStarWarsList(search_word)
                    if info[0] == 'NA':
                        error = "There are no results for that search. Please try searching again"
                        return render_template("swerror.html", error=error, checked=value,
                                               searched_word=search_word)
                    return render_template("starwars.html", person=info, error="", checked=value,
                                           searched_word=search_word, logState=logState)
                except Exception:
                    return "too many requests using the StarWarsAPI! Try using something else."

            # IMAGE API
            elif value == 'pic':
                pictures = Results.getPicture(search_word)
                return render_template("picture.html", pictures=pictures, checked=value,
                                       searched_word=search_word, logState=logState)

            # ALL APIS (guests only get Wikipedia, Star Wars and pictures)
            elif value == 'all':
                results = []
                words = Results.getWikipediaList(search_word)
                for w in words:
                    results.append(Results.getWikiInfo(w))
                pictures = Results.getPicture(search_word)
                picture = pictures[0]
                info = Results.getStarWarsList(search_word)

                kwargs = {'results': results, 'picture': picture, 'checked': value,
                          'searched_word': search_word, 'logState': logState}
                if info[0] == 'NA':
                    kwargs['person'] = ""
                    kwargs['error'] = "STARWARS API: Nothing found! Try searching again."
                else:
                    kwargs['person'] = info
                    kwargs['error'] = ""
                if not words:
                    kwargs['wikierror'] = "There are no matches. Search again"
                return render_template("allresults.html", **kwargs)
#!/usr/bin/env python
from settings import *
from algo_settings import *
from CustomModels import *
from Models import *

if __name__ == '__main__':
    if validationBool:
        models = Models(model_list)
        models.train()
        models.validation()
        models.dump()
        if multipleSubmissions:
            models.writeSubmissions()
    if submitBool:
        model.train()
        model.validate()
        print(model.trainPath)
        print(model.submissionPath)
        model.writeSubmission()
    print('Hello World Juju and Ulysse')
from PIL import ImageOps
import matplotlib.pyplot as plt
import time

# Training settings
parser = argparse.ArgumentParser(description='KITTI Depth Completion Task TEST')
parser.add_argument('--dataset', type=str, default='kitti',
                    choices=Datasets.allowed_datasets(), help='dataset to work with')
parser.add_argument('--mod', type=str, default='mod',
                    choices=Models.allowed_models(), help='Model for use')
parser.add_argument('--no_cuda', action='store_true', help='no gpu usage')
parser.add_argument('--input_type', type=str, default='rgb', help='use rgb for rgbdepth')

# Data augmentation settings
parser.add_argument('--crop_w', type=int, default=1216, help='width of image after cropping')
parser.add_argument('--crop_h', type=int, default=256, help='height of image after cropping')
def main():
    global args
    global dataset
    args = parser.parse_args()
    torch.backends.cudnn.benchmark = args.cudnn

    best_file_name = glob.glob(os.path.join(args.save_path, 'model_best*'))[0]
    save_root = os.path.join(os.path.dirname(best_file_name), 'results')
    if not os.path.isdir(save_root):
        os.makedirs(save_root)
    print("==========\nArgs:{}\n==========".format(args))

    # INIT
    print("Init model: '{}'".format(args.mod))
    args.channels_in = 1 if args.input_type == 'depth' else 4
    model = Models.define_model(args.mod, args)
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))

    if not args.no_cuda:
        # Load on gpu before passing params to optimizer
        if not args.multi:
            model = model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    if os.path.isfile(best_file_name):
        print("=> loading checkpoint '{}'".format(best_file_name))
        checkpoint = torch.load(best_file_name)
        model.load_state_dict(checkpoint['state_dict'])
        lowest_loss = checkpoint['loss']
        best_epoch = checkpoint['best epoch']
        print('Lowest RMSE for selection validation set was {:.4f} in epoch {}'.format(
            lowest_loss, best_epoch))
    else:
        print("=> no checkpoint found at '{}'".format(best_file_name))
        return
    if not args.no_cuda:
        model = model.cuda()

    print("Initializing dataset {}".format(args.dataset))
    dataset = Datasets.define_dataset(args.dataset, args.data_path, args.input_type)
    dataset.prepare_dataset()
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    depth_norm = transforms.Normalize(mean=[14.97 / args.max_depth], std=[11.15 / args.max_depth])

    model.eval()
    print("===> Start testing")
    total_time = []
    if args.num_samples != 0:
        random_sampler = Random_Sampler(args.num_samples)

    with torch.no_grad():
        for i, (img, rgb, gt) in tqdm.tqdm(
                enumerate(zip(dataset.selected_paths['lidar_in'],
                              dataset.selected_paths['img'],
                              dataset.selected_paths['gt']))):
            raw_path = os.path.join(img)
            raw_pil = Image.open(raw_path)
            gt_path = os.path.join(gt)
            gt_pil = Image.open(gt)

            assert raw_pil.size == (1216, 352)
            crop = 352 - args.crop_h
            raw_pil_crop = raw_pil.crop((0, crop, 1216, 352))
            gt_pil_crop = gt_pil.crop((0, crop, 1216, 352))

            raw = depth_read(raw_pil_crop, args.sparse_val)
            if args.num_samples != 0:
                raw = random_sampler.sample(raw)
            raw = to_tensor(raw).float()
            gt = depth_read(gt_pil_crop, args.sparse_val)
            gt = to_tensor(gt).float()
            valid_mask = (raw > 0).detach().float()

            input = torch.unsqueeze(raw, 0).cuda()
            gt = torch.unsqueeze(gt, 0).cuda()

            # if args.normal:
            #     input = input / args.max_depth
            #     input = depth_norm(input)

            if args.input_type == 'rgb':
                rgb_path = os.path.join(rgb)
                rgb_pil = Image.open(rgb_path)
                assert rgb_pil.size == (1216, 352)
                rgb_pil_crop = rgb_pil.crop((0, crop, 1216, 352))
                rgb = to_tensor(rgb_pil_crop).float()
                if not args.normal:
                    rgb = rgb * 255.0
                else:
                    rgb = norm(rgb)
                rgb = torch.unsqueeze(rgb, 0).cuda()
                input = torch.cat((input, rgb), 1)

            torch.cuda.synchronize()
            a = time.perf_counter()
            output, hidden = model(input, hidden=(None, None))
            if 'mod' in args.mod or 'stacked' in args.mod:
                output = output[0]
            torch.cuda.synchronize()
            b = time.perf_counter()
            total_time.append(b - a)

            # if args.normal:
            #     output = output * args.max_depth
            output = torch.clamp(output, min=0, max=85)
            output = output * 256.
            raw = raw * 256.

            output = output[0][0:1].cpu()
            data = output[0].numpy()
            if crop != 0:
                padding = (0, 0, crop, 0)
                output = torch.nn.functional.pad(output, padding, "constant", 0)
                output[:, 0:crop] = output[:, crop].repeat(crop, 1)
            pil_img = to_pil(output.int())
            assert pil_img.size == (1216, 352)
            pil_img.save(os.path.join(save_root, os.path.basename(img)))

    print('average_time: ', sum(total_time[100:]) / (len(total_time[100:])))
    print('num imgs: ', i + 1)
test_data = util.SICKData('test')
# test_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=1)
test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=True, drop_last=True)
embedding_model = util.readEmbedding()

# Model
print('==> Building model..')
if args.model == 'biRNN':
    net = Models.biRNN(embedding_model, batch_size=args.batch_size, hidden_size=args.hidden_size,
                       embedding_dim=300, dropout=args.dropout_lstm)
# TODO: add more models
net.to(device)
optimizer = optim.Adam(net.parameters(), lr=args.lr)
CEloss = nn.BCELoss()

start_epoch = 0  # start from epoch 0 or last checkpoint epoch
# Load checkpoint.
if args.checkpoint:
    print('==> Resuming from checkpoint..')
    print(args.checkpoint)
    checkpoint = torch.load(args.checkpoint)
    net.load_state_dict(checkpoint['net_state_dicts'])
def train(lr=0.01, gpu=0, epochs=500, file_name='DefaultFileName', charge=None, save=True,
          batch_size=64, epochs_for_saving=3):
    # Establishing the device
    device = 'cuda:' + str(gpu) if torch.cuda.is_available() else 'cpu'
    if device == 'cpu':
        warnings.warn(message="Executing on CPU!", category=ResourceWarning)

    # Generating the model
    # Change this line for changing the model to create
    model = models.ThreeLayerSigmoidRegressor()
    if charge is not None:
        model = load_model(model=model, file_name=charge)
    model = model.to(device)

    # Training parameters
    criterion = RMSLELoss()  # nn.MSELoss() # nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=lr)  # optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    # Generating the dataset
    dataset = ASHRAEDataset(erase_nans=False)
    train_len = int(math.ceil(0.95 * len(dataset)))
    validation_len = int(math.ceil(0.04 * len(dataset)))
    train, validation, test = data.random_split(
        dataset, (train_len, validation_len, len(dataset) - train_len - validation_len))

    # Pass to DataLoader for reading batches
    train = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)
    validation = DataLoader(validation, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

    # Uncomment to see Pearson correlations
    # pearsons_of_each_variable(data=train)

    # Writer for plotting graphics in TensorBoard
    writer = SummaryWriter(comment=file_name)

    print('Starting the training...')
    print("Batch Size: " + str(batch_size))
    print("Running in: " + device)
    for i in range(epochs):
        train_step(model=model, data=train, criterion=criterion, optimizer=optimizer,
                   epoch=i, device=device, writer=writer, verbose=True)
        validation_step(model=model, data=validation, criterion=criterion, epoch=i,
                        device=device, writer=writer)
        if save and i % epochs_for_saving == 0:
            writer.flush()
            model = model.cpu()
            save_model(model=model, file_name=file_name)
            model = model.to(device)
    writer.close()
np.random.seed(117)  # set numpy seed to get consistent data
dataGen = GenerateARXData(noise_form=noise_form)
X, Y, E = dataGen(N, 1)

# Scale the data
scale = Y.max()
X = X / scale
Y = Y / scale
E = E / scale

# simulate test data set
X_test, Y_test, _ = dataGen(N_test, 1)
X_test = X_test / scale
Y_test = Y_test / scale

net = Models.EBM_ARX_net(feature_net_dim=hidden_dim, predictor_net_dim=hidden_dim,
                         decay_rate=0.99, num_epochs=150, use_double=False)
net.fit(X, Y)
training_losses = net.training_losses

plt.plot(training_losses)
plt.title('Training loss')
plt.xlabel('epoch')
plt.show()

# make baseline predictions of test data set using least squares
estim_param, _resid, _rank, _s = linalg.lstsq(X, Y)
mse_baseline = np.mean((X_test @ estim_param - Y_test) ** 2)

# make predictions of test data set using trained EBM NN
yhat, prediction_scores = net.predict(X_test)
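# The least-squares baseline gets an explicit test MSE above, but the EBM predictions do
# not. A small hedged follow-up (assuming yhat has the same shape as Y_test) makes the
# comparison explicit; this is an illustrative addition, not part of the original script.
mse_ebm = np.mean((yhat - Y_test) ** 2)
print('least-squares test MSE: {:.5f}'.format(mse_baseline))
print('EBM-ARX test MSE:       {:.5f}'.format(mse_ebm))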
### parse the full xml files and extract the relevant data
print('Parsing courses...')
data = ET.parse('data/courses_full.xml').getroot()
print(str(len(data)) + ' courses found')

courses = []
for course in data.findall('Courses/FullXML/Course'):
    title = course.find('Title[@Lang="en-GB"]').attrib.get('Title')
    ects = course.find('Point').text
    lang = course.find('Teaching_Language').attrib.get('LangCode')
    prereqs = []
    prereq_node = course.find('Qualified_Prerequisites/Qualified_Prerequisites_Txt[@Lang="en-GB"]')
    if prereq_node is not None:
        prereqs = Models.parse_prereqs(prereq_node.attrib.get('Txt'))
    course = Models.Course(title, course.attrib.get('CourseID'), course.attrib.get('CourseCode'),
                           prereqs, ects, lang)
    courses.append(course)

dicts = [c.__dict__ for c in courses]
with open('data/courses_full.json', 'w') as f:
    f.write(json.dumps(dicts) + '\n')
test_random_indices_poly = random_indices_poly[train_split_poly:]

# Class for loading Polygons sequence from a sequence folder
denoise_generator_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=train_random_indices_poly, inputs=Inputs, labels=Labels, batch_size=50)
denoise_generator_val_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=test_random_indices_poly, inputs=Inputs, labels=Labels, batch_size=50)

shape = (64, 64, 4)
denoise_model = Models.get_baseline_model(shape)

# ===================================== Train =====================================
epochs = 1
Train_Denoiser.train_denoiser(denoise_generator_poly, denoise_generator_val_poly,
                              denoise_model, epochs)

# ===================================== Output results =====================================
imgs, imgs_clean = next(iter(denoise_generator_val_poly))
index = np.random.randint(0, imgs.shape[0])
imgs_den = denoise_model.predict(imgs)
def post(self):
    self.response.out.write("added <br />")
    user = users.get_current_user()
    # See if the user chose to upload a payslip or other
    if self.request.POST['type'] == "payslip":
        self.response.out.write("payslip <br />")
        payslip = Models.Payslip(parent=Models.payslip_key(user.user_id()))
        # Set the model attributes
        payslip.ownerId = user.user_id()
        payslip.beginning = datetime.datetime.strptime(self.request.POST['beginning'], '%Y-%m-%d').date()
        payslip.ending = datetime.datetime.strptime(self.request.POST['ending'], '%Y-%m-%d').date()
        payslip.income = float(self.request.POST['income'])
        payslip.tax = float(self.request.POST['tax'])
        payslip.net = float(self.request.POST['income']) - float(self.request.POST['tax'])
        payslip.company = self.request.POST['company']

        # Create the file
        file_name = files.blobstore.create(mime_type='application/octet-stream')
        # Open the file and write to it
        with files.open(file_name, 'a') as f:
            f.write("data")
        # Finalize the file. Do this before attempting to read it.
        files.finalize(file_name)
        # Get the file's blob key
        payslip.file_key = files.blobstore.get_blob_key(file_name)

        # Output the given form to the confirmation page
        self.response.out.write(payslip.ownerId + "<br/>")
        self.response.out.write(str(payslip.upload_date) + "<br/>")
        self.response.out.write(str(payslip.beginning) + "<br/>")
        self.response.out.write(str(payslip.ending) + "<br/>")
        self.response.out.write(str(payslip.income) + "<br/>")
        self.response.out.write(str(payslip.tax) + "<br/>")
        self.response.out.write(str(payslip.net) + "<br/>")
        self.response.out.write(str(payslip.company) + "<br/>")
        # Add the model to the data store
        payslip.put()
        self.redirect('/payslips')
    else:
        self.response.out.write("other <br />")
        file = Models.File(parent=Models.file_key(user.user_id()))
        # Set the model attributes
        file.ownerId = user.user_id()
        file.title = self.request.POST['title']
        file.description = self.request.POST['description']

        form_data = cgi.FieldStorage()
        if form_data.getvalue('file'):
            subject = form_data.getvalue('file')
            # Create the file
            file_name = files.blobstore.create(mime_type='application/octet-stream')
            # Open the file and write to it
            with files.open(file_name, 'a') as f:
                f.write(subject)
            # Finalize the file. Do this before attempting to read it.
            files.finalize(file_name)
            # Get the file's blob key
            file.file_key = files.blobstore.get_blob_key(file_name)
        else:
            subject = "Not set"

        # Output the given form to the confirmation page
        self.response.out.write(file.ownerId + "<br/>")
        self.response.out.write(file.title + "<br/>")
        self.response.out.write(file.description + "<br/>")
        # Add the model to the data store
        file.put()
        self.redirect('/files')
def validate_unet(train_dataset, val_dataset=None, net_depth=4, val_steps=100, loss=None, tag=''):
    """Run full volume CPU validation on both training and validation."""
    if val_dataset is None:
        val_dataset = train_dataset
    Logger.info("Validate unet predictions for training dataset %s on %s" %
                (train_dataset.name, val_dataset.name))
    with tf.device('/cpu:0'):
        model = Models.UNet(train_dataset.n_classes, depth=net_depth,
                            n_channels=train_dataset.n_modalities)

        loaddir = Tools.get_dataset_savedir(train_dataset, loss)
        weights_file = '%s/best_weights.h5' % loaddir
        secondary_weights_file = '%s/weights.h5' % loaddir
        if loss is None:
            loss = 'sparse_categorical_crossentropy'
        model.compile(loss=loss, optimizer='sgd')

        # tr_gen, val_gen = dataset.get_full_volume_generators(patch_multiplicity=model.patch_multiplicity,
        #                                                      infinite=False)
        val_gen = val_dataset.get_val_generator(patch_multiplicity=model.patch_multiplicity,
                                                infinite=False)
        if val_dataset is not train_dataset:
            val_gen = BatchGenerator.ModalityFilter(val_gen, val_dataset.modalities,
                                                    train_dataset.modalities)

        load_weights(model, weights_file, secondary_weights_file)

        # for generator in [val_gen]:
        generator = val_gen
        Logger.info('Running validation on %s, trained with %s' %
                    (val_dataset.name, train_dataset.name))
        metrics = []
        for _, y_true, y_pred in model.predict_generator(generator, steps=val_steps):
            ignore_mask = y_true == -1
            Logger.debug('y_pred labels:', set(y_pred.flat), '- y_true labels:', set(y_true.flat))
            y_true[ignore_mask] = 0
            y_pred[ignore_mask] = 0
            new_metrics = MetricsMonitor.MetricsMonitor.getMetricsForWholeSegmentation(
                y_pred, y_true, labels=range(1, model.n_classes))
            new_metrics = np.squeeze(new_metrics, axis=0)
            # new_metrics = np.nan_to_num(np.squeeze(new_metrics, axis=0))
            metrics.append(new_metrics)

        # Note: this assumes FetcherThread generates images in the order given by paths.
        paths = generator.paths

        metrics = np.array(metrics)
        metric_labels = ['Accuracy', 'Sensitivity', 'Specificity', 'Dice', 'Jaccard']
        df = pd.DataFrame()
        for i, clss in enumerate(val_dataset.classes[1:]):
            tmp_df = pd.DataFrame(metrics[:, i, :], columns=metric_labels)
            tmp_df['Class'] = clss
            tmp_df['Path'] = paths
            df = df.append(tmp_df)
        if tag:
            df['Loss function'] = tag
        Logger.debug('Validation metrics:\n', df.groupby(['Loss function', 'Class']).mean())

        metrics_file = '%s/validation_metrics' % loaddir
        if val_dataset is not train_dataset:
            metrics_file += '_' + val_dataset.name
        metrics_file += '.csv'
        Logger.info('Saving validation metrics to', metrics_file)
        # np.save(metrics_file, np.array(gen_metrics))
        df.to_csv(metrics_file)
n_hidden = 100    # Number of hidden units
n_layers = 2      # Number of LSTM layers
p_dropout = 0.2   # Probability of dropout
bidir = False     # Sets if the RNN layer is bidirectional or not

if use_delta_ts == 'normalized':
    # Count the delta_ts column as another feature, only ignore ID, timestamp and label columns
    n_inputs = dataset.n_inputs + 1
elif use_delta_ts == 'raw':
    raise Exception('ERROR: When using a model of type Vanilla RNN, we can\'t use raw delta_ts. '
                    'Please either normalize it (use_delta_ts = "normalized") or discard it '
                    '(use_delta_ts = False).')

# Instantiating the model:
model = Models.VanillaRNN(n_inputs, n_hidden, n_outputs, n_layers, p_dropout,
                          embed_features=embed_features, n_embeddings=n_embeddings,
                          embedding_dim=embedding_dim, bidir=bidir)
model

# Define the name that will be given to the models that will be saved:
model_name = 'rnn'
if dataset_mode == 'pre-embedded':
    model_name = model_name + '_pre_embedded'
elif dataset_mode == 'learn embedding':
    model_name = model_name + '_with_embedding'
elif dataset_mode == 'one hot encoded':
    model_name = model_name + '_one_hot_encoded'
if use_delta_ts is not False:
    model_name = model_name + '_delta_ts'
model_name
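# Hedged sketch of one way to produce the "normalized" delta_ts the code above expects,
# using a simple min-max scaling. The dataframe name df and the 'delta_ts' column name are
# assumptions for illustration, not taken from the original notebook.
if use_delta_ts == 'normalized':
    delta_min, delta_max = df['delta_ts'].min(), df['delta_ts'].max()
    df['delta_ts'] = (df['delta_ts'] - delta_min) / (delta_max - delta_min)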
def visualize_unet(train_dataset, val_dataset=None, net_depth=4, loss=None, savefile='',
                   plot=False, full_volume=True, skip=0):
    """Compute one MultiUNet prediction and visualize against ground truth."""
    if val_dataset is None:
        val_dataset = train_dataset
    Logger.info("Visualizing unet predictions for training dataset %s on an image from %s" %
                (train_dataset.name, val_dataset.name))
    device = '/cpu:0' if full_volume else '/gpu:0'
    with tf.device(device):
        model = Models.UNet(train_dataset.n_classes, depth=net_depth,
                            n_channels=train_dataset.n_modalities)

        if full_volume:
            generator = val_dataset.get_val_generator(
                batch_size=1, patch_multiplicity=model.patch_multiplicity, infinite=False)
        else:
            generator = val_dataset.get_val_generator(
                (128, 128, 128), transformations=BatchGenerator.Transformations.CROP,
                patch_multiplicity=model.patch_multiplicity, batch_size=1, infinite=False)
        if val_dataset is not train_dataset:
            generator = BatchGenerator.ModalityFilter(generator, val_dataset.modalities,
                                                      train_dataset.modalities)

        savedir = Tools.get_dataset_savedir(train_dataset, loss)
        weights_file = '%s/best_weights.h5' % savedir
        secondary_weights_file = '%s/weights.h5' % savedir
        if loss is None:
            loss = 'sparse_categorical_crossentropy'
        model.compile(loss=loss, optimizer='adam')
        # print(model.summary(line_length=150, positions=[.25, .55, .67, 1.]))
        load_weights(model, weights_file, secondary_weights_file)

        Logger.info('Predicting image', generator.paths[skip])
        for i in range(skip):
            next(generator)
        if full_volume:
            x, y, y_pred = next(model.predict_generator(generator, steps=1))
        else:
            x, y = next(generator)
            y_pred = model.predict(x, modalities=val_dataset.modalities)
        x = x[0, ..., 0]
        y = y[0, ...]
        y_pred = y_pred[0]
        if savefile:
            Helpers.save_predictions(x, y, y_pred, savefile=savefile)
        if plot:
            Helpers.visualize_predictions(x, y, y_pred)
train_option['data_name'] = data_name.replace('/', '_')
if gray_scale:
    train_option['lr'] = [1e-2, 2e-4]
    train_option['l2_reg'] = [1e-4, 5e-4]

# training and validation data (I: cam-captured envelope; J: hidden content GT)
train_data = {'I': cam_train[:num_train, :, :, :], 'J': gt_train[:num_train, :, :, :]}
valid_data = {'I': cam_valid, 'J': gt_valid}

for degradation in degradation_list:
    # for repeatability
    resetRNGseed(0)

    # WarpingNet for geometric correction
    warping_net = Models.WarpingNet(chan_in=train_data['I'].shape[1], out_size=gt_train.shape[2:4])
    if torch.cuda.device_count() >= 1:
        warping_net = nn.DataParallel(warping_net, device_ids=device_ids).to(device)

    # Dehazing and RefineNet
    dehazing_refine_net = Models.DehazingRefineNet(chan_in=train_data['I'].shape[1],
                                                   chan_out=train_data['J'].shape[1],
                                                   degradation=degradation)
    if torch.cuda.device_count() >= 1:
        dehazing_refine_net = nn.DataParallel(dehazing_refine_net, device_ids=device_ids).to(device)

    # Neural-STE (with GAN)
    model = Models.NeuralSTE(warping_net, dehazing_refine_net, degradation=degradation)
    if torch.cuda.device_count() >= 1:
        model = nn.DataParallel(model, device_ids=device_ids).to(device)

    # train
    print('-------------------------------------- Training Options -----------------------------------')
    print("\n".join("{}: {}".format(k, v) for k, v in train_option.items()))
    print('-------------------------------------- Start training {:s} ---------------------------'.format(model.module.name))
def train_unet(dataset, epochs=1, steps_per_epoch=200, batch_size=7, patch_shape=(32, 32, 32),
               net_depth=4, loss=None, sample_bg=False):
    """Build UNet, load the weights (if any), train, save weights."""
    Logger.info("Training unet on %s" % (dataset.name))
    savedir = Tools.get_dataset_savedir(dataset, loss)
    weights_file = '%s/weights.h5' % savedir
    best_weights_file = '%s/best_weights.h5' % savedir
    epoch_file = '%s/last_epoch.txt' % savedir
    metrics_file = '%s/metrics.csv' % savedir
    full_volume_metrics_file = '%s/full_volume_metrics' % savedir
    tensorboard_dir = '%s/tensorboard' % savedir

    if os.path.isfile(epoch_file):
        initial_epoch = int(open(epoch_file, 'r').readline())
    else:
        initial_epoch = 0
    epochs += initial_epoch

    n_classes = dataset.n_classes
    n_channels = dataset.n_modalities
    if loss is None:
        loss = 'sparse_categorical_crossentropy'
    model = Models.UNet(n_classes, depth=net_depth, n_channels=n_channels)
    # print('patch_multiplicity', model.patch_multiplicity)

    # patch_tr_gen = dataset.get_train_generator(patch_shape, batch_size=batch_size)
    # patch_val_gen = val_dataset.get_val_generator(patch_shape=(128, 128, 128))
    patch_tr_gen, patch_val_gen = dataset.get_patch_generators(patch_shape, batch_size=batch_size,
                                                               sample_train_bg=sample_bg)
    # full_tr_gen, full_val_gen = dataset.get_full_volume_generators(model.patch_multiplicity)

    model.compile(loss=loss,
                  # optimizer='adam',
                  optimizer=keras.optimizers.Adam(lr=0.0002),
                  metrics=[sparse_categorical_accuracy, Metrics.discrete_mean_dice_coef])
    print(model.summary(line_length=150, positions=[.25, .55, .67, 1.]))

    load_weights(model, weights_file)
    Tools.ensure_dir(savedir)
    model_checkpoint = ModelCheckpoint(weights_file, monitor='val_loss', save_best_only=False)
    best_model_checkpoint = ModelCheckpoint(best_weights_file, monitor='val_loss', save_best_only=True)
    for file in glob.glob('tensorboard/*'):
        os.remove(file)
    tensorboard = Metrics.TrainValTensorBoard(log_dir=tensorboard_dir, histogram_freq=0,
                                              write_graph=True, write_images=True)

    # def sched(epoch, lr):
    #     return lr * .99
    # lr_sched = LearningRateScheduler(sched, verbose=1)
    # full_volume_validation = Metrics.FullVolumeValidationCallback(model,
    #     full_val_gen, metrics_savefile=full_volume_metrics_file, validate_every_n_epochs=10)

    h = model.fit_generator(patch_tr_gen,
                            steps_per_epoch=steps_per_epoch,
                            initial_epoch=initial_epoch,
                            epochs=epochs,
                            validation_data=patch_val_gen,
                            validation_steps=10,
                            callbacks=[model_checkpoint,
                                       best_model_checkpoint,
                                       tensorboard,
                                       # lr_sched,
                                       # full_volume_validation
                                       ])

    # Write metrics to a csv.
    keys = sorted(h.history.keys())
    if not os.path.exists(metrics_file):
        metrics_f = open(metrics_file, 'w')
        metrics_writer = csv.writer(metrics_f)
        metrics_writer.writerow(keys)
    else:
        metrics_f = open(metrics_file, 'a')
        metrics_writer = csv.writer(metrics_f)
    metrics_writer.writerows(zip(*[h.history[key] for key in keys]))
    open(epoch_file, 'w').write(str(epochs))
    Logger.info("Done")
    y_cv_cat = np_utils.to_categorical(y_cv.flatten(), modelParams.nb_classes)
    y_cv_cat = y_cv_cat.reshape((y_cv.shape[0], y_cv.shape[1] * y_cv.shape[2], modelParams.nb_classes))

    loss = model.evaluate(x_cv, y_cv_cat, batch_size=modelParams.batchSize)
    logging.info("Loss: {0}".format(str(loss)))
    return model


modelsPath = join(DataTools.inDir, "models")
if not exists(modelsPath):
    makedirs(modelsPath)

model = Models.getUNet(mp.input_shape, mp.nb_classes)
# model = load_model(join(modelsPath, "gnet_gray_test_5.hdf5"))

allTrainIds = DataTools.trainImageIds
trainImages = ['6110_3_1', '6100_2_3', '6040_1_3', '6010_4_4', '6140_3_1', '6110_1_2', '6060_2_3']
# np.random.permutation(allTrainIds)[:7]
# '6040_1_3' - do not use
# '6010_4_4' - do not use

checkpointer = ModelCheckpoint(filepath="unet_weights.{epoch:02d}.hdf5", verbose=1, save_best_only=True)
csv_logger = CSVLogger('training.log')
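# The two callbacks above are created but the training call itself is not shown in this
# snippet. A minimal hedged sketch of how they might be wired up follows; trainGenerator,
# the step counts and the validation generator are assumptions for illustration (the
# checkpointer monitors val_loss, so real validation data would be needed).
model.fit_generator(trainGenerator(trainImages, mp.batchSize),
                    steps_per_epoch=100,
                    epochs=10,
                    validation_data=valGenerator(trainImages, mp.batchSize),
                    validation_steps=10,
                    callbacks=[checkpointer, csv_logger])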
def add_song(self, user):
    user = user + "_playlist"
    playlist_name = "upload"
    Models.add_song_to_playlist(self, playlist_name, user)
def train(model_name="CNN", batch_size=32, nb_epoch=2000, dataset="mnist", optimizer="CDGD", nb_agents=5, step_eval=20, **kwargs): paramExpla = [ "model_name", "optimizer", "dataset", "nb_epoch", "batch_size", "nb_agents" ] + list(kwargs.keys()) parameters = [ model_name, optimizer, dataset, nb_epoch, batch_size, nb_agents ] + list(kwargs.values()) print('\nStarting Process:') print(list(zip(paramExpla, parameters))) if dataset == "cifar10": (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() if dataset == "cifar100": (X_train, Y_train), (X_test, Y_test) = cifar100.load_data() if dataset == "mnist": (X_train, Y_train), (X_test, Y_test) = mnist.load_data() X_train = X_train.reshape((X_train.shape[0], 28, 28, 1)) X_test = X_test.reshape((X_test.shape[0], 28, 28, 1)) X_train = X_train.astype('float32') X_test = X_test.astype('float32') X_train /= 255. X_test /= 255. img_dim = X_train.shape[-3:] nb_classes = len(np.unique(Y_train)) Y_train = np_utils.to_categorical(Y_train, nb_classes) Y_test = np_utils.to_categorical(Y_test, nb_classes) ins = [X_train, Y_train] num_train_samples = ins[0].shape[0] agent_data_size = (num_train_samples // nb_agents) x_data = {} y_data = {} x_vali = {} y_vali = {} for i in range(nb_agents): x_data['input_' + str(i + 1)] = X_train[i * agent_data_size:(i + 1) * agent_data_size] y_data['d' + str(i + 1)] = Y_train[i * agent_data_size:(i + 1) * agent_data_size] x_vali['input_' + str(i + 1)] = X_test y_vali['d' + str(i + 1)] = Y_test test_batch_size = 1024 if 'test_batch_size' in kwargs: test_batch_size = kwargs['test_batch_size'] trainData = tf.data.Dataset.from_tensor_slices( (x_data, y_data)).shuffle(1024).repeat().batch(batch_size).prefetch(1) testData = tf.data.Dataset.from_tensor_slices( (x_vali, y_vali)).batch(test_batch_size) #print(trainData) #print(testData) lr = 1e-3 if 'lr' in kwargs: lr = kwargs['lr'] if lr == 'PolynomialDecay': if not 'starter_learning_rate' in kwargs: raise ValueError( 'For TernGrad, must specify the "starter_learning_rate" parameter' ) if not 'decay_steps' in kwargs: raise ValueError( 'For TernGrad, must specify the "decay_steps" parameter') if not 'end_learning_rate' in kwargs: raise ValueError( 'For TernGrad, must specify the "end_learning_rate" parameter' ) if not 'power' in kwargs: raise ValueError( 'For TernGrad, must specify the "power" parameter') lr = tf.keras.optimizers.schedules.PolynomialDecay( kwargs['starter_learning_rate'], kwargs['decay_steps'], kwargs['end_learning_rate'], power=kwargs['power']) if nb_agents != 1: topology = 'full' if 'topology' in kwargs: topology = kwargs['topology'] always_update = False if 'always_update' in kwargs: always_update = kwargs['always_update'] big_k = 1 maxLam = 0.01 params = Params(nb_agents, big_k, always_update=always_update, topology=topology) if optimizer == "CDGD": if not 'c1' in kwargs: raise ValueError('For CDGD, must specify the "c1" parameter') if not 'delta' in kwargs: raise ValueError('For CDGD, must specify the "delta" parameter') params = Params(nb_agents, big_k, always_update=always_update, topology=topology, maxLam=maxLam) opt = CDGD(lr=1E-2, decay=0, nesterov=False, nb_agents=nb_agents, params=params, c1=kwargs['c1'], delta=kwargs['delta']) elif optimizer == "QCDGD": if not 'c1' in kwargs: raise ValueError('For QDGD, must specify the "c1" parameter') if not 'clipStd' in kwargs: raise ValueError('For QDGD, must specify the "clipStd" parameter') if not 'ternSt' in kwargs: raise ValueError('For QDGD, must specify the "ternSt" parameter') if nb_agents == 1: big_k = 1 
maxLam = 0.01 topology = 'full' always_update = False params = Params(nb_agents, big_k, always_update=always_update, topology=topology, maxLam=maxLam) opt = QCDGD(lr=1E-2, decay=0, nesterov=False, nb_agents=nb_agents, params=params, ternSt=kwargs['ternSt'], clip=kwargs['clipStd'], c1=kwargs['c1']) initer = 'glorot_uniform' if 'initer' in kwargs: initer = kwargs['initer'] identical = True if 'identical' in kwargs: identical = kwargs['identical'] model = Models.load(model_name, img_dim, nb_classes, opt, nb_agents=nb_agents, identical=identical, kernel_initializer=initer) # model.summary() #stop step_list = [] train_losses, train_accs = [], [] val_losses, val_accs = [], [] #data = model.fit(X_agent_ins, Y_agent_ins, validation_data=(x_validation, y_validation), epochs=nb_epoch // step_eval, steps_per_epoch=step_eval, batch_size=batch_size, shuffle=True) data = model.fit(trainData, validation_data=testData, epochs=nb_epoch // step_eval, steps_per_epoch=step_eval, batch_size=batch_size, shuffle=True) #data = model.fit(generator, validation_data=(x_validation, y_validation), epochs=nb_epoch // step_eval, steps_per_epoch=step_eval, batch_size=batch_size, shuffle=True) keys = list(data.history.keys()) offset = 1 if nb_agents == 1: offset = 0 train_losses = [ sum(x) for x in zip( * [data.history.get(key) for key in keys[offset:offset + nb_agents]]) ] train_accs = [ sum(x) / nb_agents for x in zip(*[ data.history.get(key) for key in keys[offset + nb_agents:offset + 2 * nb_agents] ]) ] val_losses = [ sum(x) for x in zip(*[ data.history.get(key) for key in keys[2 * offset + 2 * nb_agents:2 * offset + 3 * nb_agents] ]) ] val_accs = [ sum(x) / nb_agents for x in zip(*[ data.history.get(key) for key in keys[2 * offset + 3 * nb_agents:2 * offset + 4 * nb_agents] ]) ] step_list = [*range(step_eval, nb_epoch + 1, step_eval)] del model return paramExpla, parameters, step_list, train_losses, train_accs, val_losses, val_accs
def train(setting_dict): settings = SegSettings(setting_dict, write_logger=True) my_logger(settings.simulation_folder + '\logger') # Initialize model: model = models.Unet_2D( encoder_name=settings.encoder_name, encoder_depth=settings.encoder_depth, encoder_weights=settings.encoder_weights, decoder_use_batchnorm=settings.decoder_use_batchnorm, decoder_channels=settings.decoder_channels, in_channels=settings.in_channels, classes=settings.classes, activation=settings.activation) model.cuda(1) model = model.double() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=settings.initial_learning_rate) # Initialize 'data_dic', nested dictionary, will contain all losses and dice for all organs organs = [ 'all_organs', 'spleen', 'prostate', 'lits', 'brain', 'pancreas', 'hepatic_vessel', 'left_atrial' ] count_type = ['total_epochs', 'cur'] loss_type = ['CE', 'bg_dice', 'organ_dice'] set_type = ['Training', 'Validation'] partition_by_organ = dict(zip(organs, [list() for i in organs])) #first dic - organs partition_by_count = dict( zip(count_type, [partition_by_organ for i in count_type])) partition_by_dice = dict( zip(loss_type, [partition_by_count.copy() for i in loss_type])) data_dic = dict(zip(set_type, [partition_by_dice.copy() for i in set_type])) ##this is the final dic ## data dic shape: # {Training: { Cross Entropy: {all epochs: {spleen:[], prostate:[] etc} # {cur_epoch: {spleen:[], prostate:[] etc}} # organ dice: {all epochs: {spleen:[], prostate:[] etc} # # {cur_epoch: {spleen:[], prostate:[] etc}} # background dice: {all epochs: {spleen:[], prostate:[] etc} # # {cur_epoch: {spleen:[], prostate:[] etc}}}} # {Validation: { Cross Entropy: {all epochs: {spleen:[], prostate:[] etc} # {cur_epoch: {spleen:[], prostate:[] etc}} # organ dice: {all epochs: {spleen:[], prostate:[] etc} # # {cur_epoch: {spleen:[], prostate:[] etc}} # background dice: {all epochs: {spleen:[], prostate:[] etc} # # {cur_epoch: {spleen:[], prostate:[] etc}}}} #Initialize datasets train_dataset_list = [] val_dataset_list = [] for organ in organs[1:]: organ_train_dataset = Seg_Dataset( organ, settings.definition_file_dir + '/' + organ + '/Training', settings.definition_file_dir + '/' + organ + '/Training_Labels', 2, settings) organ_val_dataset = Seg_Dataset( organ, settings.definition_file_dir + '/' + organ + '/Validation', settings.definition_file_dir + '/' + organ + '/Validation_Labels', 2, settings) train_dataset_list.append(organ_train_dataset) val_dataset_list.append(organ_val_dataset) train_dataset = torch.utils.data.ConcatDataset(train_dataset_list) val_dataset = torch.utils.data.ConcatDataset(val_dataset_list) print(len(train_dataset)) batch_size = settings.batch_size train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0) val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0) print('Training... 
') num_epochs = 3 for epoch in range(0, num_epochs): epoch_start_time = time.time() total_steps = len(train_dataloader) for i, sample in enumerate(train_dataloader, 1): if i > 50: break model.train() images = sample['image'].double() masks = sample['mask'].type(torch.LongTensor) masks = masks.unsqueeze(1) images = images.to("cuda:1") masks = masks.to("cuda:1") masks = masks.type(torch.LongTensor) masks = masks.cuda(1) #Forward pass outputs = model(images, sample['task']) outputs = outputs.to("cuda:1") loss = criterion(outputs.double(), masks[:, 0, :, :]) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() print( f"Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}", ) logging.info('current task: ' + sample['task'][0]) logging.info( f"Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}", ) dices = dice(outputs, masks, sample['num_classes'][0], settings) background_dice = dices[1] organ_dice = dices[2] #Update data dic for relevant organ data_dic['Training']['organ_dice']['cur'][sample['task'] [0]].append(organ_dice) data_dic['Training']['bg_dice']['cur'][sample['task'][0]].append( background_dice) data_dic['Training']['CE']['cur'][sample['task'][0]].append( loss.item()) #update data dic [all organ] data_dic['Training']['organ_dice']['cur']['all_organs'].append( organ_dice) data_dic['Training']['bg_dice']['cur']['all_organs'].append( background_dice) data_dic['Training']['CE']['cur']['all_organs'].append(loss.item()) if i % 30 == 0: #save output every 30 batches save_output = outputs.cpu().detach().numpy() save_samp(sample['image'][0], sample['mask'][0], sample['task'][0], save_output[0][1], epoch, i, settings.snapshot_dir, organ_dice) if i % 50 == 0: #print details every 50 batches print( 'curr train loss: {} train organ dice: {} train background dice: {} \t' 'iter: {}/{}'.format( np.mean( data_dic['Training']['CE']['cur']['all_organs']), data_dic['Training']['organ_dice']['cur'] ['all_organs'], np.mean(data_dic['Training']['bg_dice']['cur'] ['all_organs']), i + 1, len(train_dataloader))) logging.info( 'curr train loss: {} train organ dice: {} train background dice: {} \t' 'iter: {}/{}'.format( np.mean( data_dic['Training']['CE']['cur']['all_organs']), data_dic['Training']['organ_dice']['cur'] ['all_organs'], np.mean(data_dic['Training']['bg_dice']['cur'] ['all_organs']), i + 1, len(train_dataloader))) #Update data_dic['total_epochs'] for l in loss_type: for organ in organs: data_dic['Training'][l]['total_epochs'][organ].append( np.mean(data_dic['Training'][l]['cur'][organ])) ## Validation total_steps = len(val_dataloader) for i, data in enumerate(val_dataloader): if i > 50: break model.eval() images = data['image'].double() masks = data['mask'].type(torch.LongTensor) masks = masks.unsqueeze(1) images = images.to("cuda:1") masks = masks.to("cuda:1") outputs = model(images, data['task']) outputs = outputs.to("cuda:1") loss = criterion(outputs.double(), masks[:, 0, :, :]) print( f"Validation Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}", ) logging.info('current task: ' + sample['task'][0]) logging.info( f"Validation Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}", ) dices = dice(outputs, masks, data['num_classes'][0], settings) background_dice = dices[1] organ_dice = dices[2] # Update data dic for relevant organ data_dic['Validation']['organ_dice']['cur'][sample['task'] [0]].append(organ_dice) 
data_dic['Validation']['bg_dice']['cur'][data['task'][0]].append( background_dice) data_dic['Validation']['CE']['cur'][data['task'][0]].append( loss.item()) # Update data dic [all organ] data_dic['Validation']['organ_dice']['cur']['all_organs'].append( organ_dice) data_dic['Validation']['bg_dice']['cur']['all_organs'].append( background_dice) data_dic['Validation']['CE']['cur']['all_organs'].append( loss.item()) # Update data_dic['total_epochs'] for l in loss_type: for organ in organs: data_dic['Validation'][l]['total_epochs'][organ].append( np.mean(data_dic['Validation'][l]['cur'][organ])) print('End of epoch {} / {} \t Time Taken: {} min'.format( epoch + 1, num_epochs, (time.time() - epoch_start_time) / 60)) print('train loss: {} val_loss: {}'.format( np.mean(data_dic['Training']['CE']['cur']['all_organs']), np.mean(data_dic['Validation']['CE']['cur']['all_organs']))) print( 'train organ dice: {} train background dice: {} val organ dice: {} val background dice: {}' .format( np.mean( data_dic['Training']['organ_dice']['cur']['all_organs']), np.mean(data_dic['Training']['bg_dice']['cur']['all_organs']), np.mean( data_dic['Validation']['organ_dice']['cur']['all_organs']), np.mean( data_dic['Validation']['bg_dice']['cur']['all_organs'])))
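# Note on the nested data_dic built in train() above: dict(zip(keys, [d for k in keys]))
# reuses the same inner dict for every key, and dict.copy() is shallow, so the
# Training/Validation x CE/bg_dice/organ_dice branches end up sharing one list per organ.
# A small standalone demonstration of that aliasing (illustrative only, not project code):
organs_demo = ['spleen', 'prostate']
inner = dict(zip(organs_demo, [list() for _ in organs_demo]))
outer = dict(zip(['total_epochs', 'cur'], [inner for _ in range(2)]))
layered = dict(zip(['CE', 'organ_dice'], [outer.copy() for _ in range(2)]))
layered['CE']['cur']['spleen'].append(0.5)
print(layered['organ_dice']['total_epochs']['spleen'])  # [0.5] -- same underlying list
# copy.deepcopy (or rebuilding the inner dicts per branch) keeps the branches independent.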
import webapp2
import jinja2
import os
import Models
from google.appengine.ext import db
from google.appengine.api import users

# resolve the signed-in user once so it can be reused below
user = users.get_current_user()

payslips = db.GqlQuery("SELECT * "
                       "FROM Payslip "
                       "WHERE ANCESTOR IS :1 ",
                       Models.payslip_key(user.user_id()))

income = 0
tax = 0
payslip_count = 0
for payslip in payslips:
    income += payslip.income
    tax += payslip.tax
    payslip_count += 1

employee = Models.Employee()
employee.userid = user.user_id()
employee.income = income
employee.tax = tax
employee.net = income - tax
employee.account_type = "employee"
employee.put()
if tv_is_completed: break browser_status = WebInteraction.source_login(source, browser) if browser_status is False: ActionLog.log('%s could not log on' % source.login_page) continue else: ActionLog.log( 'Searching via the search form on %s.' % source.domain, db) # we invert the search format and check for each show, not each link in the page for s in search.shows_to_download: ActionLog.log('Searching for %s.' % str(s), db) response_links = WebInteraction.source_search( source, str(s), browser) correct_links = [ l for l in response_links if s.episode_in_link(l.text.lower()) ] ActionLog.log( 'Found %s links for %s on %s' % (str(len(correct_links)), str(s), source.domain), db) search.process_search_result(correct_links, s, browser, source, config) time.sleep( 15) # wait fifteen seconds between searches for warez-bb.org if __name__ == "__main__": database = Models.connect() search_all(database)
def lack_ram(): processed_test = [] processed_val = [] input_var = T.ftensor3('inputs') network,train_fn,val_fn,output = Models.bidirectional_LSTM(input_var=input_var,N_HIDDEN=N_HIDDEN,layer=4,vocab=vocab) processed = [] if(os.path.isfile(weights+'.params')): print("loading Weights") params.read_model_data(network, weights) if(os.path.isfile('stored_batch.p')!=True): if(os.path.isfile('stored_processed.p')!=True): print('Creating processed sentences file') print('Loading english and french data files') english_set = pd.read_csv('../data/processed_en',header=None,delimiter=',',names=['english','en_length']) french_set = pd.read_csv('../data/processed_fr',header=None,delimiter=',',names=['french','fr_length']) print('Combining the files') combined_set = pd.concat([english_set,french_set],axis=1) print('Removing Duplicates') print(len(combined_set['french'])) combined_set = combined_set.drop_duplicates() print(len(combined_set['french'])) print('Grouping sentences together by input and output sentence length') processed = create_training_set(combined_set,BATCH_SIZE,BATCH_SIZE) print('Store batches in a pickle file') pickle.dump(processed,open('stored_processed.p','wb')) gc.collect() else: print('Loading grouped sentences') processed = pickle.load(open('stored_processed.p','rb')) print('number of grouped sentences',len(processed)) #print('Creating matrix file for grouped sentences') gc.collect() #pool = mp.Pool(processes=2) #processed_batch = [pool.apply_async(helpers.convert_to_vector,args=(batch,french_vocab,char_to_ix)) for batch in processed] #processed_batch = [p.get() for p in processed_batch] #for batch in processed: # processed_batch.append(helpers.convert_to_vector(batch,french_vocab,char_to_ix)) #print(len(processed_batch)) #print('Dumping matrix data to file') #pickle.dump(processed_batch,open('stored_batch.p','wb')) else: print('Loading input and output matrix file') processed_batch = pickle.load(open('stored_batch.p','rb')) #print(ix_to_char) print("Shuffle and set validation set") shuffle(processed) #Shuffle Batches processed_test = processed[:len(processed)-50] processed_val = processed[len(processed)-50:] #processed_test = processed[:1] #processed_val = processed[501:502] for i in range(epoch): shuffle(processed_test) train_main_b = 0 train_err = 0 train_batches = 0 start_time = time.time() for batch in processed_test: curr_batch = helpers.convert_to_vector(batch,french_vocab,char_to_ix) fr,eng = helpers.shift_to_input(curr_batch,0,ix_to_vector) train_err += train_fn(fr,eng[:,0]) train_batches += 1 train_main_b += 1 print("new batch ",train_main_b,len(processed_test)) if(train_main_b % 2000 == 0): print("saving model",train_main_b) params.write_model_data(network, weights) for word in range(1,curr_batch[1].shape[1]-1): #print(word) #print(T.argmax(lasagne.layers.get_output(network,fr,allow_input_downcast=True),axis=1).eval()) #eng[:,0] = T.argmax(lasagne.layers.get_output(network,fr,allow_input_downcast=True),axis=1).eval().transpose() fr,eng = helpers.shift_to_input([fr,eng],word,ix_to_vector) train_err += train_fn(fr,eng[:,0]) train_batches += 1 #params.write_model_data(network, weights) # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 for batch in processed_val: curr_batch = helpers.convert_to_vector(batch,french_vocab,char_to_ix) fr,eng = helpers.shift_to_input(curr_batch,0,ix_to_vector) error,acc = val_fn(fr,eng[:,0]) val_err += error val_acc += acc val_batches += 1 for word in range(1,curr_batch[1].shape[1]-1): #eng[:,0] = 
T.argmax(lasagne.layers.get_output(network,fr,allow_input_downcast=True),axis=1).eval().transpose() fr,eng = helpers.shift_to_input([fr,eng],word,ix_to_vector) error,acc = val_fn(fr,eng[:,0]) val_err += error val_acc += acc val_batches += 1 # Then we print the results for this epoch: print("Epoch {} of {} took {:.3f}s".format( i, epoch, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(train_err / train_batches)) print(" validation loss:\t\t{:.6f}".format(val_err / val_batches)) print(" validation accuracy:\t\t{:.2f} %".format( val_acc / val_batches * 100)) params.write_model_data(network, weights)
finally: signal.alarm(0) return res return wraps(func)(wrapper) return decorator df = extract(range(2016, 2021), 'ES') dd = Description.Data(df) dmp = Decompose(df) seconds = 180 methods = ['nm', 'bfgs', 'lbfgs'] model = Models.Arima(df, 0.8) @timeout(seconds, os.strerror(errno.ETIMEDOUT)) def fit(kw, method): start_time = time.time() model.fit(kw, method) error = mean_squared_error(model.time_series(kw, False), model.predict()) print( f"Method: {method}, MSE: {error}, AIC: {model.aic}, Time: {time.time() - start_time} seconds" ) for method in methods: try: fit('jamon', method)
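# Only the tail of the timeout decorator is visible above; a plausible reconstruction of
# the whole thing, assuming the usual SIGALRM pattern (the handler and variable names here
# are guesses, not necessarily the project's actual code):
import errno
import os
import signal
from functools import wraps

def timeout(seconds, error_message=os.strerror(errno.ETIMEDOUT)):
    def decorator(func):
        def _handle_timeout(signum, frame):
            # raised inside func if the alarm fires before it returns
            raise TimeoutError(error_message)

        def wrapper(*args, **kwargs):
            signal.signal(signal.SIGALRM, _handle_timeout)
            signal.alarm(seconds)
            try:
                res = func(*args, **kwargs)
            finally:
                signal.alarm(0)  # always cancel the pending alarm
            return res

        return wraps(func)(wrapper)

    return decorator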
def eICU_model_creator(config): """Constructor function for the model(s) to be optimized. You will also need to provide a custom training function to specify the optimization procedure for multiple models. Args: config (dict): Configuration dictionary passed into ``TorchTrainer``. Returns: One or more torch.nn.Module objects. """ model_class = config.get('model', 'VanillaRNN') if model_class == 'VanillaRNN': return Models.VanillaRNN( config.get('n_inputs', 2090), config.get('n_hidden', 100), config.get('n_outputs', 1), config.get('n_rnn_layers', 2), config.get('p_dropout', 0.2), bidir=config.get('bidir', False), total_length=config.get('total_length', None), embed_features=config.get('embed_features', None), n_embeddings=config.get('n_embeddings', None), embedding_dim=config.get('embedding_dim', None)) elif model_class == 'VanillaLSTM': return Models.VanillaLSTM( config.get('n_inputs', 2090), config.get('n_hidden', 100), config.get('n_outputs', 1), config.get('n_rnn_layers', 2), config.get('p_dropout', 0.2), bidir=config.get('bidir', False), total_length=config.get('total_length', None), embed_features=config.get('embed_features', None), n_embeddings=config.get('n_embeddings', None), embedding_dim=config.get('embedding_dim', None)) elif model_class == 'TLSTM': return Models.TLSTM(config.get('n_inputs', 2090), config.get('n_hidden', 100), config.get('n_outputs', 1), config.get('n_rnn_layers', 2), config.get('p_dropout', 0.2), embed_features=config.get('embed_features', None), n_embeddings=config.get('n_embeddings', None), embedding_dim=config.get('embedding_dim', None), elapsed_time=config.get('elapsed_time', None)) elif model_class == 'MF1LSTM': return Models.MF1LSTM(config.get('n_inputs', 2090), config.get('n_hidden', 100), config.get('n_outputs', 1), config.get('n_rnn_layers', 2), config.get('p_dropout', 0.2), embed_features=config.get( 'embed_features', None), n_embeddings=config.get('n_embeddings', None), embedding_dim=config.get('embedding_dim', None), elapsed_time=config.get('elapsed_time', None)) elif model_class == 'MF2LSTM': return Models.MF2LSTM(config.get('n_inputs', 2090), config.get('n_hidden', 100), config.get('n_outputs', 1), config.get('n_rnn_layers', 2), config.get('p_dropout', 0.2), embed_features=config.get( 'embed_features', None), n_embeddings=config.get('n_embeddings', None), embedding_dim=config.get('embedding_dim', None), elapsed_time=config.get('elapsed_time', None)) else: raise Exception( f'ERROR: {model_class} is an invalid model type. Please specify either "VanillaRNN", "VanillaLSTM", "TLSTM", "MF1LSTM" or "MF2LSTM".' )
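# A possible way to trim the repeated config.get(...) calls above (a sketch, not the
# project's code): build the shared positional arguments once and dispatch the model
# class plus its extra keyword arguments through a dict.
def eICU_model_creator_compact(config):
    common_args = (config.get('n_inputs', 2090), config.get('n_hidden', 100),
                   config.get('n_outputs', 1), config.get('n_rnn_layers', 2),
                   config.get('p_dropout', 0.2))
    common_kwargs = dict(embed_features=config.get('embed_features', None),
                         n_embeddings=config.get('n_embeddings', None),
                         embedding_dim=config.get('embedding_dim', None))
    rnn_kwargs = dict(bidir=config.get('bidir', False),
                      total_length=config.get('total_length', None))
    time_kwargs = dict(elapsed_time=config.get('elapsed_time', None))
    dispatch = {'VanillaRNN': (Models.VanillaRNN, rnn_kwargs),
                'VanillaLSTM': (Models.VanillaLSTM, rnn_kwargs),
                'TLSTM': (Models.TLSTM, time_kwargs),
                'MF1LSTM': (Models.MF1LSTM, time_kwargs),
                'MF2LSTM': (Models.MF2LSTM, time_kwargs)}
    model_class = config.get('model', 'VanillaRNN')
    if model_class not in dispatch:
        raise Exception(f'ERROR: {model_class} is an invalid model type. Please specify either '
                        f'"VanillaRNN", "VanillaLSTM", "TLSTM", "MF1LSTM" or "MF2LSTM".')
    model_cls, extra_kwargs = dispatch[model_class]
    return model_cls(*common_args, **common_kwargs, **extra_kwargs)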
def main(sids,logger): SuiteList = DataProvider.getCaseData(logger,sids) logger.debug("run SuiteIDs:",SuiteList.keys()) report = {} # report {sid:{status:pass/fail,cost:time,detail:{case:pass/fail/norun}}} **update 2016-2-16 conf=EnvInit.config() # print conf.host # sys.exit() for sid in SuiteList.keys(): logger.debug("++++++"+sid+"++++++"+"begin") begintime=time.time() report[sid]={} for case in SuiteList[sid]: for pk in case.param.keys(): if case.param[pk].startswith('$$'): logger.debug('debug main ',case.param[pk]) tmpList = case.param[pk][2:].split('.') tmpSid = tmpList[0] tmpCid = tmpList[1] tmpAttrList,tmpFun = getAttrList(case.param[pk],re.compile('\[(.+?)\]')) for tc in SuiteList[sid]: if tc.cid ==tmpCid and tc.sid == tmpSid: case.param[pk]=tc.getResValue(tmpAttrList) logger.debug( "main ...................... update ",case.__hash__()) tc=Models.contain(case.param[pk],Models.RESERVEDWORD.keys()) if tc!=None: case.param[pk]=Models.RESERVEDWORD[tc](case.param[pk]) logger.debug("main param.....",case.cid,case.sid,case.param) r,c = request(case,conf,logger) logger.debug("main response..",c) if r['status']!='200': report[sid]={'status':False, 'cost':time.time()-begintime, 'detail':{(case.cid,case.sid):False}} break case.res = c assertobj = AssertMain(c,case.asex,case.param,logger) logger.debug("main assertobj...",assertobj) if assertobj['status']: report[sid]['status']=True if report[sid].keys().count('detail')>0: report[sid]['detail'][(case.cid,case.sid)]=assertobj else: report[sid]['detail']={(case.cid,case.sid):assertobj} else: report[sid]['status']=False report[sid]['cost']=time.time()-begintime report[sid]['detail'][(case.cid,case.sid)]=assertobj break if case.otherAction!='': logger.debug( case.otherAction) eval(case.otherAction) if report[sid].keys().count('status')==0: report[sid]['status']=True report[sid]['cost']=time.time()-begintime logger.debug("++++++",sid,"++++++","end") logger.debug( report) logger.debug("dump report file begin") GenerateReport.Report(report) logger.debug("dump report file end")
def get(self): user = users.get_current_user() if user: pasyer_users = db.GqlQuery("SELECT * " "FROM Site_User " "WHERE userid = :1 ", user.user_id()) current_user = False for pasyer_user in pasyer_users: current_user = pasyer_user if not current_user: self.redirect("/settings") else: payslips = db.GqlQuery("SELECT * " "FROM Payslip " "WHERE ANCESTOR IS :1 ", Models.payslip_key(user.user_id())) income = 0 tax = 0 payslip_count = 0 for payslip in payslips: income+= payslip.income tax += payslip.tax payslip_count += 1 files = db.GqlQuery("SELECT * " "FROM File " "WHERE ANCESTOR IS :1 ", Models.file_key(user.user_id())) file_count = 0 for file in files: file_count += 1 #set stylesheets needed per page specific_urls = """ <link type="text/css" rel="stylesheet" href="/stylesheets/""" + self.__class__.__name__ + """.css" /> """ dashboard_template_values = { 'name': current_user.name, 'email': current_user.email, 'account_type': current_user.account_type, 'payslip_quantity': payslip_count, 'file_quantity': file_count, 'income': income, 'tax': tax, 'net': income - tax } template = jinja_environment.get_template('Page_Content/dashboard.html') dashboard_template = template.render(dashboard_template_values) url = users.create_logout_url(self.request.uri) nav = """ <nav> <ul> <li><a href="/dashboard">Dashboard</a></li> <li><a href="#">Design</a></li> <li><a href="#">About</a></li> <li><a href="%s">Logout</a></li> </ul> </nav> """ % url template_values = { 'specific_urls':specific_urls, 'nav': nav, 'content': dashboard_template } template = jinja_environment.get_template('index.html') self.response.out.write(template.render(template_values)) else: self.redirect('/')
import Models

print(Models.Add(3, 56))
print(Models.Sub(90, 56))
print(Models.Modulus(3, 2))
print(Models.division(9, 3))
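# The arithmetic snippet above only runs if Models exposes these four helpers; a
# minimal stand-in (an assumption -- the real Models.py may look different) would be:
def Add(a, b):
    return a + b

def Sub(a, b):
    return a - b

def Modulus(a, b):
    return a % b

def division(a, b):
    return a / b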
def get(self): if users.get_current_user(): # GQL query to get the payslips from the datastore payslips = db.GqlQuery( "SELECT * " "FROM Payslip " "WHERE ANCESTOR IS :1 ", Models.payslip_key(users.get_current_user().user_id()), ) list1 = [0, 0, 0, 0] list2 = [0, 0, 0, 0] # Set the list to the first 4 payslips income and tax for the graph index = 0 for payslip in payslips: if index >= 4: break list1[index] = payslip.income list2[index] = payslip.tax index += 1 # set specific stylesheets and scripts needed per page # set the values for the graph specific_urls = ( """ <link type="text/css" rel="stylesheet" href="/stylesheets/""" + self.__class__.__name__ + """.css" /> <script type="text/javascript" src="https://www.google.com/jsapi"></script> <script type="text/javascript"> google.load("visualization", "1", {packages:["corechart"]}); google.setOnLoadCallback(drawChart); function drawChart() { var data = google.visualization.arrayToDataTable([ ['Payslip', 'Income', 'Tax'], ['1', """ + str(list1[0]) + """, """ + str(list2[0]) + """], ['2', """ + str(list1[1]) + """, """ + str(list2[1]) + """], ['3', """ + str(list1[2]) + """, """ + str(list2[2]) + """], ['4', """ + str(list1[3]) + """, """ + str(list2[3]) + """] ]); var options = { title: 'Income Breakdown', }; var chart = new google.visualization.AreaChart(document.getElementById('chart-div')); chart.draw(data, options); } </script> """ ) # Set the nested template values to the generate payslip function payslip_template_values = {"payslips": generate_payslip_html(self, payslips)} template = jinja_environment.get_template("Page_Content/payslips.html") payslip_template = template.render(payslip_template_values) myFile = open("Page_Content/payslips.html", "r") # Create a log out url for the user url = users.create_logout_url(self.request.uri) # set the nav list nav = ( """ <nav> <ul> <li><a href="/dashboard">Dashboard</a></li> <li><a href="/design">Design</a></li> <li><a href="/about">About</a></li> <li><a href="%s">Logout</a></li> </ul> </nav> """ % url ) template_values = {"specific_urls": specific_urls, "nav": nav, "content": payslip_template} template = jinja_environment.get_template("index.html") self.response.out.write(template.render(template_values)) else: self.redirect("/")
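# The chart rows above are assembled by string concatenation; a sketch of an alternative
# (same list1/list2 values, the names chart_rows/chart_rows_js are mine) is to serialize
# the rows once with json and splice a single string into the <script> template:
import json
chart_rows = [['Payslip', 'Income', 'Tax'],
              ['1', list1[0], list2[0]],
              ['2', list1[1], list2[1]],
              ['3', list1[2], list2[2]],
              ['4', list1[3], list2[3]]]
chart_rows_js = json.dumps(chart_rows)
# ...then in the page script: "var data = google.visualization.arrayToDataTable(" + chart_rows_js + ");"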
def main(args=None): # Setting up configuration TODO: use docopt if(args): configuration_string = args[0] else: configuration_string = "Configuration/default.conf" config = ConfigParser.ConfigParser() config.read(configuration_string) rawdata_directory = config.get("Directories", "dir_rawdata") storedmodel_directory = config.get("Directories", "dir_storedmodel") cluster_json_directory = config.get("Directories", "dir_clusters") # Parameters TODO: set these in config file # Clustering cluster_feature_names = ["StartLat", "StartLong", "EndLat", "EndLong"] # Which features to cluster over clustering_alg = "KMeans" # Which Clustering Algorithm to use cluster_model_file = config.get("Batch", "cluster_class_file") cluster_params = {"max_clusters": 10, "n_init": 10} # Parameters for KMeans # Initial Classification init_class_alg = "RandomForest" init_class_model_file = config.get("Batch", "init_class_file") init_class_feature_names = ["StartLat", "StartLong"] # Which features to cluster over # Online Classification online_class_alg = "RandomForest" online_class_model_file = config.get("Batch", "online_class_file") online_class_feature_names = ["Latitude", "Longitude", "StartLat", "StartLong"] # Read in batch data raw_data = sc.textFile(rawdata_directory) json_data = raw_data.map(lambda x: json.loads(x)) pair_rdd = json_data.filter(bool).map(lambda x: (x["journey_id"], x)) raw_journeys = pair_rdd.combineByKey(lambda value: [value], lambda acc, value: acc + [value], lambda acc1, acc2: add(acc1, acc2)) processed_journeys = raw_journeys.mapValues(lambda x: Data.load_batch_data(x)) journeys = processed_journeys.map(lambda x: (x[1].data["vin"][0], x[1])) journeys_by_vin = journeys.combineByKey(lambda value: [value], lambda acc, value: acc + [value], lambda acc1, acc2: add(acc1, acc2)) # Build, assign and save clusters journeys_with_id = journeys_by_vin.mapValues(lambda data: Models.cluster(clustering_alg, cluster_feature_names, cluster_params, data)) journeys_with_id.persist() journey_clusters = journeys_with_id.mapValues(lambda journeys: Data.create_journey_clusters(journeys)).persist() journey_clusters_local = journey_clusters.collectAsMap() joblib.dump(journey_clusters_local, storedmodel_directory + cluster_model_file + "_JourneyClusters") cluster_json = journey_clusters.map(Data.extract_journey_json).collect() with open(cluster_json_directory + "clusters.json", "w") as f: for cluster in cluster_json: f.write(cluster + "\n") journey_clusters.unpersist() # Build initial classification models init_class_models = journeys_with_id.mapValues(lambda data: Models.train_init_class_model(init_class_alg, init_class_feature_names, data)).collectAsMap() joblib.dump(init_class_models, storedmodel_directory + init_class_model_file) # Build online classification models online_class_models = journeys_with_id.mapValues(lambda data: Models.train_online_class_model(online_class_alg, online_class_feature_names, data)).collectAsMap() joblib.dump(online_class_models, storedmodel_directory + online_class_model_file) sc.stop()
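# The combineByKey calls above all implement "collect the values for each key into a
# list"; a tiny standalone illustration of that pattern (assumes a live SparkContext sc,
# as the surrounding script does):
pairs = sc.parallelize([("j1", {"t": 0}), ("j1", {"t": 1}), ("j2", {"t": 0})])
grouped = pairs.combineByKey(lambda v: [v],             # createCombiner
                             lambda acc, v: acc + [v],  # mergeValue
                             lambda a, b: a + b)        # mergeCombiners
print(grouped.collectAsMap())  # {'j1': [{'t': 0}, {'t': 1}], 'j2': [{'t': 0}]}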
title_str = '{:30s}{:<30}{:<20}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}\n' log_file.write( title_str.format('data_name', 'model_name', 'loss_function', 'num_train', 'batch_size', 'max_iters', 'uncmp_psnr', 'uncmp_rmse', 'uncmp_ssim', 'valid_psnr', 'valid_rmse', 'valid_ssim')) log_file.close() # resize the input images if input_size is not None input_size = None # input_size = (256, 256) # we can also use a low-res input to reduce memory usage and speed up training/testing with a sacrifice of precision resetRNGseed(0) # create a CompenNeSt # load pre-trained CompenNeSt on Blender dataset ckpt_file = '../../checkpoint/blender_pretrained_CompenNeSt_l1+ssim_50000_32_20000_0.0015_0.8_2000_0.0001_20000.pth' compen_nest = Models.CompenNeSt() if torch.cuda.device_count() >= 1: compen_nest = nn.DataParallel(compen_nest, device_ids=device_ids).to(device) compen_nest.load_state_dict(torch.load(ckpt_file)) compen_nest.device_ids = device_ids # stats for different setups for data_name in data_list: # load training and validation data data_root = fullfile(dataset_root, data_name) cam_surf, cam_train, cam_valid, prj_train, prj_valid, mask_corners = loadData( dataset_root, data_name, input_size, CompenNeSt_only=False) # surface image for training and validation cam_surf_train = cam_surf.expand_as(cam_train)
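# The load_state_dict call above assumes the checkpoint was saved from a DataParallel
# model (keys prefixed with 'module.'); if it was instead saved from a plain module, a
# common workaround (an assumption -- only needed if key mismatches are reported) is:
state = torch.load(ckpt_file)
state = {(k if k.startswith('module.') else 'module.' + k): v for k, v in state.items()}
compen_nest.load_state_dict(state)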
# transform skewed data skewed = ['capital-gain', 'capital-loss'] features_log_transformed = pd.DataFrame(data = features_raw) features_log_transformed[skewed] = features_raw[skewed].apply(lambda x: np.log(x + 1)) # Normalize numerical features scaler = MinMaxScaler() # default=(0, 1) numerical = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'] features_log_minmax_transform = pd.DataFrame(data = features_log_transformed) features_log_minmax_transform[numerical] = scaler.fit_transform(features_log_transformed[numerical]) # One-hot encode categorical features features_final = pd.get_dummies(features_log_minmax_transform) income = income_raw.map({'>50K': 1, '<=50K': 0}) # Shuffle and Split data X_train, X_test, y_train, y_test = train_test_split(features_final, income, test_size = 0.2, random_state = 0) # Evaluate Model Performance with fbeta = 0.5 fbeta = 0.5 best_clf = Models.evaluate_models(X_train,y_train,X_test,y_test,fbeta) print("\n",best_clf.__class__.__name__) best_clf = Models.optimize_best_model(best_clf,X_train,y_train,X_test,y_test) model_predictions = best_clf.predict(X_test) print("Final accuracy score on the testing data: {:.4f}".format(accuracy_score(y_test, model_predictions))) print("Final F-score on the testing data: {:.4f}".format(fbeta_score(y_test, model_predictions, beta = 0.5)))
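# For context on the fbeta = 0.5 choice above: F0.5 weights precision more heavily than
# recall. A quick standalone check (toy labels, not the census data used above):
from sklearn.metrics import fbeta_score
y_true = [1, 1, 1, 1, 0, 0]
y_pred = [1, 1, 0, 0, 0, 0]   # precision = 1.0, recall = 0.5
print(fbeta_score(y_true, y_pred, beta=0.5))  # ~0.833, pulled toward precision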
def __init__(self, device):
    self.device = device
    self.model = Models.ResNet()
    self.model.to(self.device)
    # load the pre-trained DANN weights (hard-coded absolute path); mapping the
    # checkpoint onto self.device keeps this working on CPU-only machines as well
    self.model.load_state_dict(
        torch.load("C:/Users/Joab-PC/Desktop/FYP/GUI/DANN/DANN_ResNet_Darkdata_100e.pth",
                   map_location=self.device))
def initialize_from_name(self): """ Try to initialize a monster object from a string. Lots of craziness here to protect the users from themselves. Note also that we're overwriting a method of Models.py with this! """ # sanity warning if "_" in self.name: self.logger.warn( "Asset name '%s' contains underscores. Names should use whitespace." % self.name) self.logger.warn("Attempting to initialize by handle...") self.handle = self.name self.initialize_from_handle() return True # first, check for an exact name match (long-shot) asset_dict = self.assets.get_asset_from_name(self.name) if asset_dict is not None: self.initialize_asset(asset_dict) return True # next, split to a list and try to set asset and level name_list = self.name.split(" ") # accept any int in the string as the level for i in name_list: if i.isdigit(): setattr(self, "level", int(i)) # now iterate through the list and see if we can get a name from it for i in range(len((name_list))): parsed_name = " ".join(name_list[:i]) asset_dict = self.assets.get_asset_from_name(parsed_name) if asset_dict is not None: self.initialize_asset(asset_dict) if len(name_list) > i and name_list[i].upper() not in [ "LEVEL", "LVL", "L" ]: setattr(self, "comment", (" ".join(name_list[i:]))) return True # finally, create a list of misspellings and try to get an asset from that # (this is expensive, so it's a last resort) m_dict = {} for asset_handle in self.assets.get_handles(): asset_dict = self.assets.get_asset(asset_handle) if "misspellings" in asset_dict.keys(): for m in asset_dict["misspellings"]: m_dict[m] = asset_handle for i in range(len((name_list)) + 1): parsed_name = " ".join(name_list[:i]).upper() if parsed_name in m_dict.keys(): asset_handle = m_dict[parsed_name] self.initialize_asset(self.assets.get_asset(asset_handle)) if len(name_list) > i and name_list[i].upper() not in [ "LEVEL", "LVL", "L" ]: setattr(self, "comment", (" ".join(name_list[i:]))) return True # if we absolutely cannot guess wtf monster name this is, give up and # throw a utils.Asseterror() if self.handle is None: raise Models.AssetInitError( "Asset name '%s' could not be translated to an asset handle!" % self.name)
def train(): data_out_dir = '/media/tk/EE44DA8044DA4B4B/cataract_phase_img' height = 224 width = 224 skip_rate = 10 batch = 32 nb_classes = 11 nb_epoch = 100 current_batch_count = 0 out_dir_name = 'ResNet50Pretrain_phase' ## CHECK THIS!!!!!!!!! activation = "relu" ## CHECK THIS!!!!!!!!! momentum = 0.9 lr = 0.01 optimizer = SGD(lr=lr, momentum=momentum, decay=0.0, nesterov=True) ## CHECK THIS!!!!!!!!! loss = 'categorical_crossentropy' model = Models.resnet(nb_classes) model.compile(optimizer, loss=loss, metrics=['accuracy']) X = np.zeros((batch,height,width,3)) Y = np.zeros((batch,nb_classes)) for e in range(0,nb_epoch): ACC = 0. LOSS = 0. N = 0 for vid_num in sample_lengths.keys(): lmdb_env_x = lmdb.open(os.path.join(data_out_dir,vid_num+"X")) lmdb_txn_x = lmdb_env_x.begin() lmdb_cursor_x = lmdb_txn_x.cursor() lmdb_env_y = lmdb.open(os.path.join(data_out_dir,vid_num+"y")) lmdb_txn_y = lmdb_env_y.begin() lmdb_cursor_y = lmdb_txn_y.cursor() indices = list(range(0,int(sample_lengths[vid_num]/skip_rate))) np.random.shuffle(indices) label = np.frombuffer(lmdb_cursor_y.get('{:0>8d}'.format(0).encode()),dtype=np.dtype(np.int64)) for index in indices: real_frame_ind = index*skip_rate try: value = np.frombuffer(lmdb_cursor_x.get('{:0>8d}'.format(index).encode()),dtype=np.dtype(np.uint8)) except: continue #pdb.set_trace() x = value.reshape((height,width,3)) x.setflags(write=1) x = x.astype(np.float) x -= 128 x /= 128.0 y = label[real_frame_ind] X[current_batch_count] = x Y[current_batch_count,y] = 1 current_batch_count += 1 if (current_batch_count % batch) == 0: losses = model.train_on_batch(X, Y) ACC += losses[1] # running sum of per-batch accuracy for this epoch LOSS += losses[0] N += 1 print("epoch: {:03d} | loss: {:.03f} | acc: {:.03f} \r".format(e,LOSS/N,ACC/N), end='\r') ## TRAIN() X = np.zeros((batch,height,width,3)) Y = np.zeros((batch,nb_classes)) current_batch_count = 0 print("Finished with epoch:", e,"\n") model_file = './weights/'+ out_dir_name + '_ep:%03d_acc:%0.3f_loss:%0.3f.h5' % (e+1,(ACC/N),(LOSS/N)) model.save_weights(model_file, overwrite=True)
def experiment_poly_svm(train, test, featurizer): data = Features.make_experiment_matrices(train, test, featurizer) return Models.report_SVM_polyK(data['train_X'], data['train_Y'], data['test_X'], data['test_Y'])