def test_DT(self):
    records, attributes = load_data("data/mushrooms_train.data")
    test_records = load_data("data/mushrooms_train.data")[0]
    # print(records, attributes)
    RF = RandomForest(tree_num=10)
    RF.train(records, attributes)
def test_DT(self):
    records, attributes = load_data("data/mushrooms_train.data")
    test_records = load_data("data/mushrooms_train.data")[0]
    # print(records, attributes)
    dt = DecisionTree()
    best_index, best_index_dict = dt.find_best_split(records, attributes, class_index=0)
    dt.shuffle_dataset(best_index_dict)
def init(self):
    if not self.initialized:
        self.initialized = True
        print('init started')
        self.mat = base.np.load('mat.dat')
        self.maxLength = base.np.load('maxLength.dat')
        self.glove = base.load_data('glove.dat')
        self.patty = base.load_data('patty.dat')
        self.patty.processData()
        print("data loaded")
def averageAccuracy(student):
    ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ######
    print("Notre Dame de Paris")
    image1, image2, eval_file = load_data('notre_dame')
    image1 = rgb2gray(rescale(image1, scale_factor))
    image2 = rgb2gray(rescale(image2, scale_factor))
    x1, y1, x2, y2, matches, confidences = find_matches(
        student, image1, image2, eval_file)
    num_pts_to_visualize = matches.shape[0]
    acc100ND = evaluate_correspondence(image1, image2, eval_file, scale_factor,
                                       x1, y1, x2, y2, matches, confidences,
                                       num_pts_to_visualize, "Notre_Dame.jpg")

    ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ######
    print("Mount Rushmore")
    image1, image2, eval_file = load_data('mt_rushmore')
    image1 = rgb2gray(rescale(image1, scale_factor))
    image2 = rgb2gray(rescale(image2, scale_factor))
    x1, y1, x2, y2, matches, confidences = find_matches(
        student, image1, image2, eval_file)
    num_pts_to_visualize = matches.shape[0]
    acc100MR = evaluate_correspondence(image1, image2, eval_file, scale_factor,
                                       x1, y1, x2, y2, matches, confidences,
                                       num_pts_to_visualize, "Mt_Rushmore.jpg")

    ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ######
    print("Episcopal Gaudi")
    image1, image2, eval_file = load_data('e_gaudi')
    image1 = rgb2gray(rescale(image1, scale_factor))
    image2 = rgb2gray(rescale(image2, scale_factor))
    x1, y1, x2, y2, matches, confidences = find_matches(
        student, image1, image2, eval_file)
    num_pts_to_visualize = matches.shape[0]
    acc100EG = evaluate_correspondence(image1, image2, eval_file, scale_factor,
                                       x1, y1, x2, y2, matches, confidences,
                                       num_pts_to_visualize, "e_gaudi.jpg")

    ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ######
    acc100Avg = (acc100ND + acc100MR + acc100EG) / 3.0
    print("Average Accuracy: " + str(acc100Avg))
    return acc100ND, acc100MR, acc100EG, acc100Avg
def login_student():
    signin_url = ('http://10.168.6.10:801/eportal/?c=ACSetting&a=Login&protocol=http:'
                  '&hostname=10.168.6.10&iTermType=1&wlanuserip=' + ip +
                  '&wlanacip=10.168.6.9&mac=00-00-00-00-00-00&ip=' + ip +
                  '&enAdvert=0&queryACIP=0&loginMethod=1')
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'keep-alive',
        # Hard-coding Content-Length is fragile: requests computes it from the
        # payload, and a mismatch with the actual body can break the request.
        'Content-Length': '152',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'program=new;vlan=0;ip=10.67.24.132;ssid=null;areaID=null;md5_login2=%2C0%2C1998004%7C123123; PHPSESSID=f2fp2ubgiem23r9kttrpe0cjl0',
        'Host': '10.168.6.10:801',
        'Origin': 'http://10.168.6.10',
        'Referer': 'http://10.168.6.10/a70.htm?wlanuserip=10.67.24.132&wlanacip=10.168.6.9&wlanacname=&vlanid=0&ip=10.67.24.132&ssid=null&areaID=null&mac=00-00-00-00-00-00',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0'
    }
    sub_json = load_data('student_config.json')
    # Reuse signin_url instead of repeating the literal URL, and pass headers
    # by keyword: the third positional argument of requests.post is `json`,
    # not `headers`, so the original call never sent these headers.
    resp = requests.post(signin_url, data=sub_json, headers=headers)
    # print(resp.status_code)
    return resp.status_code
def get_confusion_matrix():
    """Get and plot confusion matrix (tp, fp, tn, fn) of a ML model."""
    # load weights
    print("Loading model...")
    model = keras.models.load_model(os.path.join(config['checkpoint_dir'], "weights.best.hdf5"))
    print("Model successfully loaded!")

    # load data
    print("Loading data...")
    data = main.load_data(os.getcwd(), "panda_v3.db")
    print("Data successfully loaded!")

    # get predictions
    print("Predicting classes...")
    start_t = time.time()
    y_pred = model.predict_classes(data['test_X'])
    end_t = time.time()
    print("Classes successfully predicted!")
    print("Time elapsed: %f s \n" % (end_t - start_t))

    # get and print confusion matrix
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(data['test_y'], y_pred).ravel()
    print("            | task-set schedulable | task-set not schedulable")
    print("-------------------------------------------------------------")
    print("SA positive | tp = %d            | fp = %d" % (tp, fp))
    print("-------------------------------------------------------------")
    print("SA negative | fn = %d            | tn = %d" % (fn, tn))
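# Sanity-check sketch (not part of the original script) for the
# (tn, fp, fn, tp) unpacking used above: sklearn's confusion_matrix puts true
# labels on rows and predictions on columns, so for binary labels ravel()
# yields tn, fp, fn, tp in exactly that order.
import sklearn.metrics

tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
    [0, 1, 0, 1], [0, 1, 1, 1]).ravel()
assert (tn, fp, fn, tp) == (1, 1, 0, 2)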
def run():
    import main
    main.load_data('twitter.gold', 'data/vocab_yelp.txt', 'data/vectors_yelp.txt')
    (train_size, _) = main.X_train.shape
    (vocab_size, dimensions) = main.embeddings_matrix.shape
    for pooling in ('max', 'average', 'logsumexp'):
        net = Net(pooling=pooling,
                  embeddings_matrix=main.embeddings_matrix,
                  vocab_size=vocab_size,
                  dimensions=dimensions)
        batch_size = 64
        for i in range(train_size // batch_size):
            x = main.X_train[batch_size * i:batch_size * (i + 1)]
            y = net(Variable(torch.LongTensor(x)))
            logger.info('output: {}'.format(y))
            break
def main():
    while True:
        city, month, day = mn.get_filters()
        df = mn.load_data(city, month, day)

        mn.time_stats(df)
        mn.station_stats(df)
        mn.trip_duration_stats(df)
        mn.user_stats(df)
        inp.get_raw_data(df)

        restart = input('\nWould you like to restart? Enter yes or no. __ ')
        if restart.lower() != 'yes':
            break
def test_final_model(model):
    '''
    Test final model on held out testing data
    '''
    test_data = load_data('data/churn_test.csv')
    test_data = data_processing(test_data)
    X, y, cols = design_matrix(test_data, True)
    test_pred = model.predict(X)
    print '=' * 50
    print 'confusion_matrix\n', confusion_matrix(y, test_pred)
    print 'test data f1 score: ', f1_score(y, test_pred)
    print 'test data precision score: ', precision_score(y, test_pred)
    print 'test data recall score: ', recall_score(y, test_pred)
def main(text_path, classifier):
    x_train, _, x_test, _, _, _ = load_data(text_path)
    x_test = x_test[-20:]
    print(x_test)

    model = keras.models.load_model(os.path.join(text_path, classifier))
    print(model.summary())

    vectorizer = TextVectorization(max_tokens=config['max_vocab_size'],
                                   output_sequence_length=config['max_seq_len'])
    train_data = tf.data.Dataset.from_tensor_slices(x_train).batch(config['batch_size'])
    vectorizer.adapt(train_data)

    x_test = vectorizer(np.array([[w] for w in x_test])).numpy()
    prediction = model.predict(x_test)
    print(prediction)
    classes = np.argmax(prediction, axis=-1)
    print(classes)
def main():
    data = load_data()
    fgraph = nx.Graph(data.functions)
    hgraph = nx.Graph(data.humanppi)
    T = Table(name='proteindata')
    for p, ans in data.test1:
        T.add('Protein', p)
        T.add('Degree', p.degree(hgraph))
        T.add('CP Degree', p.cp_degree(hgraph))
        T.add('Fn CP Degree', p.fn_cp_degree(hgraph, fgraph))
        T.add('Fn CP Weight', p.fn_cp_weight(hgraph, fgraph))
        T.add('Cancer Weight', p.cancerweight(hgraph, fgraph))
        T.add('CP Degree Nbrs', p.cp_degree_of_neighbors(hgraph))
        T.add('Answer', ans)
    T.order = [
        'Protein', 'Degree', 'CP Degree', 'Fn CP Degree', 'Fn CP Weight',
        'Cancer Weight', 'CP Degree Nbrs', 'Answer'
    ]
    write_to_file(T, 'report/test1data.csv')
def main():
    data = load_data()
    fgraph = nx.Graph(data.functions)
    hgraph = nx.Graph(data.humanppi)
    T = Table(name='proteindata')
    for p, ans in data.test1:
        T.add('Protein', p)
        T.add('Degree', p.degree(hgraph))
        T.add('CP Degree', p.cp_degree(hgraph))
        T.add('Fn CP Degree', p.fn_cp_degree(hgraph, fgraph))
        T.add('Fn CP Weight', p.fn_cp_weight(hgraph, fgraph))
        T.add('Cancer Weight', p.cancerweight(hgraph, fgraph))
        T.add('CP Degree Nbrs', p.cp_degree_of_neighbors(hgraph))
        T.add('Answer', ans)
    T.order = ['Protein', 'Degree', 'CP Degree', 'Fn CP Degree',
               'Fn CP Weight', 'Cancer Weight', 'CP Degree Nbrs', 'Answer']
    write_table_to_file(T, 'test1data.csv')
    elif (i.find(']') == 1):
        temp[temp.index(i)] = i.replace(']', '')

    temp_list = list()
    for i in temp:
        temp_list.append(int(i))
    t.value = temp_list
    return t

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    print('Illegal characters!')
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input(load_data("test.davis"))
while True:
    tok = lexer.token()
    # print(tok)
    if not tok:
        break
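# Minimal self-contained PLY sketch (hypothetical token set, not the real
# .davis grammar) showing the same lexer pattern as the fragment above:
# module-level token rules, t_newline/t_error handlers, lex.lex(), and the
# token-pull loop.
import ply.lex as lex

tokens = ('NUMBER',)
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    print('Illegal characters!')
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input('12 34\n56')
while True:
    tok = lexer.token()
    if not tok:
        break
    print(tok)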
def plot_dists(df):
    for col in ['avg_surge', 'surge_pct', 'avg_dist', 'avg_rating_by_driver',
                'avg_rating_of_driver', 'trips_in_first_30_days', 'weekday_pct']:
        plot_distribution_by_churn(df, col)

def plot_category(df, col):
    print pd.crosstab(df['churn'], df[col])

def plot_cats(df):
    print '-' * 50
    plot_category(df, 'city')
    print '-' * 50
    plot_category(df, 'phone')
    print '-' * 50
    plot_category(df, 'luxury_car_user')
    print '-' * 50

if __name__ == '__main__':
    df = load_data('data/churn_train.csv')
    df = data_processing(df)
    # plot_category(df, 'avg_rating_by_driver_isnull')
    # plot_category(df, 'avg_rating_of_driver_isnull')
    # plot_dists(df)
    X_train, X_test, y_train, y_test, cols = design_matrix(df)
    model = lr_search(X_train, X_test, y_train, y_test, cols)
    test_final_model(model)
def main():
    data = load_data()
    humanppi_G = nx.Graph(data.humanppi, name="HumanPPI")
    write_to_hdf5(humanppi_G)
    return 0
import numpy as np
import pywt
import torch
from sklearn.metrics import classification_report, confusion_matrix
from skorch import NeuralNetClassifier

from main import MyModule, load_data

if __name__ == "__main__":
    sampling_rate = 360
    wavelet = "mexh"  # mexh, morl, gaus8, gaus4
    scales = pywt.central_frequency(wavelet) * sampling_rate / np.arange(1, 101, 1)

    (x1_train, x2_train, y_train, groups_train), \
        (x1_test, x2_test, y_test, groups_test) = load_data(
            wavelet=wavelet, scales=scales, sampling_rate=sampling_rate)

    net = NeuralNetClassifier(
        MyModule,
        device="cuda" if torch.cuda.is_available() else "cpu")
    net.initialize()
    net.load_params(f_params="./models/model_{}.pkl".format(wavelet))

    y_true, y_pred = y_test, net.predict({"x1": x1_test, "x2": x2_test})
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, digits=4))
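# Background sketch for the scale computation above, using the standard pywt
# relation (stated here as an assumption about the authors' intent): the
# pseudo-frequency of a CWT scale `a` is f = pywt.central_frequency(wavelet) * fs / a,
# so a = f_c * fs / np.arange(1, 101) targets pseudo-frequencies of 1..100 Hz.
import numpy as np
import pywt

fs = 360
f_c = pywt.central_frequency("mexh")
scales = f_c * fs / np.arange(1, 101)
pseudo_freqs = f_c * fs / scales  # ~1.0, 2.0, ..., 100.0 Hz up to rounding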
import rospy
from std_msgs.msg import Int8, String
# Speech2Text is used below; assuming it lives in core alongside Speaker.
from core import Speaker, Speech2Text

import main

def callback(data):
    global _start_order
    _start_order = bool(data.data)

# ROS node names may not contain spaces; the original
# "home_edu_order program" would be rejected by rospy.
rospy.init_node("home_edu_order_program", anonymous=True)
s = Speech2Text()
rospy.Subscriber("/home_edu/order", Int8, callback, queue_size=1)
_main_publisher = rospy.Publisher('/home_edu/order_msg', String, queue_size=1)
_start_order = False
_order_msg = ""
kdata = main.load_data("./mission3.txt")

while not rospy.is_shutdown():
    if _start_order:
        msg = s.listen()
        _order_msg = main.answer_question_from_data(msg, kdata)['question']
        _main_publisher.publish(_order_msg)
    else:
        continue
def test_load_non_existent_fixture(self):
    with self.assertRaises(FileNotFoundError):
        main.load_data('nonsense')
import argparse

import main

parser = argparse.ArgumentParser()
parser.add_argument("data_directory", help="add a data directory", default="flowers")
parser.add_argument("--arch", default="vgg19", type=str)
# Without explicit types, values supplied on the command line would arrive
# as strings even though the defaults are numeric.
parser.add_argument("--learning_rate", default=0.001, type=float)
parser.add_argument("--hidden_units", default=2048, type=int)
parser.add_argument("--epochs", default=8, type=int)
parser.add_argument("--save_dir", default="checkpoint.pth")
args = parser.parse_args()

data_dir = args.data_directory
arch = args.arch
learning_rate = args.learning_rate
hidden_units = args.hidden_units
epochs = args.epochs
save_dir = args.save_dir

trainloader, validloader, testloader, train_data = main.load_data(data_dir)
model, criterion, optimizer = main.model_setup(arch, learning_rate, hidden_units)
main.train_model(model, criterion, optimizer, epochs, trainloader, validloader)
main.saving_checkpoint(model, save_dir, train_data, hidden_units, optimizer)
print("The model is trained")
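# Example invocation of the training script above (hypothetical file name
# train.py; the flags mirror the argparse definitions):
#
#   python train.py flowers --arch vgg19 --learning_rate 0.001 \
#       --hidden_units 2048 --epochs 8 --save_dir checkpoint.pth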
        scores.append(100.0 * accuracy / ((folds - 1) * len(predicted_Class)))
        i += 1
    return np.array(scores)

if __name__ == "__main__":
    print 50 * '*'
    print "RANDOM FORESTS"
    print 50 * '*'

    DEPTH = 25
    NUM_TREES = 50
    depths = [5, 25, 50]

    ############# FILE STUFF #############
    File_Spam = "./Data/spam_data.mat"
    trainingData, trainingLabels, testData = load_data(File_Spam)
    print "Num Features", np.shape(trainingData)[1]

    ############# DATA PARTITIONING #############
    crossValidation_Data = []
    crossValidation_Labels = []
    k = 10
    stepLength = k
    for index in range(0, k):
        crossValidation_Data.append(trainingData[index:-1:stepLength])
        crossValidation_Labels.append(trainingLabels[index:-1:stepLength])
    scoreBuffer = []

    ############# CROSS-VALIDATION #############
    print 50 * '='
    print "CROSS VALIDATION USING RANDOM FORESTS"
# coding: utf-8
get_ipython().magic('pylab')
get_ipython().magic('load_ext autoreload')
get_ipython().magic('autoreload 2')

import main

data = main.load_data()
vects = main.load_google_news_vectors()
w2v_v_scores, w2v_clusterings = main.get_vmeasure_curve_and_clusterings(
    data, main.get_w2v_tranform(vects, tfidf=False))
# The original rebound w2v_clusterings here, clobbering the w2v result;
# the tf-idf clusterings get their own name instead.
tfidf_v_scores, tfidf_clusterings = main.get_vmeasure_curve_and_clusterings(
    data, main.tfidf_transform)
main.v_measure_figure()
plot(w2v_v_scores)
plot(tfidf_v_scores)
            return True
        else:
            return False
    except Exception:
        print("error")

def listen_callback(data):
    global msg
    msg = data.data
    print(msg)

if __name__ == '__main__':
    kdata = main.load_data(
        "/home/mustar/pcms/src/home_edu/scripts/second_keyword.txt")
    msg = ' '
    rospy.init_node("home_edu_PCMS_Second_mission", anonymous=True)
    rate = rospy.Rate(20)
    s = speaker(150, 1.5)
    rospy.Subscriber("/home_edu_Listen/msg", String, listen_callback, queue_size=1)
    t = speech2text()
    t.ambient_noise()
    chassis = chassis()
    c = astra("top_camera")
    f = PH_Follow_me()
                          'min_samples_leaf': [1, 2, 4],
                          'bootstrap': [True, False],
                          'n_estimators': [10, 50, 100, 1000],
                          'random_state': [1]}
    rf_gridsearch = GridSearchCV(RandomForestClassifier(),
                                 random_forest_grid,
                                 n_jobs=-1,
                                 verbose=True,
                                 scoring='f1')
    rf_gridsearch.fit(X_train, y_train)
    # sklearn exposes the winning parameters as best_params_; the original
    # best_params attribute does not exist and would raise AttributeError.
    print rf_gridsearch.best_params_

if __name__ == '__main__':
    df = load_data('data/churn_train.csv')
    df = data_processing(df)
    df = drop_date(df)
    X, y = get_X_and_y(df)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3,
                                                        random_state=42)
    # DT
    dt = decision_tree(X_train, y_train)
    dt_pred = dt.predict(X_test)
    print score(dt, y_test, dt_pred)
    # GD
    gd = gd_model(X_train, y_train)
    gd_pred = gd.predict(X_test)
    print score(gd, y_test, gd_pred)
    # RF
    rf = rf_model(X_train, y_train)
    rf_pred = rf.predict(X_test)
    plot_distribution_by_churn(df, 'avg_dist')
    plot_distribution_by_churn(df, 'avg_rating_by_driver')
    plot_distribution_by_churn(df, 'avg_rating_of_driver')
    plot_distribution_by_churn(df, 'trips_in_first_30_days')
    plot_distribution_by_churn(df, 'weekday_pct')

def plot_category(df, col):
    print pd.crosstab(df['churn'], df[col])

def plot_cats(df):
    print '-' * 50
    plot_category(df, 'city')
    print '-' * 50
    plot_category(df, 'phone')
    print '-' * 50
    plot_category(df, 'luxury_car_user')
    print '-' * 50

if __name__ == '__main__':
    df = load_data()
    # plot_cats(df)
    df = data_processing(df)
    # plot_category(df, 'avg_rating_by_driver_isnull')
    # plot_category(df, 'avg_rating_of_driver_isnull')
    # plot_dists(df)
    X_train, X_test, y_train, y_test, cols = design_matrix(df)
    model = lr_search(X_train, X_test, y_train, y_test, cols)
    test_final_model(model)
    input_qst.data.resize_(qst.size()).copy_(qst)
    label.data.resize_(ans.size()).copy_(ans)

def test(data):
    # test model
    model.eval()
    accuracy = []
    for batch_idx in range(len(data[0]) // bs):
        tensor_data(data, batch_idx)
        acc_bin, l = model.test_(input_img, input_qst, label)
        accuracy.append(acc_bin.item())
    acc = sum(accuracy) / len(accuracy)
    return acc

rel_train, rel_test, rel_val, norel_train, norel_test, norel_val = load_data()

# TEST on non-relational questions
Q1, Q2, Q3, _, _, _, _, _ = split_data(norel_test)
acc = test(Q1)
print('\n Test set: Unary accuracy (shape of object): {:.0f}%\n'.format(acc))
acc = test(Q2)
print('\n Test set: Unary accuracy (query vertical position): {:.0f}%\n'.format(acc))
acc = test(Q3)
print('\n Test set: Unary accuracy (query horizontal position->yes/no): {:.0f}%\n'.format(acc))

# TEST on relational questions
                                         is_file_content=True,
                                         no_rel_name=generator.get_no_rel_name()))
    print("score (model 2): %.4f %.4f" % get_f1(golden, all_generated_2,
                                                is_file_content=True,
                                                no_rel_name=generator.get_no_rel_name()))

if __name__ == "__main__":
    C, logger = get_config()
    # fitlog.debug()
    C.info += "-watch"

    # ----- prepare data and some global variables -----
    data_train, data_test, data_valid, relations, rel_weights = load_data(C, logger)
    _, loss_func, generator = initialize(C, logger, relations, rel_weights)

    if C.watch_type == "train":
        data_watch = data_train
    if C.watch_type == "test":
        data_watch = data_test
    if C.watch_type == "valid":
        data_watch = data_valid

    # ----- load model -----
    if not C.model_save or not C.model_save_2:
        # raising a plain string is invalid in Python 3; raise an exception type
        raise ValueError("model information incomplete")

    with open(C.model_save, "rb") as fil:
def test_load_fixture(self):
    data = main.load_data('appdynamics')
    self.assertIsNotNone(data)
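# Hedged sketch of the loader contract this fixture test (and its
# missing-file counterpart earlier in this section) assumes: the real
# main.load_data may differ, but it must raise FileNotFoundError for a
# missing fixture and return a non-None object for an existing one. The
# fixtures/ layout and JSON format here are hypothetical.
import json
import os

def load_data(fixture_name):
    path = os.path.join('fixtures', fixture_name + '.json')
    with open(path) as f:  # open() raises FileNotFoundError for a missing file
        return json.load(f)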
import paddle
from paddle.nn import Linear
import paddle.nn.functional as F
import numpy as np

from main import load_data, Regressor, max_values, min_values, avg_values

training_data, test_data = load_data()

def load_one_example():
    # Pick one record from the already-loaded test set as the test sample
    idx = np.random.randint(0, test_data.shape[0])
    idx = -10  # overrides the random index with a fixed one
    one_data, label = test_data[idx, :-1], test_data[idx, -1]
    # Reshape this record to [1, 13]
    one_data = one_data.reshape([1, -1])
    return one_data, label

model = Regressor()
# The argument is the path of the file holding the saved model parameters
model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
model.eval()

# The argument is the path of the dataset file
one_data, label = load_one_example()
# Convert the data to the dynamic-graph tensor format
one_data = paddle.to_tensor(one_data)
predict = model(one_data)
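# Hedged continuation sketch: max_values, min_values and avg_values are
# imported above but unused in the fragment, which suggests the prediction is
# de-normalized before being compared with the label. The normalization scheme
# assumed here ((x - avg) / (max - min), label in the last column) is a guess.
predict = predict * (max_values[-1] - min_values[-1]) + avg_values[-1]
label = label * (max_values[-1] - min_values[-1]) + avg_values[-1]
print("Inference result is {}, the corresponding label is {}".format(
    predict.numpy(), label))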
def classify(model_dir, n_inference_steps=20, n_inference_samples=20,
             dim_hs=[100], h_act='T.nnet.softplus', learning_rate=0.0001,
             learning_rate_schedule=None, dropout=0.1, batch_size=100,
             l2_decay=0.002, epochs=100, optimizer='rmsprop',
             optimizer_args=dict(), center_input=True, name='classifier'):
    out_path = model_dir
    inference_args = dict(
        inference_method='adaptive',
        inference_rate=0.1,
    )

    # ========================================================================
    print 'Loading model'
    model_file = glob(path.join(model_dir, '*best*npz'))[0]
    models, model_args = load_model(model_file, unpack_sbn, **inference_args)
    model = models['sbn']
    model.set_tparams()

    dataset = model_args['dataset']
    dataset_args = model_args['dataset_args']
    if dataset == 'mnist':
        dataset_args['binarize'] = True
        dataset_args['source'] = '/export/mialab/users/dhjelm/data/mnist.pkl.gz'
    train, valid, test = load_data(dataset, batch_size, batch_size, batch_size,
                                   **dataset_args)

    mlp_args = dict(
        dim_hs=dim_hs,
        h_act=h_act,
        dropout=dropout,
        out_act=train.acts['label']
    )

    X = T.matrix('x', dtype=floatX)
    Y = T.matrix('y', dtype=floatX)
    trng = RandomStreams(random.randint(0, 1000000))

    if center_input:
        print 'Centering input with train dataset mean image'
        X_mean = theano.shared(train.mean_image.astype(floatX), name='X_mean')
        X_i = X - X_mean
    else:
        X_i = X

    # ========================================================================
    print 'Loading MLP and forming graph'
    (qs, i_costs), _, updates = model.infer_q(
        X_i, X, n_inference_steps, n_inference_samples=n_inference_samples)
    q0 = qs[0]
    qk = qs[-1]
    constants = [q0, qk]
    dim_in = model.dim_h
    dim_out = train.dims['label']

    mlp0_args = deepcopy(mlp_args)
    mlp0 = MLP(dim_in, dim_out, name='classifier_0', **mlp0_args)
    mlpk_args = deepcopy(mlp_args)
    mlpk = MLP(dim_in, dim_out, name='classifier_k', **mlpk_args)
    mlpx_args = deepcopy(mlp_args)
    mlpx = MLP(train.dims[str(dataset)], dim_out, name='classifier_x', **mlpx_args)

    tparams = mlp0.set_tparams()
    tparams.update(**mlpk.set_tparams())
    tparams.update(**mlpx.set_tparams())
    print_profile(tparams)

    p0 = mlp0(q0)
    pk = mlpk(qk)
    px = mlpx(X_i)

    # ========================================================================
    print 'Getting cost'
    cost0 = mlp0.neg_log_prob(Y, p0).sum(axis=0)
    costk = mlpk.neg_log_prob(Y, pk).sum(axis=0)
    costx = mlpx.neg_log_prob(Y, px).sum(axis=0)
    cost = cost0 + costk + costx
    extra_outs = []
    extra_outs_names = ['cost']

    if l2_decay > 0.:
        print 'Adding %.5f L2 weight decay' % l2_decay
        mlp0_l2_cost = mlp0.get_L2_weight_cost(l2_decay)
        mlpk_l2_cost = mlpk.get_L2_weight_cost(l2_decay)
        mlpx_l2_cost = mlpx.get_L2_weight_cost(l2_decay)
        cost += mlp0_l2_cost + mlpk_l2_cost + mlpx_l2_cost
        extra_outs += [mlp0_l2_cost, mlpk_l2_cost, mlpx_l2_cost]
        extra_outs_names += ['MLP0 L2 cost', 'MLPk L2 cost', 'MLPx L2 cost']

    # ========================================================================
    print 'Extra functions'
    error0 = (Y * (1 - p0)).sum(1).mean()
    errork = (Y * (1 - pk)).sum(1).mean()
    errorx = (Y * (1 - px)).sum(1).mean()
    f_test_keys = ['Error 0', 'Error k', 'Error x', 'Cost 0', 'Cost k', 'Cost x']
    f_test = theano.function([X, Y], [error0, errork, errorx, cost0, costk, costx])

    # ========================================================================
    print 'Setting final tparams and save function'
    all_params = OrderedDict((k, v) for k, v in tparams.iteritems())
    tparams = OrderedDict((k, v) for k, v in tparams.iteritems()
                          if (v not in updates.keys() or v not in excludes))
    print 'Learned model params: %s' % tparams.keys()
    print 'Saved params: %s' % all_params.keys()

    def save(tparams, outfile):
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_in=dim_in,
            dim_out=dim_out,
            dataset=dataset,
            dataset_args=dataset_args,
            **mlp_args
        )
        np.savez(outfile, **d)

    # ========================================================================
    print 'Getting gradients.'
    grads = T.grad(cost, wrt=itemlist(tparams), consider_constant=constants)

    # ========================================================================
    print 'Building optimizer'
    lr = T.scalar(name='lr')
    f_grad_shared, f_grad_updates = eval('op.' + optimizer)(
        lr, tparams, grads, [X, Y], cost, extra_ups=updates,
        extra_outs=extra_outs, **optimizer_args)

    monitor = SimpleMonitor()

    try:
        epoch_t0 = time.time()
        s = 0
        e = 0
        widgets = ['Epoch {epoch} ({name}, '.format(epoch=e, name=name),
                   Timer(), '): ', Bar()]
        epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start()
        training_time = 0

        while True:
            try:
                x, y = train.next()
                if train.pos == -1:
                    epoch_pbar.update(train.n)
                else:
                    epoch_pbar.update(train.pos)
            except StopIteration:
                print
                epoch_t1 = time.time()
                training_time += (epoch_t1 - epoch_t0)
                valid.reset()

                widgets = ['Validating: ', Percentage(), ' (', Timer(), ')']
                pbar = ProgressBar(widgets=widgets, maxval=valid.n).start()
                results_train = OrderedDict()
                results_valid = OrderedDict()
                while True:
                    try:
                        x_valid, y_valid = valid.next()
                        x_train, y_train = train.next()
                        r_train = f_test(x_train, y_train)
                        r_valid = f_test(x_valid, y_valid)
                        results_i_train = dict((k, v) for k, v in zip(f_test_keys, r_train))
                        results_i_valid = dict((k, v) for k, v in zip(f_test_keys, r_valid))
                        update_dict_of_lists(results_train, **results_i_train)
                        update_dict_of_lists(results_valid, **results_i_valid)
                        if valid.pos == -1:
                            pbar.update(valid.n)
                        else:
                            pbar.update(valid.pos)
                    except StopIteration:
                        print
                        break

                def summarize(d):
                    for k, v in d.iteritems():
                        d[k] = np.mean(v)

                summarize(results_train)
                summarize(results_valid)

                monitor.update(**results_train)
                monitor.update(dt_epoch=(epoch_t1 - epoch_t0),
                               training_time=training_time)
                monitor.update_valid(**results_valid)
                monitor.display()

                monitor.save(path.join(
                    out_path, '{name}_monitor.png').format(name=name))
                monitor.save_stats(path.join(
                    out_path, '{name}_monitor.npz').format(name=name))
                monitor.save_stats_valid(path.join(
                    out_path, '{name}_monitor_valid.npz').format(name=name))

                e += 1
                epoch_t0 = time.time()

                valid.reset()
                train.reset()

                if learning_rate_schedule is not None:
                    if e in learning_rate_schedule.keys():
                        lr = learning_rate_schedule[e]
                        print 'Changing learning rate to %.5f' % lr
                        learning_rate = lr

                widgets = ['Epoch {epoch} ({name}, '.format(epoch=e, name=name),
                           Timer(), '): ', Bar()]
                epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start()
                continue

            if e > epochs:
                break

            rval = f_grad_shared(x, y)

            if check_bad_nums(rval, extra_outs_names):
                print rval
                print np.any(np.isnan(mlpk.W0.get_value()))
                print np.any(np.isnan(mlpk.b0.get_value()))
                print np.any(np.isnan(mlpk.W1.get_value()))
                print np.any(np.isnan(mlpk.b1.get_value()))
                raise ValueError('Bad number!')

            f_grad_updates(learning_rate)
            s += 1

    except KeyboardInterrupt:
        print 'Training interrupted'

    test.reset()
    widgets = ['Testing: ', Percentage(), ' (', Timer(), ')']
    pbar = ProgressBar(widgets=widgets, maxval=test.n).start()
    results_test = OrderedDict()
    while True:
        try:
            x_test, y_test = test.next()
            r_test = f_test(x_test, y_test)
            results_i_test = dict((k, v) for k, v in zip(f_test_keys, r_test))
            update_dict_of_lists(results_test, **results_i_test)
            if test.pos == -1:
                pbar.update(test.n)
            else:
                pbar.update(test.pos)
        except StopIteration:
            print
            break

    def summarize(d):
        for k, v in d.iteritems():
            d[k] = np.mean(v)

    summarize(results_test)
    print 'Test results:'
    monitor.simple_display(results_test)

    if out_path is not None:
        outfile = path.join(out_path,
                            '{name}_{t}.npz'.format(name=name, t=int(time.time())))
        last_outfile = path.join(out_path, '{name}_last.npz'.format(name=name))
        print 'Saving'
        save(tparams, outfile)
        save(tparams, last_outfile)
        print 'Done saving.'

    print 'Bye bye!'
                       kde=True, bins=75, ax=axs[0], color='steelblue')
    ax1.set_xlabel('Normal Sale Price')
    ax1.set_ylabel('Frequency', size=12)
    ax2 = sns.distplot(df['SalePrice_Log'], kde=True, bins=75, ax=axs[1],
                       color='steelblue')
    ax2.set_xlabel('Log Transformed Sale Price')
    ax2.set_ylabel('Frequency', size=12)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    plt.show()

if __name__ == '__main__':
    # Load Data
    train, test = load_data()

    # Plot Correlation Matrix of all features
    train = clean_data(train, dummy=False)
    # plot_correlation_matrix(train)
    scatter_matrix(train)

    # Log-Transform SalePrice
    train['SalePrice_Log'] = np.log1p(train["SalePrice"])
    saleprice_dist(train)
def main():
    data = load_data()
    hgraph = nx.Graph(data.humanppi)
    fgraph = nx.Graph(data.functions)
import main
import math
# torch itself is needed for torch.device below but was missing from the imports
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import numpy as np
import copy
import measures

device = torch.device("cuda")
nchannels, nclasses = 3, 10
kwargs = {'num_workers': 1, 'pin_memory': True}

hidden_units = np.array([pow(2, n) for n in range(11, 16)])
err = [[] for i in range(0, 4)]
weight_decay = [0, 0.001, 0.0025, 0.005]

train_dataset = main.load_data('train', 'CIFAR10', '/hdd/datasets')
val_dataset = main.load_data('val', 'CIFAR10', '/hdd/datasets')
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, **kwargs)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, **kwargs)

for i, decay in enumerate(weight_decay):
    for nunits in hidden_units:
        n = int(math.log(nunits, 2))
        model = nn.Sequential(nn.Linear(32 * 32 * nchannels, nunits),
                              nn.ReLU(), nn.Linear(nunits, nclasses))
        model = model.to(device)
        init_model = copy.deepcopy(model)
        optimizer = optim.SGD(model.parameters(), 0.001,