def test_links_handler(self):
    load()
    response = self.testapp.get('/links')
    self.assertEquals(response.status_int, 200)
    self.assertEquals(response.content_type, "application/json")
    data = json.loads(response.normal_body)
    self.assertEquals(len(data), 8)
    # Create link
    before = len(Link.query().fetch())
    response = self.testapp.post_json('/links', {'title': 'Foo', 'url': 'http://www.foo.bar'})
    self.assertEquals(response.status_int, 201)
    self.assertEquals(len(Link.query().fetch()), before + 1)
    # Update link
    response = self.testapp.post_json('/links', {'title': 'Foo Bar', 'url': 'http://www.foo.bar'})
    self.assertEquals(response.status_int, 200)
    # Quantity of links didn't change
    self.assertEquals(len(Link.query().fetch()), before + 1)
    link = Link.get_by_id('http://www.foo.bar')
    self.assertEquals(link.title, 'Foo Bar')
    # Delete link
    before = len(Link.query().fetch())
    response = self.testapp.post_json('/links', {'action': 'delete', 'title': 'Foo Bar', 'url': 'http://www.foo.bar'})
    self.assertEquals(response.status_int, 200)
    self.assertEquals(before - 1, len(Link.query().fetch()))
def test_skill_model(self):
    load()
    skill = Skill(title='Python', desc='Cool', links=['http://www.github.com/'])
    skill.approve()
    self.assertEquals(skill.approved, 1)
    skills = Skill.all()
    self.assertEquals(len(skills), 4)
    s_key = skill.put()
    skill.id = s_key.id()
    skill.put()
    Skill.get(skill.id)
    self.assertEquals(skill.id, s_key.id())
def __init__(self):
    np.random.seed(0)
    self.n_folds = 5
    self.train_X, self.train_Y, self.test_X = load_data.load()
    self.skf = list(StratifiedKFold(self.train_Y, self.n_folds))
    self.clfs = [VotingClassifier(estimators=[('lr', LogisticRegression(random_state=1)),
                                              ('rf', RandomForestClassifier(random_state=1)),
                                              ('gnb', GaussianNB())], voting='soft'),
                 AdaBoostClassifier(n_estimators=100, learning_rate=0.5),
                 KNeighborsClassifier(3),
                 GaussianNB(),
                 LinearDiscriminantAnalysis(),
                 QuadraticDiscriminantAnalysis(),
                 DecisionTreeClassifier(random_state=0),
                 RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
                 RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
                 ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
                 ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
                 GradientBoostingClassifier(learning_rate=0.05, subsample=0.3, max_depth=8, n_estimators=100),
                 xgb.XGBClassifier(colsample_bytree=0.3, learning_rate=0.1, n_estimators=100,
                                   objective='binary:logistic', reg_alpha=0, reg_lambda=1)]
    self.stacking_train = np.zeros((self.train_X.shape[0], len(self.clfs)))
    self.stacking_test = np.zeros((self.test_X.shape[0], len(self.clfs)))
    self.clf = LogisticRegression(class_weight='balanced')
    self.fit()
    self.predict()
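# The fit()/predict() methods called above are not shown in this snippet. Below is a
# minimal sketch of the out-of-fold stacking that the attributes imply; it assumes the
# same names (self.clfs, self.skf, self.stacking_train, self.stacking_test), numpy
# arrays for the data, and binary targets. It is an illustration, not the original
# implementation.
def fit(self):
    for j, clf in enumerate(self.clfs):
        fold_test_preds = np.zeros((self.test_X.shape[0], len(self.skf)))
        for i, (train_idx, val_idx) in enumerate(self.skf):
            clf.fit(self.train_X[train_idx], self.train_Y[train_idx])
            # Out-of-fold predictions become the meta-features for the level-1 model
            self.stacking_train[val_idx, j] = clf.predict_proba(self.train_X[val_idx])[:, 1]
            fold_test_preds[:, i] = clf.predict_proba(self.test_X)[:, 1]
        self.stacking_test[:, j] = fold_test_preds.mean(axis=1)
    # The level-1 model is trained on the stacked out-of-fold predictions
    self.clf.fit(self.stacking_train, self.train_Y)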
def line_fit(cx,cy,x_range,curve,style,reference,reference_plot,m_plot,axarr): x_guide = np.linspace(x_range[0],x_range[1],100) table,full_table = load_data.load(cx=cx,cy=cy,p_th=0.5,N_th=10,norm=False,p_values="d") bins,table = load_data.assign(table=table,Nb=20,th=0.5,equal_samples=True, redistribute=False,rd_th=0,ct_th=0,print_sizes=False) if reference == "all galaxies": reference_table = full_table else: reference_table = table if curve == True: def f(x,k,c1,c2): return k**(-x + c1) + c2 else: def f(x,k,c1,c2): return k*x + c1 if reference_plot == True: p_r,c_r = curve_fit(f,reference_table[:,-1],reference_table[:,-2],maxfev=10000) for m in range(5): if reference_plot == True: axarr[m].plot(x_guide,f(x_guide,p_r[0],p_r[1],p_r[2]),color="k",linewidth=2,linestyle=style) if m_plot == True: t_m = table[bins[:,1] == m] p,c = curve_fit(f,t_m[:,-1],t_m[:,-2],maxfev=10000) axarr[m].plot(x_guide,f(x_guide,p[0],p[1],p[2]),color=colours[m],linewidth=2,linestyle=style) return None
def main():
    # load data
    print "Loading..."
    objects, training, validation = load()

    # calculate similarities
    print "Calculating similarities..."
    similarities = generate_similar_users(training, min_support=5)

    # see how well predict_rating performs
    print "Validating..."
    error = 0
    error2 = 0  # error from only cases where we could give a rating
    num_validations = 0
    for user in validation:
        for item in validation[user]:
            p = predict_rating(user, item, training, similarities)
            a = validation[user][item]
            error += abs(p[0] - a)
            num_validations += 1
            if p[1]:
                error2 += abs(p[0] - a)
            #print "Predicted: %0.2f,\t Actual: %0.2f" % (p[0], a)
    print "Error, on average, was %0.2f" % (error/num_validations)
    print "Error, excluding not enough data cases, was, on average, %0.2f" % (error2/num_validations)
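# predict_rating() is not shown in this snippet. A rough sketch of the user-based
# collaborative filtering it implies is given below; the data layout (training as a
# dict of user -> {item: rating}, similarities as user -> [(other_user, weight), ...]),
# the fallback rating, and the (rating, had_enough_data) return pair are assumptions
# based on how the validation loop uses p[0] and p[1].
def predict_rating(user, item, training, similarities):
    num, den = 0.0, 0.0
    for other, weight in similarities.get(user, []):
        if item in training.get(other, {}):
            num += weight * training[other][item]
            den += weight
    if den == 0:
        # No similar user rated this item: return an assumed default rating
        return 3.0, False
    return num / den, True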
def test_skills_handler(self):
    load()
    response = self.testapp.get('/skills')
    self.assertEquals(response.status_int, 200)
    self.assertEquals(response.content_type, 'application/json')
    data = json.loads(response.normal_body)
    self.assertEquals(4, len(data))
    # Approve skill
    skill = Skill(title="Math").put()
    response = self.testapp.post_json('/skills/approve/', {'_id': skill.id()})
    self.assertEquals(response.status_int, 201)
    self.assertEquals(Skill.get(skill.id()).approved, 1)
    # Create skill
    before = len(Skill.all())
    post_data = {'action': 'new',
                 'data': {'title': 'Python', 'desc': 'Love it!',
                          'links': [{'url': 'http://www.github.com', 'title': 'My Github'}]}}
    response = self.testapp.post_json('/skills', post_data)
    self.assertEquals(response.status_int, 201)
    self.assertEquals(response.content_type, 'application/json')
    self.assertEquals(before + 1, len(Skill.all()))
    # Remove skill
    before = len(Skill.all())
    response = self.testapp.post_json('/skills', {'_id': skill.id(), 'action': 'delete'})
    self.assertEquals(response.status_int, 200)
    self.assertEquals(before - 1, len(Skill.all()))
    # Update skill
    links_before = len(Link.query().fetch())
    skill = Skill(title='Foo', desc='Bar').put()
    post_data = {'action': 'update',
                 'data': {'_id': skill.id(), 'title': 'Noob', 'desc': 'Noob!',
                          'links': [{'url': 'http://www.noob.com', 'title': 'Noob Com'}]}}
    response = self.testapp.post_json('/skills', post_data)
    self.assertEquals(200, response.status_int)
    self.assertEquals(links_before + 1, len(Link.query().fetch()), msg="Should create new link.")
    skill = Skill.get(skill.id())
    self.assertEquals(len(skill.links), 1)
    self.assertEquals(skill.title, 'Noob')
def main():
    X, y = load()
    print 'Running Model'
    mdl = xgb.XGBClassifier()
    scores = cross_val_score(mdl, X, y, cv=5, scoring='accuracy', n_jobs=-1)
    print np.mean(scores)
def histogram(cx, Nb, bin_extent, axarr, full_hist, style):
    '''
    Plot histograms-----------------------------------------------------------
    --------------------------------------------------------------------------
    Arguments:
    cx: The data you want histogrammed (ie. a colour, mass etc.)
    Nb: Number of bins.
    bin_extent: List of form [lower bound, upper bound]
    axarr: the plotting array as from make_grid or make_stack.
    full_hist: set as "all" for all galaxies, "all spirals" for all spiral
        galaxies, or "assigned spirals" for all spirals that meet the threshold
        to be classified as having a particular arm number.
    style: histogram line style eg. "solid"
    --------------------------------------------------------------------------
    '''
    # Load all of the data and assign arms to each:
    table, full_table = load_data.load(cx=cx, cy=["REDSHIFT_1"], p_th=0.5, N_th=5,
                                       norm=False, p_values="d")
    bins, table = load_data.assign(table=table, Nb=20, th=0.5, equal_samples=True,
                                   redistribute=False, rd_th=0, ct_th=0, print_sizes=False)
    # Define histogram bins:
    bin_values = np.linspace(bin_extent[0], bin_extent[1], Nb + 1)

    for m in range(5):
        t_select = table[bins[:, 1] == m]
        # Reference histograms:
        if full_hist == "all":
            axarr[m].hist(table[:, -1], bins=bin_values, normed=True,
                          histtype="step", linewidth=2, color="k", alpha=0.75)
        elif full_hist == "all spirals":
            axarr[m].hist(table[:, -1], bins=bin_values, normed=True,
                          histtype="step", linewidth=2, color="k", alpha=0.75)
        else:
            sel = table[(bins[:, 1] != -999) & (bins[:, 1] != 5)]
            axarr[m].hist(sel[:, -1], bins=bin_values, normed=True,
                          histtype="step", linewidth=2, color="k", alpha=0.75)
            if full_hist != "assigned spirals":
                print("Invalid full_hist value; using 'assigned spirals'")
        # Plot histograms.
        axarr[m].hist(t_select[:, -1], bins=bin_values, normed=True,
                      histtype="step", linewidth=2, color=colours[m], linestyle=style)
    return None
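# A minimal usage sketch for histogram(). The make_stack() return signature, the "Mr"
# column name, and the bin extent are assumptions for illustration, not values taken
# from the original module.
fig, axarr = make_stack()  # one panel per arm-number class
histogram(cx=["Mr"], Nb=25, bin_extent=[-23, -18], axarr=axarr,
          full_hist="assigned spirals", style="solid")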
def xgb_model_cv():
    X, y = load()
    print 'Running Model'
    mdl = xgb.XGBClassifier()
    scores = cross_val_score(mdl, X, y, cv=5, scoring='f1', n_jobs=-1)
    print 'f1 =', np.mean(scores)
    scores = cross_val_score(mdl, X, y, cv=5, scoring='accuracy', n_jobs=-1)
    print 'accuracy =', np.mean(scores)
    scores = cross_val_score(mdl, X, y, cv=5, scoring='recall', n_jobs=-1)
    print 'recall =', np.mean(scores)
def load(dataset_path, force_refresh=False, **kwargs):
    if force_refresh or not os.path.isfile(dataset_path):
        print "[trainer] loading data from scratch"
        df = load_data.load(**kwargs)
        print "[trainer] assigning airport"
        df = load_data.assign_airport(df, **kwargs)
        print "[trainer] saving dataset to %s" % dataset_path
        df.to_csv(dataset_path, encoding='utf-8')
    else:
        print "[trainer] loading existing data at %s" % dataset_path
        # Using python engine, which is slower, but was having memory issues when using
        # the C engine
        df = pd.read_csv(dataset_path, encoding='utf-8', engine="python")
    return df
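# Example call for the cached loader above; the CSV path is an illustrative assumption
# rather than a value from the original project.
df = load('data/flights_with_airports.csv', force_refresh=False)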
def xgb_model():
    X, y = load()
    params = {'learning_rate': [0.01, 0.03, 0.1, 0.3],
              'n_estimators': [50, 125, 300],
              'subsample': [0.5, 1.0],
              'max_depth': [1, 3, 10]}
    mdl = xgb.XGBClassifier()
    gs = GridSearchCV(mdl, params, cv=5, n_jobs=-1)
    gs.fit(X, y)
    print 'Best params:', gs.best_params_
    print 'Best score:', gs.best_score_
    mdl = gs.best_estimator_
    with open('data/xgb_model.pkl', 'w') as f:
        pickle.dump(mdl, f)
def plot_data(cx,axarr,Nb,equal_samples,style,errors,data_type): ''' Plot data----------------------------------------------------------------- -------------------------------------------------------------------------- Arguments: cx: The data you want plotted (ie. a colour, mass etc.) axarr: the plotting array as from make_grid or make_stack. Nb: Number of bins. equal samples: if True, bin into equally sized samples. If False, bin into equally spaced bins. style: linestyle eg. "dotted" or "solid" errors: if True, plot errors from Cameron et al. data_type: have either "d" for debiased, "r" for raw, or "w" for debiased from Willet et al. -------------------------------------------------------------------------- ''' table,full_table = load_data.load(cx=cx,cy=["REDSHIFT_1"],p_th=0.5, N_th=10,norm=False,p_values=data_type) bins,table = load_data.assign(table=table,Nb=Nb,th=0.5, equal_samples=equal_samples,redistribute=False,rd_th=0,ct_th=0, print_sizes=False) for m in range(5): fracs=load_data.get_fracs(table=table,bins=bins,m=m,c=0.683 ,full_data="assigned spirals") axarr[m].plot(fracs[:,0],fracs[:,1]/fracs[:,2],color=colours[m], linestyle=style,linewidth=2) if errors == True: axarr[m].fill_between(fracs[:,0],fracs[:,3],fracs[:,4] ,color=colours[m],alpha=0.3,linestyle="dashed",hatch="/") return None
'w_bn_b_y', 'w_bn_b_z']: ocp.bound(w,(-4*pi,4*pi)) return ocp if __name__=='__main__': nk = 100 tStart = 20.0 # [sec] tEnd = 21.0 # [sec] T = tEnd - tStart ts = T / float(nk) # load the data (data,interval,_) = load_data.load(tStart, tEnd, ts) # fix some signs in the data data['encoder']['sin_delta'] = -data['encoder']['sin_delta'] conf = makeConf() conf['useVirtualForces'] = 'random_walk' conf['useVirtualTorques'] = 'random_walk' dae = carousel_dae.makeDae(conf) print "setting up ocp..." ocp = setupOcp(dae,conf,T,nk,deg=3) lineRadiusGuess = 1.7 # trajectory for initial guess
i, j, train, test, k, reg , X, y = params print "Month, Fold, Regressor =", i,j,k print reg reg.fit(X[train], y[train]) tr = reg.predict(X[test]) su = reg.predict(X_submission) return [i,j,k,test,tr,su] if __name__ == '__main__': np.random.seed(0) n_folds = 5 n_jobs = 4 X, X_submission, ys, ids, idx = load_data.load() # Smart transformation to avoid logscale in evaluation: ys = np.log(ys/500.0 + 1.0) y_submission = np.zeros((X_submission.shape[0], 12)) # regs = [RandomForestRegressor(n_estimators=1000, n_jobs=1, max_features='auto'), # ExtraTreesRegressor(n_estimators=1000, n_jobs=1, max_features='auto'), # GradientBoostingRegressor(learn_rate=0.001, subsample=0.5, max_depth=6, n_estimators=1000)] # My best submission used just this one: regs = [GradientBoostingRegressor(learn_rate=0.001, subsample=0.5, max_depth=6, n_estimators=20000)] dataset_blend_train = np.zeros((X.shape[0], 12*len(regs)), dtype=np.double) dataset_blend_submission = np.zeros((X_submission.shape[0], 12*len(regs), n_folds), dtype=np.double)
from sklearn import linear_model from sklearn import metrics from sklearn.ensemble import GradientBoostingClassifier import numpy as np import pandas as pd import load_data as load_data import preprocess as pre X_train_A, X_train_B, y_train = load_data.load("train") X_test_A, X_test_B = load_data.load("test") X_train_A, X_train_B = pre.common_name(X_train_A,X_train_B) X_train = pre.proc(X_train_A) - pre.proc(X_train_B) model = linear_model.LogisticRegression(fit_intercept=False) #params = {'n_estimators':200, 'learning_rate':0.01,'max_depth':3, 'random_state':0} #model = GradientBoostingClassifier(**params) model.fit(X_train,y_train['Choice']) preds = model.predict_proba(X_train)[:,1] fpr, tpr, thresholds = metrics.roc_curve(y_train, preds) auc = metrics.auc(fpr,tpr) print 'AuC score on training data:',auc ########################### # PREDICTING ON TEST DATA ########################### X_test_A, X_test_B = pre.common_name(X_test_A,X_test_B) X_test = pre.proc(X_test_A) - pre.proc(X_test_B) preds_test = model.predict_proba(X_test)[:,1]
import pylab from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt from matplotlib import cm from matplotlib.ticker import LinearLocator, FormatStrFormatter import load_data import gc plt.rcParams['figure.figsize'] = 10, 10 #Load the data. train = pd.read_json("input/train.json") test = pd.read_json("input/test.json") ##### set the target of train and test train_X, X_angle = load_data.load(train) test_X, X_test_angle = load_data.load(test) train_y = np.array(train['is_iceberg']) #############pre process on images :) import pre_pros train_X = pre_pros.pre_pros(train_X) test_X = pre_pros.pre_pros(test_X) ### configure the callbacks of the model from keras.callbacks import ModelCheckpoint from keras.callbacks import EarlyStopping from keras.callbacks import ReduceLROnPlateau from keras.callbacks import TensorBoard callbacks_list = [
def contour_plots(cx, cy, grid_spacing, axarr, xlims, ylims, reference_plot,
                  m_plot, reference, levels, alphas, sigma):
    '''
    Plot contours ------------------------------------------------------------
    --------------------------------------------------------------------------
    Arguments:
    cx, cy: x+y axis columns for contour (refer to the same cx+cy values in
        load_data.load).
    grid_spacing: Number of cells to divide in to for the 2d histogram.
    axarr: the plotting array as from make_grid or make_stack.
    xlims, ylims: Lists of the form [lower bound, upper bound] for the x+y
        histogram limits.
    reference_plot, m_plot: Plot style. Can have the following values:
        -------
        "contour": pure line contour.
        "contourf": pure filled contour.
        "contourf+line": lines+filled contour.
        "hist": 2D shaded histogram.
        -------
    reference: Can set as "all galaxies" to include all of the volume-limited
        sample or "all spirals" to only show the spiral sample.
    levels: Set the contour levels manually here.
    alphas: 2 item list with [reference transparency, arm sample transparency]
        values.
    --------------------------------------------------------------------------
    '''
    table, full_table = load_data.load(cx=cx, cy=cy, p_th=0.5, N_th=10, norm=False, p_values="d")
    #table[:,-2:] = np.random.randn(len(table),2)
    #full_table[:,-2:] = np.random.randn(len(full_table),2)
    bins, table = load_data.assign(table=table, Nb=20, th=0.5, equal_samples=True,
                                   redistribute=False, rd_th=0, ct_th=0, print_sizes=False)

    if reference == "all galaxies":
        reference_table = full_table
    elif reference == "all spirals":
        reference_table = table

    for m in range(5):
        t_sel = table[bins[:, 1] == m]
        if (reference_plot is not None) & ((reference == "all galaxies") or (reference == "all spirals")):
            contour(table=reference_table, xlims=xlims, ylims=ylims, grid_spacing=grid_spacing,
                    contour_type=reference_plot, ax=axarr[m], alpha=alphas[0],
                    colour=["k", "Greys"], levels=levels, sigma=sigma)
        if m_plot != None:
            contour(table=t_sel, xlims=xlims, ylims=ylims, grid_spacing=grid_spacing,
                    contour_type=m_plot, ax=axarr[m], alpha=alphas[1],
                    colour=[colours[m], cmaps[m]], levels=levels, sigma=sigma)

    axarr[0].set_xlim(xlims)
    axarr[0].set_ylim(ylims)
    return None
clf = svm.LinearSVC() clf.fit(X, Y) return clf def incremental_SGD(X, Y, loss): sgd = SGDClassifier(loss=loss, penalty="l2") labels = np.unique(Y) for i in range(X.shape[0]): point_x = X[i] point_y = Y[i] sgd.partial_fit([point_x], [point_y], classes=labels) return sgd (X_train, Y_train), (X_test, Y_test), cross_val_indices = load() X_train = X_train.reshape((X_train.shape[0], ) + (np.prod(X_train.shape[1:]), )) X_test = X_test.reshape((X_test.shape[0], ) + (np.prod(X_test.shape[1:]), )) Y_train = np.argmax(Y_train, axis=1) Y_test = np.argmax(Y_test, axis=1) losses = ["hinge", "log", "modified_huber", "squared_hinge"] for loss in losses: mean_error = 0 for i, val in enumerate(cross_val_indices): sgd = incremental_SGD(X_train[val], Y_train[val], loss) error = sgd.score(X_test, Y_test) print("For validation %d using %s loss" % (i, loss), error) mean_error += error print("Mean error for %s loss was %f" % (loss, mean_error /
from sklearn.cross_validation import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

if __name__ == '__main__':
    # np.random.seed(0)  # seed to shuffle the train set

    n_folds = 4
    n_threads = 2
    # verbose = False
    # shuffle = False

    print 'Loading train and test sets'
    trainset, train_label, testset = load_data.load()
    print 'Finished feature engineering for train and test sets'

    # if shuffle:
    #     idx = np.random.permutation(train_label.size)
    #     trainset = trainset[idx]
    #     train_label = train_label[idx]

    # Level 0 classifiers
    clfs = [RandomForestClassifier(n_estimators=100, n_jobs=n_threads, criterion='gini'),
            RandomForestClassifier(n_estimators=100, n_jobs=n_threads, criterion='entropy'),
            ExtraTreesClassifier(n_estimators=100, n_jobs=n_threads, criterion='gini'),
            ExtraTreesClassifier(n_estimators=100, n_jobs=n_threads, criterion='entropy'),
            GradientBoostingClassifier(n_estimators=50, learning_rate=0.05, subsample=0.5, max_depth=6)]

    # Stratified random shuffled cross validation
def run(): global task_name parser = argparse.ArgumentParser() parser.add_argument('-d', '--model_directory', type=str, default=MODEL_DIRECTORY) parser.add_argument('-dd', '--dataset_directory', type=str, default=DATASET_DIRECTORY) parser.add_argument('-bs', '--batch_size', type=int, default=BATCH_SIZE) parser.add_argument('-lr', '--learning_rate', type=float, default=LEARNING_RATE) parser.add_argument('-g', '--gpu', type=int, default=GPU) parser.add_argument('-t', '--task_name', type=str, default=task_name) parser.add_argument('-n', '--hyper_net', type=str, default=HYPER_NET) parser.add_argument('-v', '--variance', type=float, default=RF.VARIANCE) parser.add_argument('-de', '--depth', type=int, default=52) parser.add_argument('-ep', '--epoch', type=int, default=100) args = parser.parse_args() epoch = args.epoch task_name = args.task_name RF.VARIANCE = args.variance directory_output = os.path.join(args.model_directory) depth = args.depth X_train, Y_train, X_test, Y_test = load_data.load() X_test_m = [0] * (10) Y_test_m = [0] * (10) for i in range(10): X_test_m[i] = X_test[i * 1000:(i + 1) * 1000] Y_test_m[i] = Y_test[i * 1000:(i + 1) * 1000] # 縮小する #X_train, Y_train = X_train[0:5000], Y_train[0:5000] #X_test, Y_test = X_test[0:1000] , Y_test[0:1000] X = tf.placeholder("float", [None, 32, 32, 3]) Y = tf.placeholder("float", [None, 10]) time_list = tf.placeholder("float", [None]) W_list = tf.placeholder("float", [None]) learning_rate = tf.placeholder("float", []) hypernet = args.hyper_net # tf.placeholder("string") task_name_tr = tf.placeholder("string") net = RF.SDE_model(X, depth, time_list, W_list, task_name, hypernet, test=False) test_net = RF.SDE_model(X, depth, time_list, W_list, task_name, hypernet, test=True) sess = tf.Session() beta = 1e-3 cross_entropy = -tf.reduce_sum( Y * tf.log(tf.clip_by_value(net, 1e-10, 1.0))) suml2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) loss = cross_entropy + beta * suml2 #opt = tf.train.MomentumOptimizer(learning_rate, 0.9) var_name_list1 = ["W_conv", "b_conv"] + hypernet_variable[0] var_name_list2 = ["W_fc1", "b_fc1", "W_fc2", "b_fc2", "W_fc3", "b_fc3"] #train_op = None correct_prediction = tf.equal(tf.argmax(test_net, 1), tf.argmax(Y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) saver = tf.train.Saver() batch_size = args.batch_size num_data = X_train.shape[0] # with tf.variable_scope("scope", reuse=True ): # var_list1 = [ tf.get_variable(name=x) for x in var_name_list1 ] # var_list2 = [ tf.get_variable(name=x) for x in var_name_list2 ] # if task_name == "ResNet" or task_name =="ResNet_test" or task_name =="Stochastic_Depth": # learning_late = 1e-4 # else: # learning_late = 1e-6 # train_op1 = tf.train.MomentumOptimizer( 1e-6 , 0.9 ).minimize(cross_entropy,var_list = var_list1 ) # tf.train.GradientDescentOptimizer(0.000001) # train_op2 = tf.train.MomentumOptimizer( 1e-6 , 0.9 ).minimize(cross_entropy,var_list = var_list2 ) # tf.train.GradientDescentOptimizer(0.0001) # tf.group(train_op1, train_op2) # tf.train.GradientDescentOptimizer( 1e-6 ).minimize(cross_entropy) # train_op = tf.train.MomentumOptimizer(args.learning_rate, 0.9).minimize(loss) sess.run(tf.global_variables_initializer()) print(tf.trainable_variables()) late_ad = 1.0 for j in range(epoch): sff_idx = np.random.permutation(num_data) if j < 20: late_ad = 1.0 elif j < 40: late_ad = 0.1 elif j < 60: late_ad = 0.01 else: late_ad = 0.001 for idx in range(0, num_data, batch_size): batch_x = X_train[sff_idx[idx:idx + batch_size if idx + batch_size < 
num_data else num_data]] batch_y = Y_train[sff_idx[idx:idx + batch_size if idx + batch_size < num_data else num_data]] t, W = RF.tW_def(depth, task_name) feed_dict_train = { X: batch_x, Y: batch_y, learning_rate: args.learning_rate * late_ad, time_list: t, W_list: W, task_name_tr: task_name } # print(sess.run(net,feed_dict=feed_dict_train)) #print(sess.run(tf.argmax(net, 1),feed_dict=feed_dict_train)) sess.run([train_op], feed_dict=feed_dict_train) # for z in (RF.Z_imagetest): #print(sess.run(net,feed_dict= feed_dict_train)) #assert(not np.isnan(sess.run(z,feed_dict=feed_dict_train)).any()) #count += 1 elapsed = time.time() - start_time print("epoch %d end : %.3f seconds elapsed " % (j, elapsed)) # if j % 512 == 0: # a=1 if True or j == 0 or j % 10 == 9 or j + 1 == EPOCH: # 最初 , 10回ごと , 最後 のどれかならテストしてみる t_test, W_test = RF.tW_def(depth, "test") if task_name == "ResNet" or task_name == "Stochastic_Depth": task_name_test = "ResNet_test" else: task_name_test = "test" feed_dict_test = { X: X_test, Y: Y_test, time_list: t_test, W_list: W_test, task_name_tr: task_name_test } if SAVE_ENABLE: print("saving checkpoint...") saver.save( sess, "model/model.ckpt" + str(task_name) + "step" + str(j) + datetime.datetime.now().strftime('%Y%m%d%H%M%S')) print("saved!") acc = 0 for i in range(10): feed_dict_test = { X: X_test_m[i], Y: Y_test_m[i], time_list: t_test, W_list: W_test, task_name_tr: task_name_test } acc += sess.run(accuracy, feed_dict=feed_dict_test) acc = acc / 10.0 print("accuracy after epoch %d : %.3f " % (j, acc), flush=True) # accuracy_summary = tf.scalar_summary("accuracy", accuracy) # ここからパラメータ数計算および列挙 total_parameters = 0 parameters_string = "" for variable in tf.trainable_variables(): sess.run( tf.verify_tensor_all_finite(variable, "NaN in : %s \n" % variable.name)) shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value total_parameters += variable_parameters if len(shape) == 1: parameters_string += ("%s %d, " % (variable.name, variable_parameters)) else: parameters_string += ( "%s %s=%d, " % (variable.name, str(shape), variable_parameters)) print(parameters_string) print("Total %d variables, %s params" % (len(tf.trainable_variables()), "{:,}".format(total_parameters))) sess.close()
recall_single_type = dict_true_positive[predict_type] / dict_truth[ predict_type] F1_single_type = 2 * precision_single_type * recall_single_type / ( precision_single_type + recall_single_type) print(" %s: %.3f, %.3f, %.3f" % (predict_type, precision_single_type, recall_single_type, F1_single_type)) tp += dict_true_positive[predict_type] predict_res += dict_predict[predict_type] ground_truth += dict_truth[predict_type] avg_precision_postag = tp / predict_res avg_recall_postag = tp / ground_truth F1_postag = 2 * avg_precision_postag * avg_recall_postag / ( avg_precision_postag + avg_recall_postag) print("###### pos taging evaluation ######") print("precision: ", avg_precision_postag) print("recall: ", avg_recall_postag) print("F1: ", F1_postag) """print count of each type """ # comm = {} # for key in sorted(dict_true_positive): # comm[key] = dict_truth[key] # print(sorted(comm.items(), key = lambda kv:(kv[1], kv[0]))) return sentences, words_index, dict_index2type = load_data.load( './people-2014/test/0123/') evaluate(sentences, words_index, dict_index2type)
try: fine_tuning except : fine_tuning = False try: load_data_flag except : load_data_flag = True if load_data_flag == True: from load_data import load train_x_s, train_y_s, val_x_s, val_y_s, train_x_t, train_y_t, \ val_x_t, val_y_t, test_x_t, test_y_t = load() from embedders import variable_embedder, embedding # embedding_type = "pca" # embedder = embedding(embedding_type, n_cmp = 100) embedding_type = "no_embedding" embedder = embedding(embedding_type) # ============================================================================= emb_x_s, emb_val_x_s, emb_x_t, emb_val_x_t, emb_test_x = \ variable_embedder(embedder,\ [train_x_s, val_x_s, train_x_t, val_x_t, test_x_t]) # ============================================================================= # emb_x_s = embedder.fit_transform(train_x_s) # emb_x_t = embedder.fit_transform(train_x_t) # emb_val_x_s = embedder.fit_transform(val_x_s) # emb_val_x_t = embedder.fit_transform(val_x_t)
import tensorflow as tf from tensorflow.contrib import rnn from load_data import load from load_data import split10 import numpy as np import time os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' LOGDIR = "/tmp/final_project/10LCacc" import shutil shutil.rmtree(LOGDIR) # load data path = "D:/final project/data/" attribute = ["LCaccX", "LCaccY", "LCaccZ", "label"] print("loading data") train_data, test_data = load(path, attribute) print("loading done! training size: %r, testing size: %r"%(np.shape(train_data), np.shape(test_data))) kernel_size = 7 max_pool_size = 5 batch_size = 50 n_class = 10 natt = len(attribute) - 1 nrow = 200 rnn_size = 100 learning_rate = 0.01 moment_rate = 0.01 def conv_layer(input, size_in, size_out, name="conv"): with tf.name_scope(name):
# If "gen_data" = True, generate a dataset in .p format if gen_data: generate(metadata_path=metadata_path, data_path=testdata_path, batch_size=batch_size_test, label_column_name=label_column_name, is_training=False, fold=fold) else: pass # Calculate mean of each channel #- Load data (.p) patch_mean = np.array([0, 0, 0], np.float32) # Initialize mean of each channel dataframe = load(testdata_path, batch_size_test) # Instance #- Calculate mean of each channel for i, row in dataframe.dataframe.iterrows(): patch = row['patch'] patch_mean[0] += np.mean(patch[:, :, 0]) patch_mean[1] += np.mean(patch[:, :, 1]) patch_mean[2] += np.mean(patch[:, :, 2]) patch_mean = patch_mean / len(dataframe.dataframe['patch']) #- Delete "dataframe" from memory dataframe.left = None # Session config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config)
parser.add_argument('--batch_size', default=32, type=int, help='Batch size') parser.add_argument('--epochs', default=200, type=int, help='Number of epochs') parser.add_argument('--val_split', default=0.20, type=float, help='Validation split for validating model') parser.add_argument('--is_training', default=1, type=int, help='Training(1) or testing(0)') parser.add_argument('--lr', default=0.0001, type=float, help='Learning rate' ) parser.add_argument('--beta_1', default=0.9, type=float, help='Beta 1') parser.add_argument('--beta_2', default=0.999, type=float, help='Beta 2') parser.add_argument('--epsilon', default=1e-08, type=float, help='Epsilon') parser.add_argument('--decay', default=0.0, type=float, help='Decay rate') parser.add_argument('--data_path', default='data/', help='Path to data folder') parser.add_argument('--save_weights', default=1, type=int, help='Save weights (Yes=1, No=0)') parser.add_argument('--plot', default=1, type=int, help='Plot accuracy or loss curves (Yes=1, No=0)') args = parser.parse_args() #load data x_train, x_test, y_data, y_test, target, _ = ld.load() #define model model = autoencoder(x_train, args) # train or test if args.is_training: hist = train(model=model, data=((x_train, y_data)), args=args) if args.plot: plot(hist) else: # as long as weights are given, will run testing model.load_weights('model_weights.h5') score = test(model=model, data=((x_test, y_test)), target=target) print('MAP score : ', score)
# limitations under the License. from __future__ import print_function import json import numpy as np import math as m import netket as nk from load_data import load from mpi4py import MPI N = 10 path_to_samples = 'ising1d_train_samples.txt' path_to_bases = 'ising1d_train_bases.txt' # Load the data U, sites, training_samples, training_bases = load(N, path_to_samples, path_to_bases) # Constructing a 1d lattice g = nk.graph.Hypercube(length=N, n_dim=1, pbc=False) # Hilbert space of spins from given graph hi = nk.hilbert.Qubit(graph=g) # Machine ma = nk.machine.RbmSpin(hilbert=hi, alpha=1) ma.init_random_parameters(seed=1234, sigma=0.001) # Sampler sa = nk.sampler.MetropolisLocal(machine=ma) # Optimizer
if args.net == 'simple':
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy
elif args.net == 'parallel':
    cuda.check_cuda_available()
    xp = cuda.cupy

batchsize = 50

# Load the data
print "load dataset"
N, row, col, dataset = ld.load()
dataset['data'] = dataset['data'].astype(np.float32)
dataset['data'] /= 256
dataset['target'] = dataset['target'].astype(np.int32)

X_train, X_test = np.split(dataset['data'], [N])
y_train, y_test = np.split(dataset['target'], [N])
N_test = y_test.size
N = y_train.size
print y_train

X_train = X_train.reshape((len(X_train), 1, row, col))
X_test = X_test.reshape((len(X_test), 1, row, col))
# coding=utf-8 import sys reload(sys) sys.setdefaultencoding('utf8') from sklearn.datasets import load_iris from sklearn import tree from numpy import array import pandas as pd import random from sklearn.cross_validation import train_test_split from load_data import load X, y, attribute_names = load(10) # X, y, attribute_names = dataset.get_dataset(target=dataset.default_target_attribute, return_attribute_names=True) lymph = pd.DataFrame(X, columns=attribute_names) lymph['class'] = y # remove class 0 and class 3 lymph_filtered = lymph[(lymph['class']==1) | (lymph['class']==2)] # Split to validate X_train, X_test, y_train, y_test = train_test_split(lymph_filtered[attribute_names], lymph_filtered['class'], test_size=0.20, random_state=42) # Draw decision tree clf = tree.DecisionTreeClassifier() lymph_tree = clf.fit(X_train, y_train) export_tree(lymph_tree,attribute_names)
import numpy as np

import load_data

# preprocessing
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# keras for model building
#from keras.models import Model
from keras.layers import GRU, Dense, TimeDistributed, RepeatVector, Bidirectional
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.losses import sparse_categorical_crossentropy

english_sentences = load_data.load('small_vocab_en.txt')
french_sentences = load_data.load('small_vocab_fr.txt')


def tokenize(x):
    x_tkzr = Tokenizer(char_level=False)
    x_tkzr.fit_on_texts(x)
    return x_tkzr.texts_to_sequences(x), x_tkzr


def pad(x, length=None):
    if length is None:
        length = max([len(sentence) for sentence in x])
    return pad_sequences(x, maxlen=length, padding='post')
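# Illustrative use of the two helpers above on the loaded sentence lists; the variable
# names for the tokenized and padded output are assumptions.
eng_seqs, eng_tokenizer = tokenize(english_sentences)
fr_seqs, fr_tokenizer = tokenize(french_sentences)
eng_padded = pad(eng_seqs)
fr_padded = pad(fr_seqs)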
# -*- coding: utf-8 -*- import csv import codecs import json from string import Template from datetime import datetime from datetime import date import load_data timestamp=datetime.now().replace(microsecond=0).isoformat() #load all the data countries, funds, data = load_data.load() mcc_id="US-18" # which one is actual ID? <- this one #mcc_id="US-USG-MCC-18" #translates "Fiscal Year, Fiscal Quarter" into ISO date for the start date of the quarter def fiscal_date(date): try: fy,fq=date.split(',') fy=fy.strip() fq=fq.strip() fy=fy[3:] fy=int(fy) if fq=="FQ 1":
# evaluate mean and covariance of predictive distribution prec_mean = np.dot(Ks, invKy) # (200, 1) # Kinv = invKy / y # print 'Kss', np.shape(Kss), 'Ks', np.shape(Ks), 'kinv', np.shape(Kinv), 'Ks.T', np.shape(Ks.T) # prec_cov = Kss - Ks * Kinv * Ks.T # <<< slow # prec_cov = Kss - Ks * Ks.T * Kinv # return predictive values and either if ReturnCov: # full covariance, or return np.array(prec_mean).flatten(), np.array(prec_cov) else: # just standard deviation return np.array(prec_mean).flatten()#, np.array(np.sqrt(np.diag(prec_cov))) # Load data Q2 time1, flux1, yerr1, cadence1 = load_data.load('012317678', quarter = 3, return_cadence = True) # Load data Q3 time2, flux2, yerr2, cadence2 = load_data.load('012317678', quarter = 4, return_cadence = True) # x1 = time1[:100]; x2 = time1[200:300] # y1 = flux1[:100]; y2 = flux1[200:300] # c1 = cadence1[:100]; c2 = cadence1[200:300] length = 500 x1 = time1[:length]; x2 = time2[:length] y1 = flux1[:length]; y2 = flux2[:length] c1 = cadence1[:length]; c2 = cadence2[:length] # Adjust flux 3 so that it follows on from flux 2 and mean centre y2 -= y1[-1] y1 -= np.mean(y1) y2 -= np.mean(y2)
from tensorflow import keras import tensorflow as tf import matplotlib.pyplot as plt import sys import load_data as ld import network as net # define minimum needed variables data_dir = "data" # data location file_ext = ".npy" # numpy bitmap arrays classes = ["circle", "square", "hexagon"] # classes to train network on # load data x, y = ld.load(classes=classes, data_dir=data_dir, file_ext=file_ext, samples=10, reload_data=False) x_train, y_train, x_test, y_test = ld.divide_into_sets(input=x, response=y, ratio=0.2) # train network yolo = net.build_network(y=y_train, batch=6, version=1, input_size=(448, 448, 3), output_size=392) yolo = net.train_network(network=yolo, x=x_train, y=y_train,
params = sys.argv mode = params[1] config = params[2] binary_encode = False if "--encode" in params: binary_encode = True print binary_encode # amount of rows if hasattr(data, "toarray"): data = data.toarray() data = data.load(binary_encode) orig_shape = np.shape(data) print orig_shape orig_rows = orig_shape[0] orig_dimension = orig_shape[1] origDistances = np.empty((orig_rows, orig_rows)) r = range(orig_rows) for i in r: for j in r: if i == j: origDistances[i][j] = .0 else:
def xy_plot(cx,cy,Nb,equal_samples,reference,style,axarr,standard_dev): ''' Plot binned data --------------------------------------------------------- -------------------------------------------------------------------------- Arguments: cx,cy: x+y axis columns for contour (refer to the same cx+cy values in load_data.load). Nb: Number of bins to plot. equal_samples: if set as True, all bins will have the same number of galaxies. reference: Can be set as 'all galaxies', 'all spirals' or None. style: linestyle to plot eg. 'dashed' or 'solid' reference_plot,m_plot: Plot style. Can have the following values: axarr: plot array as from make_grid or make_stack. standard_dev: if True, the standard deviation is plotted as a filled contour. -------------------------------------------------------------------------- ''' table,full_table = load_data.load(cx=cx,cy=cy,p_th=0.5,N_th=10,norm=False,p_values="d") bins,table = load_data.assign(table=table,Nb=Nb,th=0.5,equal_samples=equal_samples, redistribute=False,rd_th=0,ct_th=0,print_sizes=False) full_bins,full_table = load_data.assign(table=full_table,Nb=Nb,th=0.5,equal_samples=equal_samples, redistribute=False,rd_th=0,ct_th=0,print_sizes=False) if reference == "all galaxies": reference_table,reference_bins = full_table,full_bins else: reference_table,reference_bins = table,bins if (reference != "all spirals") and (reference != None): print("Invalid 'reference' value; using 'all spirals'") xy_r = load_data.get_xy_binned(reference_table,reference_bins) for m in range(5): t_sel = table[bins[:,1] == m] b_sel = bins[bins[:,1] == m] xy = load_data.get_xy_binned(t_sel,b_sel) if reference != None: axarr[m].plot(xy_r[:,0],xy_r[:,2],color="k",linewidth=2,linestyle=style) if standard_dev == True: axarr[m].fill_between(xy_r[:,0],xy_r[:,2]+xy_r[:,3],xy_r[:,2]-xy_r[:,3],color="k",alpha=0.5) axarr[m].plot(xy[:,0],xy[:,2],color=colours[m],linewidth=2,linestyle=style) if standard_dev == True: axarr[m].fill_between(xy[:,0],xy[:,2]+xy[:,3],xy[:,2]-xy[:,3],color=colours[m],alpha=0.5) return None
from sklearn.metrics import roc_auc_score import lightgbm as lgb import numpy as np import load_data if __name__ == "__main__": X, y, X_sub, feature_names = load_data.load() valSize = int(X.shape[0] * 0.3) X_train = X[:-valSize] y_train = y[:-valSize] X_val = X[-valSize:] y_val = y[-valSize:] clf = lgb.LGBMClassifier(n_estimators=170, learning_rate=0.05, num_leaves=5000, colsample_bytree=.9, subsample=1, silent=True, min_child_weight=1, seed=1000, min_child_samples=10, reg_alpha=0.01, max_bin=5000) print("Training...") clf.fit(X_train, y_train,
import tensorflow as tf import numpy as np import load_data np.random.seed(100000) X_train_text, X_test_text, X_train_audio, X_test_audio, X_train_gest, X_test_gest, X_train_video, X_test_video, Y_train, Y_test = load_data.load( ) print(X_train_audio.shape, X_test_audio.shape) if __name__ == '__main__': from sklearn import svm, tree, ensemble # f = np.concatenate((X_train_audio, X_train_text, X_train_video, X_train_gest), axis=1) X_train = np.concatenate( (X_train_audio, X_train_text, X_train_video, X_train_gest), axis=1) X_test = np.concatenate( (X_test_audio, X_test_text, X_test_video, X_test_gest), axis=1) print(X_train.shape) print("SVM") clf = svm.SVC() clf.fit(X_train, np.argmax(Y_train, axis=1)) # print(clf.support_vectors_) a = clf.predict(X_test) print(a.shape) y_true = np.argmax(Y_test, 1) print(np.mean(y_true == a))
def main(): PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__)) train_path = os.path.join(PROJECT_ROOT, "data/train2.data") test_path = os.path.join(PROJECT_ROOT, "data/test2.data") train_log_path = os.path.join(PROJECT_ROOT, "log/train/") test_log_path = os.path.join(PROJECT_ROOT, "log/test/") dev_log_path = os.path.join(PROJECT_ROOT, "log/dev/") X, y = ld.load(train_path) #debug X = X.reshape([-1, 6, 6, 1]) train_num = int(X.shape[0] * Config.trainrate) X_train = X[:train_num] y_train = y[:train_num] X_dev = X[train_num:-1] y_dev = y[train_num:-1] X_test, y_test = ld.load(test_path) X_test = X_test.reshape([-1, 6, 6, 1]) print("train size :", X_train.shape, y_train.shape) print("dev size :", X_dev.shape, y_dev.shape) print("test size :", X_test.shape, y_test.shape) print("start training") with tf.Graph().as_default(): config = Config() nn = NN(config) init = tf.global_variables_initializer() saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=0.5) #必须在session外面 y_train = tf.one_hot(y_train, depth=Config.n_classes) y_test = tf.one_hot(y_test, depth=Config.n_classes) y_dev = tf.one_hot(y_dev, depth=Config.n_classes) shuffle_batch_x, shuffle_batch_y = tf.train.shuffle_batch( [X_train, y_train], batch_size=Config.batch_size, capacity=10000, min_after_dequeue=5000, enqueue_many=True) with tf.Session() as session: merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter(train_log_path, session.graph) test_writer = tf.summary.FileWriter(test_log_path) dev_writer = tf.summary.FileWriter(dev_log_path) session.run(init) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(session, coord) y_test, y_dev = session.run([y_test, y_dev]) i = 0 try: while not coord.should_stop(): #for i in range(Config.n_epochs * X_train.shape[0] // Config.batch_size): #offset = (i * Config.batch_size) % (X_train.shape[0] - Config.batch_size) #batch_x = X_train[offset:(offset + Config.batch_size), :] #batch_y = y_train[offset:(offset + Config.batch_size)] batch_x, batch_y = session.run( [shuffle_batch_x, shuffle_batch_y]) loss = nn.train_on_batch(session, batch_x, batch_y, merged, train_writer, i) i += 1 if i % 1000 == 0: dev_acc = nn.accuracy(session, X_dev, y_dev, "dev", merged, dev_writer, i) test_acc = nn.accuracy(session, X_test, y_test, "test", merged, test_writer, i) print("step:", i, "loss:", loss, "dev_acc:", dev_acc, "test_acc:", test_acc) saver.save(session, os.path.join(PROJECT_ROOT, "model/model_ckpt"), global_step=i) except tf.errors.OutOfRangeError: print("done") finally: coord.request_stop() coord.join(threads)
@contact: [email protected]
@file: paipaidai_xgboost.py
@time: 19-7-1 8:39 AM
@desc:
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib
from lightgbm.sklearn import LGBMClassifier
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, log_loss, accuracy_score
from load_data import load

train_values, test_values, clf_labels, clf_labels_r, clf_labels_2 = load()
train_num = 1000000
print(train_values.shape)

# The 5-fold validation could also be replaced by a single validation split, dividing
# train and validation sets by time to avoid leakage caused by the time ordering.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)  # 5-fold cross-validation

clf = LGBMClassifier(  # LightGBM classification model
    learning_rate=0.05,
    n_estimators=10000,
    subsample=0.8,
    subsample_freq=1,
    colsample_bytree=0.8)
def setUp(self):
    self.test_load = l.load('indicator gapminder gdp_per_capita_ppp.xlsx')
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") # Misc Parameters tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS FLAGS.batch_size print("\nParameters:") for attr, value in sorted(FLAGS.__flags.iteritems()): print("{}={}".format(attr.upper(), value)) print("") # Load data print("Loading data...") u, i, y = load("ml-100k") # Randomly shuffle data np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) u_shuffled = u[shuffle_indices] i_shuffled = i[shuffle_indices] y_shuffled = y[shuffle_indices] # Split train/test set # TODO: This is very crude, should use cross-validation u_train, u_dev = u_shuffled[:-1000], u_shuffled[-1000:] i_train, i_dev = i_shuffled[:-1000], i_shuffled[-1000:] y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:] print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
n_folds = 10 shuffle = False try: X_n, X_submission_n = pickle.load(open("X_n.pkl", "rb")) except (OSError, IOError) as e: X_n, X_submission_n = nn.load_data_for_decoder() # X_n = encode.fit_transform(X_n) # X_submission_n = encode.fit_transform(X_submission_n) X_n = encode.get_encoded_feature(X_n) X_submission_n = encode.get_encoded_feature(X_submission_n) pickle.dump((X_n, X_submission_n), open("X_n.pkl", "wb")) X, y, X_submission, Id = load_data.load() X, X_submission = X.values, X_submission.values y = y.values # include auto-encode features add_feats = False if (add_feats): X = np.append(X, X_n, axis=1) X_submission = np.append(X_submission, X_submission_n, axis=1) else: # add data to the training dataset X_n, X_submission_n = nn.load_data_for_decoder() X_n = encode.fit_transform(X_n) # add data by fit transform the orginal into same attributes data and then add to the data set X = np.append(X, X_n, axis=0) y = np.append(y, y, axis=0)
type=int, default=16, help='Number of hidden units.') parser.add_argument('--link-pred', action='store_true', default=False, help='Enable Link Prediction Loss') args = parser.parse_args() device = "cuda" if not args.no_cuda and torch.cuda.is_available() else "cpu" # np.random.seed(args.seed) # torch.manual_seed(args.seed) # if device == 'cuda': # torch.cuda.manual_seed(args.seed) adj, features, labels = load_data.load() max_num_nodes = max([g.shape[0] for g in adj]) labels = torch.from_numpy(labels).to(device) idx = np.arange(600) np.random.RandomState(seed=124).shuffle(idx) idx_train, idx_test = idx[:480], idx[480:] model = Model(pool_size=int(max_num_nodes * 0.25), device=device).to(device) model.train() # optimizer = optim.SGD(model.parameters(), lr=1e-5) optimizer = optim.Adam(model.parameters()) for e in tqdm(range(args.epochs)): pred_labels = [] for i, idx in enumerate(idx_train): adj_train = torch.from_numpy(adj[idx]).to(device).float()
n = 100 ps = 10**(np.random.rand(n)*2.) # FIXME: yes Dan, these should be np.exp taus = 10**np.random.rand(n) myamp = 10**(np.random.rand(n)*1.18) names = ['010972873', '011137075', '011502597', '011717120', '005607242', \ '006370489', '006442183', '006531928', '006603624', '009574283'] m = 0 for i in range(n): for c, KID in enumerate(names): m += 1 star = m # load Kepler lcs, join quarters together lc_files = np.array(glob.glob("%s/kplr%s*"%(datadir, KID))) x, y, yerr = load(lc_files[0]) for j in range(1, len(lc_files)): x = np.concatenate((x, load(lc_files[j])[0])) y = np.concatenate((y, load(lc_files[j])[1])) yerr = np.concatenate((yerr, load(lc_files[j])[2])) # generate simulated lcs doplot = True pars, ts = mklc(x, nspot, incl, fixedamp, taus[i], diffrot, \ dur, samp, noise, ps[i], Amplitude, doplot) # plot if doplot == True: pylab.savefig('%s/%s'%(savedir, (star))) print "saving data..."
from load_data import load from itertools import combinations, permutations args = sys.argv #choose the algorithm #filename = args[1] filename = 'SVD_reconUrban_F210.npz' sig = float(args[1]) obs = float(args[2]) #fraction observed #sig = .0003 #obs = 0.7 init_iter = 10 iterate = 100 #obs = 0.7 limit = 10000 full_data = load(filename) train_count = 500 test_count = 100 total_data = train_count + test_count full_data = full_data[:total_data, :30] maxarray = np.amax(full_data) scale = 1.0 / maxarray full_data = scale * full_data datapoints, dimension = full_data.shape sig_w = 0.1 sig_test = sig #full_data,Z_gen = construct_data(data_dim,data_count + held_out,sig,data_type) if train_count + test_count > datapoints: data_count = datapoints #indices = list(combinations([i for i in range(datapoints)],total_data))
              steps_per_epoch=X_train.shape[0] / BATCH_SIZE,
              validation_data=(X_val, Y_val),
              epochs=epochs,
              verbose=1,
              callbacks=[mcp_save],
              shuffle=True)
    return model


#### load data and reshape
import sys
# sys.argv[1] = './../myData/hw8_data/train.csv'
print('training file:', sys.argv[1])
D_SET = 10
X_train, Y_train = load_data.load(sys.argv[1], 1)
X_train = X_train.reshape(-1, 48, 48, 1)
#### rescale
X_train = X_train / 255.
print('X_train shape : ', X_train.shape)
print('Y_train shape : ', Y_train.shape)
#### convert class vectors to binary class matrices (one hot encoding vector)
Y_train = np_utils.to_categorical(Y_train, 7)

#### build model
for i in range(1):
    print('No.' + str(i))
    SaveModel_name = 'model_best_62998.h5'
wiki = reg1.sub(' (', wiki) wiki = reg2.sub(')', wiki) html = parse(wiki, showToc=False) self.load_html_string(html, 'file:///') else: self.open(os.path.join(self.runpath, 'err.html')) def _pic(self, m): i1, i2, i3 = m.group(1).split('|') f = '_'.join([cat[i1], i2, i3]) + '.gif' f = os.path.join(self.runpath, 'pic', f) return '<img src="%s"/>' % f __WV__ = WV() import load_data __ALL__, __BU__, __HUA__, __YIN__ = load_data.load() __BLACK__, __BLACK_BU__ = load_data.black() __BLACK_REF__ = None class Name(gtk.VBox): '''名字''' def __init__(self): gtk.VBox.__init__(self) self.label = gtk.Label('×') self.label.set_use_markup(True) self.ev = gtk.EventBox() self.ev.add(self.label) self.ev.connect('button-press-event', lambda o,e:__WV__.show(self.label.get_text().decode('utf8')[0])) self.b = gtk.Button('换字') self.b.connect('clicked', self.choose)
import numpy
import cPickle
import theano
import theano.tensor as tensor
import theano.tensor.shared_randomstreams as RS
from theano import config as cf

import load_data

dataset = 'data/authors.pkl'
train, valid, test = load_data.load(dataset)


def ngram2(seqs, vocab_size=1000):
    freq_pair = numpy.zeros((vocab_size, vocab_size))
    freq_w = numpy.zeros((vocab_size, ))
    for seq in seqs:
        for i, w in enumerate(seq):
            if w >= vocab_size:
                w1 = 0
            else:
                w1 = w
            if i == len(seq) - 1:
                freq_w[w1] += 1
                continue
            if seq[i + 1] >= vocab_size:
                w2 = 0
            else:
                w2 = seq[i + 1]
            freq_pair[w1, w2] += 1
            freq_w[w1] += 1
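# Example call for ngram2() on the training split loaded above. That train[0] holds the
# token sequences, and that the function goes on to return the two count arrays it
# builds, are assumptions about the parts of the file not shown in this snippet.
freq_pair, freq_w = ngram2(train[0], vocab_size=1000)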
verbose = True shuffle = False # print("Reading the data") # dataTrain = cu.get_dataframe(train_file) # # print("Extracting features") # fea = features.extract_features(feature_names, dataTrain) # X = fea # y = dataTrain["OpenStatus"] # # dataTest = cu.get_dataframe(test_file) # test_features = features.extract_features(feature_names, dataTest) # X_submission = test_features X, y, X_submission = load_data.load() if shuffle: idx = np.random.permutation(y.size) X = X[idx] y = y[idx] skf = list(StratifiedKFold(y, n_folds)) clfs = [RandomForestClassifier(n_estimators=50, n_jobs=1, criterion='gini', verbose=2, compute_importances=True), RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'), ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'), ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'), GradientBoostingClassifier(learn_rate=0.05, subsample=0.5, max_depth=6, n_estimators=50)] print "Creating train and test sets for blending."
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Bidirectional, LSTM
from tensorflow.keras.optimizers import Adam

import load_data

grid_num = 1
X_train, y_train, X_test, y_test = load_data.load(grid_num)

model = Sequential()
model.add(
    Bidirectional(
        LSTM(units=128, input_shape=(X_train.shape[1], X_train.shape[2]))))
model.add(Dense(units=1, activation="relu"))

adam = Adam(learning_rate=0.1)
model.compile(
    optimizer=adam,
    loss="mse",
)
model.fit(X_train, y_train, epochs=15, validation_split=0.1)
print(model.evaluate(X_test, y_test))
def test_login_handler(self):
    load()
    response = self.testapp.get('/login')
    self.assertEquals(response.status_int, 302, msg="Not redirected.")
import pandas as pd
import numpy as np
from load_data import load
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


def execute(X_train, X_test, y_train, y_test):
    clf = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc_score = mean_squared_error(y_test, y_pred)
    print(np.sqrt(acc_score) * 48)


if __name__ == '__main__':
    X_train, y_train = load()
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33, random_state=42)
    execute(X_train, X_test, y_train, y_test)
"""Logloss, i.e. the score of the bioresponse competition. """ attempt = np.clip(attempt, epsilon, 1.0 - epsilon) return -np.mean(actual * np.log(attempt) + (1.0 - actual) * np.log(1.0 - attempt)) if __name__ == '__main__': np.random.seed(0) # seed to shuffle the train set n_folds = 10 verbose = True shuffle = False X, y, X_submission = load_data.load() if shuffle: idx = np.random.permutation(y.size) X = X[idx] y = y[idx] skf = list(StratifiedKFold(y, n_folds)) clfs = [ RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'), RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'), ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'), ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
def train(): with tf.Graph().as_default(): # global_step = tf.contrib.framework.get_or_create_global_step() data, train_size, test_size, input_shape, nclass = load_data.load( FLAGS.data) [X_train, y_train, X_test, y_test] = data imgs_tr, _, imgs_te, _ = load_data.load_pure(FLAGS.data) global_step = tf.Variable(0, trainable=False) learning_rate_placeholder = tf.placeholder(tf.float32) if FLAGS.lr_strategy == 'exp': learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, FLAGS.decay_step, FLAGS.decay_rate, staircase=True) else: learning_rate = learning_rate_placeholder images = tf.placeholder(tf.float32, input_shape) labels = tf.placeholder(tf.int32, [None] + [nclass]) is_training = tf.placeholder(tf.bool) with tf.variable_scope('Teacher'): t_logits = nets.get_modelf(FLAGS.t_model)(images, is_training=is_training, nclasses=nclass, k=FLAGS.t_vggsize) t_loss = nets.teacher_cl_loss(labels, t_logits) t_train_op = nets.t_train_op( t_loss, utils.get_all_variables_from_scope('Teacher'), learning_rate=learning_rate_placeholder, global_step=global_step) if FLAGS.s_model != 'no': with tf.variable_scope('Student'): s_logits = nets.get_modelf(FLAGS.s_model)( images, is_training=is_training, nclasses=nclass, k=FLAGS.s_vggsize) s_loss = nets.student_cl_loss(t_logits, s_logits) s_train_op = nets.train_op( s_loss, utils.get_all_variables_from_scope('Student'), learning_rate=learning_rate_placeholder) else: s_train_op = tf.no_op() s_loss = t_loss s_logits = t_logits ds_logits = None if FLAGS.go_deeper: with tf.variable_scope('Deep-Student'): ds_logits = nets.get_modelf(FLAGS.s_model)( images, is_training=is_training, nclasses=nclass, k=FLAGS.s_vggsize) ds_loss = nets.student_cl_loss(s_logits, ds_logits) ds_train_op = nets.train_op( ds_loss, utils.get_all_variables_from_scope('Deep-Student'), learning_rate=learning_rate_placeholder) es_logits = None # if FLAGS.sgld_target: # ensemble_logits = tf.placeholder(tf.float32, [None] + [nclass]) # with tf.variable_scope('Ensemble-Student'): # es_logits = nets.get_modelf(FLAGS.s_model)(images, # is_training=is_training, # nclasses=nclass) # es_loss = nets.student_cl_loss(ensemble_logits, es_logits) # es_train_op = nets.train_op(es_loss, # utils.get_all_variables_from_scope('Ensemble-Student'), # learning_rate=learning_rate) session_config = tf.ConfigProto(allow_soft_placement=True) session_config.gpu_options.per_process_gpu_memory_fraction = 0.80 init_op = tf.global_variables_initializer() saver = tf.train.Saver() with tf.Session() as sess: n_iter_to_train = FLAGS.n_epochs * train_size / FLAGS.batch_size n_iter_per_epoch = train_size / FLAGS.batch_size ensemble = PseudoEnsemble(np.zeros_like(y_test)) train_ensemble = PseudoEnsemble(np.zeros_like(y_train)) eval_once = get_eval_once(X_test, y_test, imgs_te, sess, is_training, images, t_logits, s_logits, es_logits, ds_logits, ensemble) tr_eval_once = get_eval_once(X_train, y_train, imgs_tr, sess, is_training, images, t_logits, s_logits, es_logits, ds_logits, ensemble, mode='train') step = 0 sess.run(init_op) es_loss_val = -1 k = 0 for k in range(FLAGS.n_epochs): for X_batch_aug, y_batch, idxs in load_data.batch_iterator_train_crop_flip( X_train, y_train, FLAGS.batch_size): step += 1 _lr = FLAGS.lr if FLAGS.lr_strategy == 'linear': _lr = FLAGS.lr if k < FLAGS.lr_to0_from else FLAGS.lr * ( n_iter_to_train - step) / (n_iter_to_train - FLAGS.lr_to0_from * n_iter_per_epoch) if k > 190: _lr = 0.00001 if FLAGS.lr_strategy == 'step': if k < 100: _lr = 0.001 elif k < 200: _lr = 0.0001 elif k < 300: _lr 
= 1e-5 else: _lr = 1e-6 if _lr < 0: break # print('learning rate', _lr) t_loss_val, s_loss_val, _, _ = sess.run( [t_loss, s_loss, t_train_op, s_train_op], { images: X_batch_aug, labels: y_batch, is_training: True, learning_rate_placeholder: _lr }) # if step % FLAGS.s_step == 0 and k >= FLAGS.start_snn: # s_loss_val, _ = sess.run([s_loss, s_train_op], { # images: X_batch_aug, # labels: y_batch, # is_training: True, # learning_rate: _lr # }) if FLAGS.go_deeper and step % FLAGS.s_step == 0 and k >= FLAGS.start_snn2: ds_loss_val, _ = sess.run( [ds_loss, ds_train_op], { images: X_batch_aug, labels: y_batch, is_training: True, learning_rate_placeholder: _lr }) # if FLAGS.sgld_target: # es_loss_val, _ = sess.run([es_loss, es_train_op], { # images: X_batch_aug, # ensemble_logits: train_ensemble._y[idxs], # is_training: True, # learning_rate: _lr # }) with open(FLAGS.train_dir + '/my_train_logs.csv', 'a') as f: f.write('{},{},{},{},{}\n'.format( k, step, t_loss_val, s_loss_val, es_loss_val)) if FLAGS.ensemble_step != -1 and step % FLAGS.ensemble_step == 0: t_logits_val = get_logist(X_test, nclass, sess, is_training, t_logits, images) ensemble.add_estimator(t_logits_val) if FLAGS.sgld_target or FLAGS.make_sgld_target: t_logits_val = get_logist(X_train, nclass, sess, is_training, t_logits, images) train_ensemble.add_estimator(t_logits_val) np.save( FLAGS.train_dir + 'ensemble-train-logits.npy', train_ensemble._y) if step % FLAGS.eval_frequency == 0: t_acc = eval_once(k, step) if k % FLAGS.save_model_frequency == 0: saver.save(sess, FLAGS.train_dir + '/model.ckpt')
import load_data as ld # File paths CHECKPOINT_PATH = "../checkpoints" WEIGHTS_FILE_NAME = '/weights' MODEL_FILE_NAME = '/model.h5' # Training parameters epochs = 200 # Number of epochs batch_size = 32 # Training batch size validation_split = 0.1 # Fraction of training data for validation verbose = 1 # Show progress bar # Load training data print("Loading training data...") X_train, Y_train = ld.load(mode="train") # Create model print("Creating model...") model = cm.create() # Train model print("Training model...") model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split, verbose=verbose) # Save model and model weights
if __name__ == "__main__": # setting the hyper parameters parser = argparse.ArgumentParser() parser.add_argument('--batch_size', default=1, type=int, help='Batch size') parser.add_argument('--epochs', default=400, type=int, help='Number of epochs') parser.add_argument('--is_training', default=1, type=int, help='Training(1) or testing(0)') parser.add_argument('--data_path', default='data/',help='Path to data folder') parser.add_argument('--save_weights', default=1, type=int, help='Save weights (Yes=1, No=0)') parser.add_argument('--plot', default=1, type=int, help='Plot accuracy or loss curves (Yes=1, No=0)') args = parser.parse_args() #load data (X_train, target_input), (X_val, target_val), (X_test, true_labels) = ld.load() image_height, image_width, depth = 96, 96, 1 input_dim = (image_height,image_width,depth) #define model model = model_fkpd(input_dim) # train or test if args.is_training: hist = train(model=model, data=((X_train, target_input), (X_val, target_val)), args=args) if args.plot: plot(hist) else: # as long as weights are given, will run testing model.load_weights('model_weights.h5') error = test(model=model, data=((X_test, true_labels))) print("RMSE Value : ", np.sqrt(error/(30*X_test.shape[0])))
import load_data from create_model import * # -data: dataset # -p: p # -log: log & model saving file # -dim: dimension of highway layers # -shared: 1 if shared, 0 otherwise, 2 if both ########################## LOAD DATA ############################################### print 'Loading data...' arg = load_data.arg_passing(sys.argv) dataset, nlayers, inpShape, saving, dim, shared = arg['-data'], arg['-nlayers'], arg['-inpShape'], arg['-saving'], arg['-dim'], arg['-shared'] train, valid, test = load_data.load(dataset) log = 'log/' + saving + '.txt' train_x, train_y = train[0], train[1] valid_x, valid_y = load_data.shuffle(valid[0], valid[1]) test_x, test_y = load_data.shuffle(test[0], test[1]) n_classes = max(train_y) if n_classes > 1: n_classes += 1 if n_classes == 1: loss = 'binary_crossentropy' metric = f1 metric_str = 'f1' else: loss = 'sparse_categorical_crossentropy'
import pandas
import load_data
import numpy as np
from models import stanfordnlp_model

print('Train data Analysis')
df = load_data.load('dataset/gap-development.tsv')
print("pronoun after A count", (df['A-offset'] > df['Pronoun-offset']).sum(axis=0))
print("pronoun after B count", (df['B-offset'] > df['Pronoun-offset']).sum(axis=0))
print("A is True count", df['A-coref'].sum(axis=0))
print("B is True count", df['B-coref'].sum(axis=0))
print("non both count", ((df['A-coref'] == False) & (df['B-coref'] == False)).sum(axis=0))
print("pronoun after A AND A is True count",
      (df['A-coref'] & (df['A-offset'] > df['Pronoun-offset'])).sum(axis=0))
print("pronoun after B AND B is True count",
      (df['B-coref'] & (df['B-offset'] > df['Pronoun-offset'])).sum(axis=0))
a_df = df['Pronoun-offset'] - df['A-offset']
b_df = df['Pronoun-offset'] - df['B-offset']
print("A is nearer pronoun than B AND A is True count",
      (a_df.abs() < b_df.abs())[df['A-coref']].sum(axis=0))
print("A is nearer pronoun than B AND B is True count",
      (a_df.abs() < b_df.abs())[df['B-coref']].sum(axis=0))
print("A is nearer pronoun than B", (a_df.abs() < b_df.abs()).sum(axis=0))
print("Pronoun unique values", df['Pronoun'].unique())
print("A unique values", df['A'].unique())
print("B unique values", df['B'].unique())
Copyright 2012, Emanuele Olivetti.
BSD license, 3 clauses.
"""

from __future__ import division
import numpy as np
import load_data
from sklearn.linear_model import LogisticRegression


def logloss(attempt, actual, epsilon=1.0e-15):
    """Logloss, i.e. the score of the bioresponse competition.
    """
    attempt = np.clip(attempt, epsilon, 1.0-epsilon)
    return - np.mean(actual * np.log(attempt) + (1.0 - actual) * np.log(1.0 - attempt))


if __name__ == '__main__':

    X, y, X_submission, y_real = load_data.load()

    clf = LogisticRegression()
    clf.fit(X, y)
    y_submission = clf.predict_proba(X_submission)[:,1]

    print "Linear stretch of predictions to [0,1]"
    y_submission = (y_submission - y_submission.min()) / (y_submission.max() - y_submission.min())

    print "Saving Results."
    np.savetxt(fname='../Submissions/blendML50v2_1749rf500_3.csv',
               X=y_submission, fmt='%0.9f')
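    # Since load_data.load() also returns y_real and a logloss helper is defined above,
    # the stretched predictions can be scored locally as a sanity check; this line is an
    # illustrative addition, not part of the original script.
    print "Local logloss: %0.6f" % logloss(y_submission, y_real)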
import math


def rmsle(y, y0):
    assert len(y) == len(y0)
    return np.sqrt(np.mean(np.power(np.log1p(y) - np.log1p(y0), 2)))


if __name__ == '__main__':
    # Initialise the random seed and set up a 5-fold cross-validation
    np.random.seed(0)
    n_folds = 5

    # Load the dataset
    X, X_submission, ys, ids, idx = load_data.load()

    # Avoid the log scale in the evaluation:
    ys = np.log(ys/500.0 + 1.0)

    y_submission = np.zeros((X_submission.shape[0], 12))

    # Tried with n_estimators of 1000 so that it runs faster
    regs = [GradientBoostingRegressor(learning_rate=0.001, subsample=0.5, max_depth=6, n_estimators=10000)]

    dataset_blend_train = np.zeros((X.shape[0], 12*len(regs)), dtype=np.double)
    dataset_blend_submission = np.zeros((X_submission.shape[0], 12*len(regs), n_folds), dtype=np.double)

    for i in range(12):
        print "Month", i
        y = ys[:,i]