Example #1
 def test_links_handler(self):
     load()
     response = self.testapp.get('/links')
     self.assertEquals(response.status_int, 200)
     self.assertEquals(response.content_type, "application/json")
     data = json.loads(response.normal_body)
     self.assertEquals(len(data), 8)
     # create link
     before = len(Link.query().fetch())
     response = self.testapp.post_json('/links', {'title': 'Foo',
                                       'url': 'http://www.foo.bar'})
     self.assertEquals(response.status_int, 201)
     self.assertEquals(len(Link.query().fetch()), before + 1)
     # Update link
     response = self.testapp.post_json('/links', {'title': 'Foo Bar',
                                       'url': 'http://www.foo.bar'})
     self.assertEquals(response.status_int, 200)
     # Quantity of links didn't change
     self.assertEquals(len(Link.query().fetch()), before + 1)
     link = Link.get_by_id('http://www.foo.bar')
     self.assertEquals(link.title, 'Foo Bar')
     # Delete link
     before = len(Link.query().fetch())
     response = self.testapp.post_json('/links',
                                       {'action': 'delete',
                                        'title': 'Foo Bar',
                                        'url': 'http://www.foo.bar'})
     self.assertEquals(response.status_int, 200)
     self.assertEquals(before - 1, len(Link.query().fetch()))
Example #2
 def test_skill_model(self):
     load()
     skill = Skill(title='Python', desc='Cool',
                   links=['http://www.github.com/'])
     skill.approve()
     self.assertEquals(skill.approved, 1)
     skills = Skill.all()
     self.assertEquals(len(skills), 4)
     s_key = skill.put()
     skill.id = s_key.id()
     skill.put()
     Skill.get(skill.id)
     self.assertEquals(skill.id, s_key.id())
    def __init__(self):
        np.random.seed(0)
        self.n_folds = 5
        self.train_X, self.train_Y, self.test_X = load_data.load()
        self.skf = list(StratifiedKFold(self.train_Y, self.n_folds))

        self.clfs = [VotingClassifier(estimators=[('lr', LogisticRegression(random_state=1)),
                                                  ('rf', RandomForestClassifier(random_state=1)),
                                                  ('gnb', GaussianNB())],
                                      voting='soft'),
                    AdaBoostClassifier(n_estimators=100, learning_rate=0.5),
                    KNeighborsClassifier(3),
                    GaussianNB(),
                    LinearDiscriminantAnalysis(),
                    QuadraticDiscriminantAnalysis(),
                    DecisionTreeClassifier(random_state=0),
                    RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
                    RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
                    ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
                    ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
                    GradientBoostingClassifier(learning_rate=0.05, subsample=0.3, max_depth=8, n_estimators=100),
                    xgb.XGBClassifier(colsample_bytree=0.3, learning_rate=0.1, n_estimators=100,
                                      objective='binary:logistic', reg_alpha=0, reg_lambda=1)]

        self.stacking_train = np.zeros((self.train_X.shape[0], len(self.clfs)))
        self.stacking_test = np.zeros((self.test_X.shape[0], len(self.clfs)))
        self.clf = LogisticRegression(class_weight = 'balanced')
        self.fit()
        self.predict()
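For context, a minimal self-contained sketch (an assumption: scikit-learn on synthetic data rather than the project's load_data module) of the out-of-fold stacking pattern this constructor prepares, where base-classifier predictions fill stacking_train and a LogisticRegression meta-learner is trained on top:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, random_state=0)
clfs = [GaussianNB(), DecisionTreeClassifier(random_state=0)]
stacking_train = np.zeros((X.shape[0], len(clfs)))

skf = StratifiedKFold(n_splits=5)
for j, clf in enumerate(clfs):
    for train_idx, val_idx in skf.split(X, y):
        clf.fit(X[train_idx], y[train_idx])
        # out-of-fold probability of the positive class becomes a meta-feature
        stacking_train[val_idx, j] = clf.predict_proba(X[val_idx])[:, 1]

meta = LogisticRegression(class_weight='balanced')
meta.fit(stacking_train, y)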
def line_fit(cx,cy,x_range,curve,style,reference,reference_plot,m_plot,axarr):
  
    x_guide = np.linspace(x_range[0],x_range[1],100)
  
    table,full_table = load_data.load(cx=cx,cy=cy,p_th=0.5,N_th=10,norm=False,p_values="d")
    bins,table = load_data.assign(table=table,Nb=20,th=0.5,equal_samples=True,
        redistribute=False,rd_th=0,ct_th=0,print_sizes=False)
    
    if reference == "all galaxies":
        reference_table = full_table
    else:
        reference_table = table
  
    if curve == True:  
        def f(x,k,c1,c2):
            return k**(-x + c1) + c2
    else:
        def f(x,k,c1,c2):
            return k*x + c1 
    
    if reference_plot == True:
        p_r,c_r = curve_fit(f,reference_table[:,-1],reference_table[:,-2],maxfev=10000)
        
    for m in range(5):
        
        if reference_plot == True:
            axarr[m].plot(x_guide,f(x_guide,p_r[0],p_r[1],p_r[2]),color="k",linewidth=2,linestyle=style)
            
        if m_plot == True:
            t_m = table[bins[:,1] == m]
            p,c = curve_fit(f,t_m[:,-1],t_m[:,-2],maxfev=10000)
            axarr[m].plot(x_guide,f(x_guide,p[0],p[1],p[2]),color=colours[m],linewidth=2,linestyle=style)
            
    return None
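A self-contained sketch of the scipy.optimize.curve_fit pattern used above, fitting the linear form k*x + c1 to synthetic data (all names here are illustrative only):

import numpy as np
from scipy.optimize import curve_fit

def f(x, k, c1):
    return k*x + c1

rng = np.random.RandomState(0)
x = np.linspace(0, 1, 50)
y = 2.0*x + 0.5 + rng.normal(scale=0.1, size=x.size)

p, c = curve_fit(f, x, y, maxfev=10000)
x_guide = np.linspace(0, 1, 100)
y_fit = f(x_guide, p[0], p[1])  # fitted line evaluated on a fine grid, as plotted above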
Example #5
def main():
   # load data
   print "Loading..."
   objects, training, validation = load()

   # calculate similarities
   print "Calculating similarities..."
   similarities = generate_similar_users(training, min_support=5)

   # see how well predict_rating performs
   print "Validating..."
   error = 0
   error2 = 0 # error from only cases where we could give a rating
   num_validations = 0
   for user in validation:
      for item in validation[user]:
         p = predict_rating(user, item, training, similarities)
         a = validation[user][item]
         error += abs(p[0] - a)
         num_validations += 1
         if p[1]:
            error2 += abs(p[0] - a)
         #print "Predicted: %0.2f,\t Actual: %0.2f" % (p[0], a)
   print "Error, on average, was %0.2f" % (error/num_validations)
   print "Error, excluding not enough data cases, was, on average, %0.2f" % (error2/num_validations)
Example #6
 def test_skills_handler(self):
     load()
     response = self.testapp.get('/skills')
     self.assertEquals(response.status_int, 200)
     self.assertEquals(response.content_type, 'application/json')
     data = json.loads(response.normal_body)
     self.assertEquals(4, len(data))
     # Approve skill
     skill = Skill(title="Math").put()
     response = self.testapp.post_json('/skills/approve/',
                                       {'_id': skill.id()})
     self.assertEquals(response.status_int, 201)
     self.assertEquals(Skill.get(skill.id()).approved, 1)
     # Create skill
     before = len(Skill.all())
     post_data = {'action': 'new',
                  'data': {'title': 'Python',
                           'desc': 'Love it!',
                           'links': [{'url': 'http://www.github.com',
                                      'title': 'My Github'}]}}
     response = self.testapp.post_json('/skills', post_data)
     self.assertEquals(response.status_int, 201)
     self.assertEquals(response.content_type, 'application/json')
     self.assertEquals(before + 1, len(Skill.all()))
     # Remove skill
     before = len(Skill.all())
     response = self.testapp.post_json('/skills',
                                       {'_id': skill.id(),
                                        'action': 'delete'})
     self.assertEquals(response.status_int, 200)
     self.assertEquals(before - 1, len(Skill.all()))
     # Update skill
     links_before = len(Link.query().fetch())
     skill = Skill(title='Foo', desc='Bar').put()
     post_data = {'action': 'update',
                  'data': {'_id': skill.id(),
                           'title': 'Noob',
                           'desc': 'Noob!',
                           'links': [{'url': 'http://www.noob.com',
                                      'title': 'Noob Com'}]}}
     response = self.testapp.post_json('/skills', post_data)
     self.assertEquals(200, response.status_int)
     self.assertEquals(links_before + 1, len(Link.query().fetch()),
                       msg="Should create new link.")
     skill = Skill.get(skill.id())
     self.assertEquals(len(skill.links), 1)
     self.assertEquals(skill.title, 'Noob')
def main():
    X, y = load()


    print 'Running Model'
    mdl = xgb.XGBClassifier()
    scores = cross_val_score(mdl, X, y, cv=5, scoring='accuracy', n_jobs=-1)
    print np.mean(scores)
def histogram(cx,Nb,bin_extent,axarr,full_hist,style):
    '''
    Plot histograms-----------------------------------------------------------
    --------------------------------------------------------------------------
    Arguments:

    cx: The data you want histogrammed (ie. a colour, mass etc.)
    
    Nb: Number of bins.
    
    bin_extent: List of form [lower bound,upper bound]
    
    axarr: the plotting array as from make_grid or make_stack.
    
    full_hist: set as "all" for all galaxies, "all spirals" for all spiral 
    galaxies, or "assigned spirals" for all spirals that meet the threshold to
    be classified as having a particular arm number.
    
    style: histogram line style eg. "solid"
    --------------------------------------------------------------------------
    '''
  
    # Load all of the data and assign arms to each:
    table,full_table = load_data.load(cx=cx,cy=["REDSHIFT_1"]
        ,p_th=0.5,N_th=5,norm=False,p_values="d")
    bins,table = load_data.assign(table=table,Nb=20,th=0.5,
        equal_samples=True,redistribute=False,rd_th=0,ct_th=0
        ,print_sizes=False)
    # Define histogram bins:
    
    bin_values=np.linspace(bin_extent[0],bin_extent[1],Nb+1)
    
    for m in range(5):    
        t_select=table[bins[:,1] == m]
        # Reference histograms:
        if full_hist == "all":
            axarr[m].hist(table[:,-1],bins=bin_values,normed=True
                ,histtype="step",linewidth=2, color="k",alpha=0.75)

        elif full_hist == "all spirals":
            axarr[m].hist(table[:,-1],bins=bin_values,normed=True
                ,histtype="step",linewidth=2, color="k",alpha=0.75)

        else:
            sel = table[(bins[:,1] != -999) & (bins[:,1] != 5)]
            axarr[m].hist(sel[:,-1],bins=bin_values,normed=True
                ,histtype="step",linewidth=2, color="k",alpha=0.75)

            if full_hist != "assigned spirals":
                print("Invalid full_hist value; using 'assigned spirals'")

        # Plot histograms.
        axarr[m].hist(t_select[:,-1],bins=bin_values,normed=True
            ,histtype="step",linewidth=2,color=colours[m],linestyle=style)

    return None
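A self-contained sketch (synthetic data, matplotlib only) of the step-histogram pattern used above: Nb bins spanning bin_extent, normalised, drawn with histtype="step". Note that normed=True is the legacy matplotlib keyword; current releases use density=True.

import numpy as np
import matplotlib.pyplot as plt

Nb, bin_extent = 20, [0.0, 1.0]
bin_values = np.linspace(bin_extent[0], bin_extent[1], Nb + 1)
data = np.random.rand(1000)

fig, axarr = plt.subplots(1, 5, sharey=True)
for m in range(5):
    axarr[m].hist(data, bins=bin_values, density=True,
                  histtype="step", linewidth=2, color="k", linestyle="solid")
plt.show()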
Example #9
def xgb_model_cv():
    X, y = load()

    print 'Running Model'
    mdl = xgb.XGBClassifier()
    scores = cross_val_score(mdl, X, y, cv=5, scoring='f1', n_jobs=-1)
    print 'f1 =', np.mean(scores)
    scores = cross_val_score(mdl, X, y, cv=5, scoring='accuracy', n_jobs=-1)
    print 'accuracy =', np.mean(scores)
    scores = cross_val_score(mdl, X, y, cv=5, scoring='recall', n_jobs=-1)
    print 'recall =', np.mean(scores)
def load(dataset_path, force_refresh = False, **kwargs):
  if force_refresh or not os.path.isfile(dataset_path):
    print "[trainer] loading data from scratch"
    df = load_data.load(**kwargs)
    print "[trainer] assigning airport"
    df = load_data.assign_airport(df, **kwargs)
    print "[trainer] saving dataset to %s" % dataset_path
    df.to_csv(dataset_path, encoding='utf-8')
  else:
    print "[trainer] loading existing data at %s" % dataset_path
    # Using python engine, which is slower, but was having memory issues when using
    # the C engine
    df = pd.read_csv(dataset_path, encoding='utf-8', engine="python")
  return df
Example #11
def xgb_model():
    X, y = load()
    params = {'learning_rate': [0.01, 0.03, 0.1, 0.3],
              'n_estimators': [50, 125, 300],
              'subsample': [0.5, 1.0],
              'max_depth': [1, 3, 10]}
    mdl = xgb.XGBClassifier()
    gs = GridSearchCV(mdl, params, cv=5, n_jobs=-1)
    gs.fit(X, y)
    print 'Best params:', gs.best_params_
    print 'Best score:', gs.best_score_

    mdl = gs.best_estimator_
    with open('data/xgb_model.pkl', 'w') as f:
        pickle.dump(mdl, f)
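Presumably the pickled model is reloaded elsewhere; a minimal sketch of that step (same path as above, binary mode shown for Python 3 compatibility):

import pickle

with open('data/xgb_model.pkl', 'rb') as f:  # file written by xgb_model() above
    mdl = pickle.load(f)
# mdl.predict(...) can then be applied to features shaped like those returned by load()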
def plot_data(cx,axarr,Nb,equal_samples,style,errors,data_type):
    '''
    Plot data-----------------------------------------------------------------
    --------------------------------------------------------------------------
    Arguments:

    cx: The data you want plotted (ie. a colour, mass etc.)
    
    axarr: the plotting array as from make_grid or make_stack.
    
    Nb: Number of bins.
    
    equal samples: if True, bin into equally sized samples. If False, bin 
    into equally spaced bins.
    
    style: linestyle eg. "dotted" or "solid"
    
    errors: if True, plot errors from Cameron et al.
    
    data_type: have either "d" for debiased, "r" for raw, or "w" for debiased
    from Willet et al.
    --------------------------------------------------------------------------
    '''
  
    table,full_table = load_data.load(cx=cx,cy=["REDSHIFT_1"],p_th=0.5,
        N_th=10,norm=False,p_values=data_type)
    bins,table = load_data.assign(table=table,Nb=Nb,th=0.5,
        equal_samples=equal_samples,redistribute=False,rd_th=0,ct_th=0,
        print_sizes=False)

    for m in range(5):
    
        fracs=load_data.get_fracs(table=table,bins=bins,m=m,c=0.683
            ,full_data="assigned spirals")    
        axarr[m].plot(fracs[:,0],fracs[:,1]/fracs[:,2],color=colours[m],
            linestyle=style,linewidth=2)
        
        if errors == True:
            axarr[m].fill_between(fracs[:,0],fracs[:,3],fracs[:,4]
                ,color=colours[m],alpha=0.3,linestyle="dashed",hatch="/")
    
    return None
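A self-contained sketch (synthetic values) of the fraction-versus-x curve with a shaded confidence band, mirroring the plot/fill_between pattern above:

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 1, 20)
frac = 0.5 + 0.2*x                       # binned fraction values
lower, upper = frac - 0.05, frac + 0.05  # e.g. 68.3% confidence bounds

fig, ax = plt.subplots()
ax.plot(x, frac, linewidth=2, linestyle="solid")
ax.fill_between(x, lower, upper, alpha=0.3, hatch="/")
plt.show()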
Example #13
              'w_bn_b_y',
              'w_bn_b_z']:
        ocp.bound(w,(-4*pi,4*pi))

    return ocp


if __name__=='__main__':
    nk = 100
    tStart = 20.0 # [sec]
    tEnd   = 21.0 # [sec]
    T = tEnd - tStart
    ts = T / float(nk)

    # load the data
    (data,interval,_) = load_data.load(tStart, tEnd, ts)

    # fix some signs in the data
    data['encoder']['sin_delta'] = -data['encoder']['sin_delta']

    conf = makeConf()
    conf['useVirtualForces'] = 'random_walk'
    conf['useVirtualTorques'] = 'random_walk'
    dae = carousel_dae.makeDae(conf)

    print "setting up ocp..."
    ocp = setupOcp(dae,conf,T,nk,deg=3)

    lineRadiusGuess = 1.7

    # trajectory for initial guess
    i, j, train, test, k, reg , X, y = params
    print "Month, Fold, Regressor =", i,j,k
    print reg
    reg.fit(X[train], y[train])
    tr = reg.predict(X[test])
    su = reg.predict(X_submission)
    return [i,j,k,test,tr,su]


if __name__ == '__main__':

    np.random.seed(0)
    n_folds = 5
    n_jobs = 4

    X, X_submission, ys, ids, idx = load_data.load()

    # Smart transformation to avoid logscale in evaluation:
    ys = np.log(ys/500.0 + 1.0)
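    # A possible inverse of the transform above (an assumption), for mapping
    # predictions back to the original scale:
    #   y_original = 500.0 * (np.exp(y_log) - 1.0)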

    y_submission = np.zeros((X_submission.shape[0], 12))

    # regs = [RandomForestRegressor(n_estimators=1000, n_jobs=1, max_features='auto'),
    #         ExtraTreesRegressor(n_estimators=1000, n_jobs=1, max_features='auto'),
    #         GradientBoostingRegressor(learn_rate=0.001, subsample=0.5, max_depth=6, n_estimators=1000)]

    # My best submission used just this one:
    regs = [GradientBoostingRegressor(learn_rate=0.001, subsample=0.5, max_depth=6, n_estimators=20000)]

    dataset_blend_train = np.zeros((X.shape[0], 12*len(regs)), dtype=np.double)
    dataset_blend_submission = np.zeros((X_submission.shape[0], 12*len(regs), n_folds), dtype=np.double)
Example #15
from sklearn import linear_model
from sklearn import metrics
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
import pandas as pd
import load_data as load_data
import preprocess as pre

X_train_A, X_train_B, y_train = load_data.load("train")
X_test_A, X_test_B = load_data.load("test")

X_train_A, X_train_B = pre.common_name(X_train_A,X_train_B)
X_train = pre.proc(X_train_A) - pre.proc(X_train_B)

model = linear_model.LogisticRegression(fit_intercept=False)
#params = {'n_estimators':200, 'learning_rate':0.01,'max_depth':3, 'random_state':0}
#model = GradientBoostingClassifier(**params)
model.fit(X_train,y_train['Choice'])

preds = model.predict_proba(X_train)[:,1]
fpr, tpr, thresholds = metrics.roc_curve(y_train, preds)
auc = metrics.auc(fpr,tpr)

print 'AuC score on training data:',auc

###########################
# PREDICTING ON TEST DATA
###########################
X_test_A, X_test_B = pre.common_name(X_test_A,X_test_B)
X_test = pre.proc(X_test_A) - pre.proc(X_test_B)
preds_test = model.predict_proba(X_test)[:,1]
Example #16
import pylab
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import load_data
import gc
import numpy as np   # needed for np.array below
import pandas as pd  # needed for pd.read_json below

plt.rcParams['figure.figsize'] = 10, 10

#Load the data.
train = pd.read_json("input/train.json")
test = pd.read_json("input/test.json")

##### set the target of train and test
train_X, X_angle = load_data.load(train)
test_X, X_test_angle = load_data.load(test)
train_y = np.array(train['is_iceberg'])

#############pre process on images :)
import pre_pros
train_X = pre_pros.pre_pros(train_X)
test_X = pre_pros.pre_pros(test_X)

### configure the callbacks of the model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import TensorBoard

callbacks_list = [
def contour_plots(cx,cy,grid_spacing,axarr,xlims,ylims,reference_plot,
    m_plot,reference,levels,alphas,sigma):
  
    '''
    Plot contours ------------------------------------------------------------
    --------------------------------------------------------------------------
    Arguments:

    cx,cy: x+y axis columns for contour (refer to the same cx+cy values in 
    load_data.load).
    
    grid_spacing: Number of cells to divide in to for the 2d histogram.
    
    axarr: the plotting array as from make_grid or make_stack.
    
    xlims,ylims: Lists of the form [lower bound, upper bound] for the
    x+y histogram limits.
    
    reference_plot,m_plot: Plot style. Can have the following values:
    
    -------
    "contour": pure line contour.
    "contourf": pure filled contour.
    "contourf+line": lines+filled contour.
    "hist": 2D shaded histogram.
     -------
   
    reference: Can set as "all galaxies" to include all of the volume-limited
    sample or "all spirals" to only show the spiral sample.
    
    levels: Set the contour levels manually here.
    
    alphas: 2 item list with [reference transparency, arm sample transparency]
    values.
    --------------------------------------------------------------------------
    '''
    
    table,full_table = load_data.load(cx=cx,cy=cy,p_th=0.5,N_th=10,norm=False,p_values="d")
    
    #table[:,-2:] = np.random.randn(len(table),2)
    #full_table[:,-2:] = np.random.randn(len(full_table),2)
    
    bins,table = load_data.assign(table=table,Nb=20,th=0.5,equal_samples=True,
        redistribute=False,rd_th=0,ct_th=0,print_sizes=False)
    
    if reference == "all galaxies":
        reference_table = full_table
        
    elif reference == "all spirals":
        reference_table = table
    
    for m in range(5):
        
        t_sel = table[bins[:,1] == m]
        
        if (reference_plot is not None) & ((reference == "all galaxies") or (reference == "all spirals")): 
            contour(table=reference_table,xlims=xlims,ylims=ylims,grid_spacing=grid_spacing
            ,contour_type=reference_plot,ax=axarr[m],alpha=alphas[0],colour=["k","Greys"],levels=levels
            ,sigma=sigma)
	    
        if m_plot != None:
        
            contour(table=t_sel,xlims=xlims,ylims=ylims,grid_spacing=grid_spacing
                ,contour_type=m_plot,ax=axarr[m],alpha=alphas[1],colour=[colours[m],cmaps[m]],levels=levels
                ,sigma=sigma)

    axarr[0].set_xlim(xlims)
    axarr[0].set_ylim(ylims)
        
    return None
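A self-contained sketch (random data) of turning a 2D histogram from np.histogram2d into the "contour"- and "contourf"-style panels the docstring describes:

import numpy as np
import matplotlib.pyplot as plt

x, y = np.random.randn(2, 5000)
grid_spacing = 30
xlims, ylims = [-3, 3], [-3, 3]

H, xe, ye = np.histogram2d(x, y, bins=grid_spacing, range=[xlims, ylims])
xc, yc = 0.5*(xe[:-1] + xe[1:]), 0.5*(ye[:-1] + ye[1:])

fig, ax = plt.subplots()
ax.contourf(xc, yc, H.T, cmap="Greys", alpha=0.5)  # "contourf" style
ax.contour(xc, yc, H.T, colors="k")                # "contour" lines on top
ax.set_xlim(xlims)
ax.set_ylim(ylims)
plt.show()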
Example #18
    clf = svm.LinearSVC()
    clf.fit(X, Y)
    return clf


def incremental_SGD(X, Y, loss):
    sgd = SGDClassifier(loss=loss, penalty="l2")
    labels = np.unique(Y)
    for i in range(X.shape[0]):
        point_x = X[i]
        point_y = Y[i]
        sgd.partial_fit([point_x], [point_y], classes=labels)
    return sgd


(X_train, Y_train), (X_test, Y_test), cross_val_indices = load()
X_train = X_train.reshape((X_train.shape[0], ) +
                          (np.prod(X_train.shape[1:]), ))
X_test = X_test.reshape((X_test.shape[0], ) + (np.prod(X_test.shape[1:]), ))
Y_train = np.argmax(Y_train, axis=1)
Y_test = np.argmax(Y_test, axis=1)

losses = ["hinge", "log", "modified_huber", "squared_hinge"]
for loss in losses:
    mean_error = 0
    for i, val in enumerate(cross_val_indices):
        sgd = incremental_SGD(X_train[val], Y_train[val], loss)
        error = sgd.score(X_test, Y_test)
        print("For validation %d using %s loss" % (i, loss), error)
        mean_error += error
    print("Mean error for %s loss was %f" % (loss, mean_error /
from sklearn.cross_validation import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

if __name__ == '__main__':

    # np.random.seed(0) # seed to shuffle the train set

    n_folds = 4
    n_threads = 2
    
    # verbose = False
    # shuffle = False

    print 'Loading train and test sets'
    trainset, train_label, testset = load_data.load()
    print 'Finished feature engineering for train and test sets'

    # if shuffle:
    #   idx = np.random.permutation(train_label.size)
    #   trainset = trainset[idx]
    #   train_label = train_label[idx]

    # Level 0 classifiers
    clfs = [RandomForestClassifier(n_estimators=100, n_jobs=n_threads, criterion='gini'),
            RandomForestClassifier(n_estimators=100, n_jobs=n_threads, criterion='entropy'),
            ExtraTreesClassifier(n_estimators=100, n_jobs=n_threads, criterion='gini'),
            ExtraTreesClassifier(n_estimators=100, n_jobs=n_threads, criterion='entropy'),
            GradientBoostingClassifier(n_estimators=50, learning_rate=0.05, subsample=0.5, max_depth=6)]

    # Stratified random shuffled cross validation
Example #20
def run():
    global task_name
    parser = argparse.ArgumentParser()
    parser.add_argument('-d',
                        '--model_directory',
                        type=str,
                        default=MODEL_DIRECTORY)
    parser.add_argument('-dd',
                        '--dataset_directory',
                        type=str,
                        default=DATASET_DIRECTORY)
    parser.add_argument('-bs', '--batch_size', type=int, default=BATCH_SIZE)
    parser.add_argument('-lr',
                        '--learning_rate',
                        type=float,
                        default=LEARNING_RATE)
    parser.add_argument('-g', '--gpu', type=int, default=GPU)
    parser.add_argument('-t', '--task_name', type=str, default=task_name)
    parser.add_argument('-n', '--hyper_net', type=str, default=HYPER_NET)
    parser.add_argument('-v', '--variance', type=float, default=RF.VARIANCE)
    parser.add_argument('-de', '--depth', type=int, default=52)
    parser.add_argument('-ep', '--epoch', type=int, default=100)

    args = parser.parse_args()
    epoch = args.epoch
    task_name = args.task_name
    RF.VARIANCE = args.variance
    directory_output = os.path.join(args.model_directory)
    depth = args.depth

    X_train, Y_train, X_test, Y_test = load_data.load()
    X_test_m = [0] * (10)
    Y_test_m = [0] * (10)
    for i in range(10):
        X_test_m[i] = X_test[i * 1000:(i + 1) * 1000]
        Y_test_m[i] = Y_test[i * 1000:(i + 1) * 1000]

    # Shrink the dataset (use only a subset)
    #X_train, Y_train = X_train[0:5000], Y_train[0:5000]
    #X_test, Y_test = X_test[0:1000] , Y_test[0:1000]

    X = tf.placeholder("float", [None, 32, 32, 3])
    Y = tf.placeholder("float", [None, 10])
    time_list = tf.placeholder("float", [None])
    W_list = tf.placeholder("float", [None])
    learning_rate = tf.placeholder("float", [])
    hypernet = args.hyper_net  # tf.placeholder("string")
    task_name_tr = tf.placeholder("string")

    net = RF.SDE_model(X,
                       depth,
                       time_list,
                       W_list,
                       task_name,
                       hypernet,
                       test=False)
    test_net = RF.SDE_model(X,
                            depth,
                            time_list,
                            W_list,
                            task_name,
                            hypernet,
                            test=True)

    sess = tf.Session()
    beta = 1e-3

    cross_entropy = -tf.reduce_sum(
        Y * tf.log(tf.clip_by_value(net, 1e-10, 1.0)))
    suml2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    loss = cross_entropy + beta * suml2
    #opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
    var_name_list1 = ["W_conv", "b_conv"] + hypernet_variable[0]
    var_name_list2 = ["W_fc1", "b_fc1", "W_fc2", "b_fc2", "W_fc3", "b_fc3"]

    #train_op = None

    correct_prediction = tf.equal(tf.argmax(test_net, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    saver = tf.train.Saver()
    batch_size = args.batch_size
    num_data = X_train.shape[0]

    # with tf.variable_scope("scope", reuse=True ):
    #    var_list1 = [ tf.get_variable(name=x) for x in var_name_list1 ]
    #    var_list2 = [ tf.get_variable(name=x) for x in var_name_list2 ]

    #    if task_name == "ResNet" or task_name =="ResNet_test" or task_name =="Stochastic_Depth":
    #        learning_late = 1e-4
    #    else:
    #        learning_late = 1e-6
    #    train_op1 = tf.train.MomentumOptimizer( 1e-6 , 0.9 ).minimize(cross_entropy,var_list = var_list1 )  # tf.train.GradientDescentOptimizer(0.000001)
    #    train_op2 = tf.train.MomentumOptimizer( 1e-6 , 0.9 ).minimize(cross_entropy,var_list = var_list2 ) # tf.train.GradientDescentOptimizer(0.0001)
    # tf.group(train_op1, train_op2)  # tf.train.GradientDescentOptimizer( 1e-6 ).minimize(cross_entropy) #
    train_op = tf.train.MomentumOptimizer(args.learning_rate,
                                          0.9).minimize(loss)

    sess.run(tf.global_variables_initializer())

    print(tf.trainable_variables())
    late_ad = 1.0
    for j in range(epoch):
        sff_idx = np.random.permutation(num_data)
        if j < 20:
            late_ad = 1.0
        elif j < 40:
            late_ad = 0.1
        elif j < 60:
            late_ad = 0.01
        else:
            late_ad = 0.001

        for idx in range(0, num_data, batch_size):
            batch_x = X_train[sff_idx[idx:idx + batch_size if idx +
                                      batch_size < num_data else num_data]]
            batch_y = Y_train[sff_idx[idx:idx + batch_size if idx +
                                      batch_size < num_data else num_data]]

            t, W = RF.tW_def(depth, task_name)

            feed_dict_train = {
                X: batch_x,
                Y: batch_y,
                learning_rate: args.learning_rate * late_ad,
                time_list: t,
                W_list: W,
                task_name_tr: task_name
            }

            # print(sess.run(net,feed_dict=feed_dict_train))
            #print(sess.run(tf.argmax(net, 1),feed_dict=feed_dict_train))

            sess.run([train_op], feed_dict=feed_dict_train)
            # for z in (RF.Z_imagetest):
            #print(sess.run(net,feed_dict= feed_dict_train))
            #assert(not np.isnan(sess.run(z,feed_dict=feed_dict_train)).any())
            #count += 1
        elapsed = time.time() - start_time
        print("epoch %d end : %.3f seconds elapsed " % (j, elapsed))

        # if j % 512 == 0:
        #    a=1
        if True or j == 0 or j % 10 == 9 or j + 1 == EPOCH:  # run a test at the first epoch, every 10 epochs, or the last epoch
            t_test, W_test = RF.tW_def(depth, "test")
            if task_name == "ResNet" or task_name == "Stochastic_Depth":
                task_name_test = "ResNet_test"
            else:
                task_name_test = "test"
            feed_dict_test = {
                X: X_test,
                Y: Y_test,
                time_list: t_test,
                W_list: W_test,
                task_name_tr: task_name_test
            }
            if SAVE_ENABLE:
                print("saving checkpoint...")
                saver.save(
                    sess, "model/model.ckpt" + str(task_name) + "step" +
                    str(j) + datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
                print("saved!")
            acc = 0
            for i in range(10):
                feed_dict_test = {
                    X: X_test_m[i],
                    Y: Y_test_m[i],
                    time_list: t_test,
                    W_list: W_test,
                    task_name_tr: task_name_test
                }
                acc += sess.run(accuracy, feed_dict=feed_dict_test)
            acc = acc / 10.0
            print("accuracy after epoch %d : %.3f " % (j, acc), flush=True)
        # accuracy_summary = tf.scalar_summary("accuracy", accuracy)
    # From here on: compute and list the parameter counts
    total_parameters = 0
    parameters_string = ""
    for variable in tf.trainable_variables():
        sess.run(
            tf.verify_tensor_all_finite(variable,
                                        "NaN  in : %s \n" % variable.name))
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
        if len(shape) == 1:
            parameters_string += ("%s %d, " %
                                  (variable.name, variable_parameters))
        else:
            parameters_string += (
                "%s %s=%d, " %
                (variable.name, str(shape), variable_parameters))

    print(parameters_string)
    print("Total %d variables, %s params" %
          (len(tf.trainable_variables()), "{:,}".format(total_parameters)))
    sess.close()
Example #21
        recall_single_type = dict_true_positive[predict_type] / dict_truth[
            predict_type]
        F1_single_type = 2 * precision_single_type * recall_single_type / (
            precision_single_type + recall_single_type)
        print(" %s:    %.3f,   %.3f,   %.3f" %
              (predict_type, precision_single_type, recall_single_type,
               F1_single_type))
        tp += dict_true_positive[predict_type]
        predict_res += dict_predict[predict_type]
        ground_truth += dict_truth[predict_type]
    avg_precision_postag = tp / predict_res
    avg_recall_postag = tp / ground_truth
    F1_postag = 2 * avg_precision_postag * avg_recall_postag / (
        avg_precision_postag + avg_recall_postag)
    print("###### pos taging evaluation ######")
    print("precision: ", avg_precision_postag)
    print("recall: ", avg_recall_postag)
    print("F1: ", F1_postag)
    """print count of each type """
    # comm = {}
    # for key in sorted(dict_true_positive):
    #     comm[key] = dict_truth[key]
    # print(sorted(comm.items(), key = lambda kv:(kv[1], kv[0])))

    return


sentences, words_index, dict_index2type = load_data.load(
    './people-2014/test/0123/')
evaluate(sentences, words_index, dict_index2type)
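A minimal numeric sketch of the precision/recall/F1 arithmetic used above (hypothetical counts):

tp, predict_res, ground_truth = 80.0, 100.0, 120.0
precision = tp / predict_res                          # 0.80
recall = tp / ground_truth                            # ~0.67
f1 = 2 * precision * recall / (precision + recall)    # ~0.73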
Example #22
try:
  fine_tuning
except :
  fine_tuning = False

try:
  load_data_flag 
except :
  load_data_flag = True 
  

if load_data_flag == True:
  from load_data import load
  train_x_s, train_y_s, val_x_s, val_y_s, train_x_t, train_y_t, \
    val_x_t, val_y_t, test_x_t, test_y_t = load()

from embedders import variable_embedder, embedding 
# embedding_type = "pca"
# embedder = embedding(embedding_type, n_cmp = 100)
embedding_type = "no_embedding"
embedder = embedding(embedding_type)
# =============================================================================
emb_x_s, emb_val_x_s, emb_x_t, emb_val_x_t, emb_test_x = \
variable_embedder(embedder,\
[train_x_s, val_x_s, train_x_t, val_x_t, test_x_t]) 
# =============================================================================
# emb_x_s = embedder.fit_transform(train_x_s)
# emb_x_t = embedder.fit_transform(train_x_t)
# emb_val_x_s = embedder.fit_transform(val_x_s)
# emb_val_x_t = embedder.fit_transform(val_x_t)
Example #23
import tensorflow as tf
from tensorflow.contrib import rnn
from load_data import load
from load_data import split10
import numpy as np
import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
LOGDIR = "/tmp/final_project/10LCacc"
import shutil
shutil.rmtree(LOGDIR)
# load data
path = "D:/final project/data/"
attribute = ["LCaccX", "LCaccY", "LCaccZ", "label"]

print("loading data")
train_data, test_data = load(path, attribute)
print("loading done! training size: %r, testing size: %r"%(np.shape(train_data), np.shape(test_data)))

kernel_size = 7
max_pool_size = 5
batch_size = 50
n_class = 10
natt = len(attribute) - 1
nrow = 200
rnn_size = 100
learning_rate = 0.01
moment_rate = 0.01


def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
Example #24
    # If "gen_data" = True, generate a dataset in .p format
    if gen_data:
        generate(metadata_path=metadata_path,
                 data_path=testdata_path,
                 batch_size=batch_size_test,
                 label_column_name=label_column_name,
                 is_training=False,
                 fold=fold)
    else:
        pass

    # Calculate mean of each channel
    #- Load data (.p)
    patch_mean = np.array([0, 0, 0],
                          np.float32)  # Initialize mean of each channel
    dataframe = load(testdata_path, batch_size_test)  # Instance
    #- Calculate mean of each channel
    for i, row in dataframe.dataframe.iterrows():
        patch = row['patch']
        patch_mean[0] += np.mean(patch[:, :, 0])
        patch_mean[1] += np.mean(patch[:, :, 1])
        patch_mean[2] += np.mean(patch[:, :, 2])
    patch_mean = patch_mean / len(dataframe.dataframe['patch'])
    #- Delete "dataframe" from memory
    dataframe.left = None

    # Session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
Example #25
	parser.add_argument('--batch_size', default=32, type=int, help='Batch size')
	parser.add_argument('--epochs', default=200, type=int, help='Number of epochs')
	parser.add_argument('--val_split', default=0.20, type=float, help='Validation split for validating model')
	parser.add_argument('--is_training', default=1, type=int, help='Training(1) or testing(0)')
	parser.add_argument('--lr', default=0.0001, type=float, help='Learning rate' )
	parser.add_argument('--beta_1', default=0.9, type=float, help='Beta 1')
	parser.add_argument('--beta_2', default=0.999, type=float, help='Beta 2')
	parser.add_argument('--epsilon', default=1e-08, type=float, help='Epsilon')
	parser.add_argument('--decay', default=0.0, type=float, help='Decay rate')
	parser.add_argument('--data_path', default='data/', help='Path to data folder')
	parser.add_argument('--save_weights', default=1, type=int, help='Save weights (Yes=1, No=0)')
	parser.add_argument('--plot', default=1, type=int, help='Plot accuracy or loss curves (Yes=1, No=0)')
	args = parser.parse_args()
	
	#load data
	x_train, x_test, y_data, y_test, target, _ = ld.load()
	
	#define model
	model = autoencoder(x_train, args)
	
	
	# train or test
	if args.is_training:
		hist = train(model=model, data=((x_train, y_data)), args=args)
		if args.plot:
			plot(hist)
	else:  # as long as weights are given, will run testing
		model.load_weights('model_weights.h5')
		score = test(model=model, data=((x_test, y_test)), target=target)
		print('MAP score : ', score)
Example #26
# limitations under the License.

from __future__ import print_function
import json
import numpy as np
import math as m
import netket as nk
from load_data import load
from mpi4py import MPI

N = 10
path_to_samples = 'ising1d_train_samples.txt'
path_to_bases = 'ising1d_train_bases.txt'

# Load the data
U, sites, training_samples, training_bases = load(N, path_to_samples,
                                                  path_to_bases)

# Constructing a 1d lattice
g = nk.graph.Hypercube(length=N, n_dim=1, pbc=False)

# Hilbert space of spins from given graph
hi = nk.hilbert.Qubit(graph=g)

# Machine
ma = nk.machine.RbmSpin(hilbert=hi, alpha=1)
ma.init_random_parameters(seed=1234, sigma=0.001)

# Sampler
sa = nk.sampler.MetropolisLocal(machine=ma)

# Optimizer
if args.net == 'simple':

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy
elif args.net == 'parallel':
    cuda.check_cuda_available()
    xp = cuda.cupy

batchsize = 50

# Load the data
print "load dataset"
N , row, col, dataset = ld.load()
dataset['data'] = dataset['data'].astype(np.float32)
dataset['data'] /= 256
dataset['target'] = dataset['target'].astype(np.int32)


X_train, X_test = np.split(dataset['data'],   [N])
y_train, y_test = np.split(dataset['target'], [N])
N_test = y_test.size
N = y_train.size
print y_train

X_train = X_train.reshape((len(X_train), 1, row, col))
X_test = X_test.reshape((len(X_test), 1, row, col))

Example #28
# coding=utf-8

import sys  
reload(sys)  
sys.setdefaultencoding('utf8')

from sklearn.datasets import load_iris
from sklearn import tree
from numpy import array
import pandas as pd
import random
from sklearn.cross_validation import train_test_split

from load_data import load

X, y, attribute_names = load(10)
# X, y, attribute_names = dataset.get_dataset(target=dataset.default_target_attribute, return_attribute_names=True)
lymph = pd.DataFrame(X, columns=attribute_names)
lymph['class'] = y
# remove class 0 and class 3
lymph_filtered = lymph[(lymph['class']==1) | (lymph['class']==2)]
# Split to validate
X_train, X_test, y_train, y_test = train_test_split(lymph_filtered[attribute_names], lymph_filtered['class'], test_size=0.20, random_state=42)

# Draw decision tree
clf = tree.DecisionTreeClassifier()
lymph_tree = clf.fit(X_train, y_train)
export_tree(lymph_tree,attribute_names)


import numpy as np
import load_data  # provides the load() calls below

# preprocessing

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# keras for model building

#from keras.models import Model
from keras.layers import GRU, Dense, TimeDistributed, RepeatVector, Bidirectional
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.losses import sparse_categorical_crossentropy

english_sentences = load_data.load('small_vocab_en.txt')
french_sentences = load_data.load('small_vocab_fr.txt')


def tokenize(x):
    x_tkzr = Tokenizer(char_level=False)
    x_tkzr.fit_on_texts(x)
    return x_tkzr.texts_to_sequences(x), x_tkzr


def pad(x, length=None):
    if length is None:
        length = max([len(sentence) for sentence in x])
    return pad_sequences(x, maxlen=length, padding='post')
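A small usage sketch of the two helpers above on toy sentences (hypothetical data rather than the small_vocab files):

sample = ["the cat sat", "the dog sat down"]
seqs, tkzr = tokenize(sample)
padded = pad(seqs)      # post-padded to the longest sentence
print(padded.shape)     # (2, 4)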

Example #30
# -*- coding: utf-8 -*-
import csv
import codecs
import json

from string import Template

from datetime import datetime
from datetime import date

import load_data

timestamp=datetime.now().replace(microsecond=0).isoformat()

#load all the data
countries, funds, data = load_data.load()

mcc_id="US-18" # which one is actual ID? <- this one
#mcc_id="US-USG-MCC-18"

#translates "Fiscal Year, Fiscal Quarter" into ISO date for the start date of the quarter
def fiscal_date(date):
	try:
		fy,fq=date.split(',')
		fy=fy.strip()
		fq=fq.strip()
		
		fy=fy[3:]
		fy=int(fy)
		
		if fq=="FQ 1":
Example #31
    # evaluate mean and covariance of predictive distribution    
    prec_mean = np.dot(Ks, invKy) # (200, 1)
    
    # Kinv = invKy / y
    # print 'Kss', np.shape(Kss), 'Ks', np.shape(Ks), 'kinv', np.shape(Kinv), 'Ks.T', np.shape(Ks.T)
    # prec_cov = Kss - Ks * Kinv * Ks.T # <<< slow
    # prec_cov = Kss - Ks * Ks.T * Kinv 
        
    # return predictive values and either
    if ReturnCov: # full covariance, or
        return np.array(prec_mean).flatten(), np.array(prec_cov)
    else: # just standard deviation
        return np.array(prec_mean).flatten()#, np.array(np.sqrt(np.diag(prec_cov)))
    
# Load data Q2
time1, flux1, yerr1, cadence1 = load_data.load('012317678', quarter = 3, return_cadence = True)
# Load data Q3
time2, flux2, yerr2, cadence2 = load_data.load('012317678', quarter = 4, return_cadence = True)

# x1 = time1[:100]; x2 = time1[200:300]
# y1 = flux1[:100]; y2 = flux1[200:300]
# c1 = cadence1[:100]; c2 = cadence1[200:300]
length = 500
x1 = time1[:length]; x2 = time2[:length]
y1 = flux1[:length]; y2 = flux2[:length]
c1 = cadence1[:length]; c2 = cadence2[:length]

# Adjust flux 3 so that it follows on from flux 2 and mean centre
y2 -= y1[-1]
y1 -= np.mean(y1)
y2 -= np.mean(y2)
from tensorflow import keras
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import load_data as ld
import network as net

# define minimum needed variables
data_dir = "data"  # data location
file_ext = ".npy"  # numpy bitmap arrays
classes = ["circle", "square", "hexagon"]  # classes to train network on

# load data
x, y = ld.load(classes=classes,
               data_dir=data_dir,
               file_ext=file_ext,
               samples=10,
               reload_data=False)
x_train, y_train, x_test, y_test = ld.divide_into_sets(input=x,
                                                       response=y,
                                                       ratio=0.2)

# train network
yolo = net.build_network(y=y_train,
                         batch=6,
                         version=1,
                         input_size=(448, 448, 3),
                         output_size=392)
yolo = net.train_network(network=yolo,
                         x=x_train,
                         y=y_train,
Example #33
params = sys.argv

mode = params[1]
config = params[2]
binary_encode = False

if "--encode" in params:
    binary_encode = True

print binary_encode

# amount of rows
if hasattr(data, "toarray"):
    data = data.toarray()

data = data.load(binary_encode)

orig_shape = np.shape(data)

print orig_shape

orig_rows = orig_shape[0]
orig_dimension = orig_shape[1]

origDistances = np.empty((orig_rows, orig_rows))
r = range(orig_rows)
for i in r:
    for j in r:
        if i == j:
            origDistances[i][j] = .0
        else:
def xy_plot(cx,cy,Nb,equal_samples,reference,style,axarr,standard_dev):
    '''
    Plot binned data ---------------------------------------------------------
    --------------------------------------------------------------------------
    Arguments:

    cx,cy: x+y axis columns for contour (refer to the same cx+cy values in 
    load_data.load).
    
    Nb: Number of bins to plot.
    
    equal_samples: if set as True, all bins will have the same number of
    galaxies.
    
    reference: Can be set as 'all galaxies', 'all spirals' or None.
    
    style: linestyle to plot eg. 'dashed' or 'solid'
    
    reference_plot,m_plot: Plot style. Can have the following values:
    
    axarr: plot array as from make_grid or make_stack.
    
    standard_dev: if True, the standard deviation is plotted as a filled 
    contour.
    --------------------------------------------------------------------------
    '''
  
    table,full_table = load_data.load(cx=cx,cy=cy,p_th=0.5,N_th=10,norm=False,p_values="d")
    
    bins,table = load_data.assign(table=table,Nb=Nb,th=0.5,equal_samples=equal_samples,
        redistribute=False,rd_th=0,ct_th=0,print_sizes=False)
    full_bins,full_table = load_data.assign(table=full_table,Nb=Nb,th=0.5,equal_samples=equal_samples,
        redistribute=False,rd_th=0,ct_th=0,print_sizes=False)
    
    if reference == "all galaxies":
        reference_table,reference_bins = full_table,full_bins
        
    else:
        reference_table,reference_bins = table,bins
        
        if (reference != "all spirals") and (reference != None):
            print("Invalid 'reference' value; using 'all spirals'")
        
    xy_r = load_data.get_xy_binned(reference_table,reference_bins)
    
    for m in range(5):
      
        t_sel = table[bins[:,1] == m]
        b_sel = bins[bins[:,1] == m]
        
        xy = load_data.get_xy_binned(t_sel,b_sel)
        
        if reference != None:
            axarr[m].plot(xy_r[:,0],xy_r[:,2],color="k",linewidth=2,linestyle=style)
            if standard_dev == True:
                axarr[m].fill_between(xy_r[:,0],xy_r[:,2]+xy_r[:,3],xy_r[:,2]-xy_r[:,3],color="k",alpha=0.5)

        axarr[m].plot(xy[:,0],xy[:,2],color=colours[m],linewidth=2,linestyle=style)
        if standard_dev == True:
            axarr[m].fill_between(xy[:,0],xy[:,2]+xy[:,3],xy[:,2]-xy[:,3],color=colours[m],alpha=0.5)

    return None
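A self-contained sketch (synthetic data, hypothetical helper name) of the equal-sample binning the docstring describes: sort by x, split into Nb equally sized bins, and record each bin's mean x together with the mean and standard deviation of y:

import numpy as np

def bin_equal_samples(x, y, Nb):
    order = np.argsort(x)
    xs, ys = x[order], y[order]
    rows = []
    for xb, yb in zip(np.array_split(xs, Nb), np.array_split(ys, Nb)):
        rows.append([xb.mean(), yb.mean(), yb.std()])
    return np.array(rows)  # columns: mean x, mean y, std y

x = np.random.rand(1000)
y = 2*x + np.random.randn(1000)*0.1
binned = bin_equal_samples(x, y, Nb=10)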
Example #35
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
import numpy as np
import load_data

if __name__ == "__main__":

    X, y, X_sub, feature_names = load_data.load()

    valSize = int(X.shape[0] * 0.3)

    X_train = X[:-valSize]
    y_train = y[:-valSize]
    X_val = X[-valSize:]
    y_val = y[-valSize:]

    clf = lgb.LGBMClassifier(n_estimators=170,
                             learning_rate=0.05,
                             num_leaves=5000,
                             colsample_bytree=.9,
                             subsample=1,
                             silent=True,
                             min_child_weight=1,
                             seed=1000,
                             min_child_samples=10,
                             reg_alpha=0.01,
                             max_bin=5000)

    print("Training...")
    clf.fit(X_train,
            y_train,
Example #36
import tensorflow as tf
import numpy as np

import load_data

np.random.seed(100000)

X_train_text, X_test_text, X_train_audio, X_test_audio, X_train_gest, X_test_gest, X_train_video, X_test_video, Y_train, Y_test = load_data.load(
)

print(X_train_audio.shape, X_test_audio.shape)

if __name__ == '__main__':
    from sklearn import svm, tree, ensemble

    # f = np.concatenate((X_train_audio, X_train_text, X_train_video, X_train_gest), axis=1)
    X_train = np.concatenate(
        (X_train_audio, X_train_text, X_train_video, X_train_gest), axis=1)
    X_test = np.concatenate(
        (X_test_audio, X_test_text, X_test_video, X_test_gest), axis=1)
    print(X_train.shape)

    print("SVM")
    clf = svm.SVC()
    clf.fit(X_train, np.argmax(Y_train, axis=1))
    # print(clf.support_vectors_)

    a = clf.predict(X_test)
    print(a.shape)
    y_true = np.argmax(Y_test, 1)
    print(np.mean(y_true == a))
def main():
    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(PROJECT_ROOT, "data/train2.data")
    test_path = os.path.join(PROJECT_ROOT, "data/test2.data")
    train_log_path = os.path.join(PROJECT_ROOT, "log/train/")
    test_log_path = os.path.join(PROJECT_ROOT, "log/test/")
    dev_log_path = os.path.join(PROJECT_ROOT, "log/dev/")

    X, y = ld.load(train_path)  #debug
    X = X.reshape([-1, 6, 6, 1])
    train_num = int(X.shape[0] * Config.trainrate)
    X_train = X[:train_num]
    y_train = y[:train_num]
    X_dev = X[train_num:-1]
    y_dev = y[train_num:-1]
    X_test, y_test = ld.load(test_path)
    X_test = X_test.reshape([-1, 6, 6, 1])
    print("train size :", X_train.shape, y_train.shape)
    print("dev size :", X_dev.shape, y_dev.shape)
    print("test size :", X_test.shape, y_test.shape)
    print("start training")

    with tf.Graph().as_default():
        config = Config()
        nn = NN(config)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=10,
                               keep_checkpoint_every_n_hours=0.5)
        # must be done outside the session
        y_train = tf.one_hot(y_train, depth=Config.n_classes)
        y_test = tf.one_hot(y_test, depth=Config.n_classes)
        y_dev = tf.one_hot(y_dev, depth=Config.n_classes)
        shuffle_batch_x, shuffle_batch_y = tf.train.shuffle_batch(
            [X_train, y_train],
            batch_size=Config.batch_size,
            capacity=10000,
            min_after_dequeue=5000,
            enqueue_many=True)

        with tf.Session() as session:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(train_log_path, session.graph)
            test_writer = tf.summary.FileWriter(test_log_path)
            dev_writer = tf.summary.FileWriter(dev_log_path)
            session.run(init)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(session, coord)

            y_test, y_dev = session.run([y_test, y_dev])
            i = 0
            try:
                while not coord.should_stop():
                    #for i in range(Config.n_epochs * X_train.shape[0] // Config.batch_size):
                    #offset = (i * Config.batch_size) % (X_train.shape[0] - Config.batch_size)
                    #batch_x = X_train[offset:(offset + Config.batch_size), :]
                    #batch_y = y_train[offset:(offset + Config.batch_size)]
                    batch_x, batch_y = session.run(
                        [shuffle_batch_x, shuffle_batch_y])
                    loss = nn.train_on_batch(session, batch_x, batch_y, merged,
                                             train_writer, i)
                    i += 1
                    if i % 1000 == 0:
                        dev_acc = nn.accuracy(session, X_dev, y_dev, "dev",
                                              merged, dev_writer, i)
                        test_acc = nn.accuracy(session, X_test, y_test, "test",
                                               merged, test_writer, i)
                        print("step:", i, "loss:", loss, "dev_acc:", dev_acc,
                              "test_acc:", test_acc)
                        saver.save(session,
                                   os.path.join(PROJECT_ROOT,
                                                "model/model_ckpt"),
                                   global_step=i)
            except tf.errors.OutOfRangeError:
                print("done")
            finally:
                coord.request_stop()
            coord.join(threads)
Example #38
@contact: [email protected]
@file: paipaidai_xgboost.py
@time: 19-7-1 上午8:39
@desc:
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib
from lightgbm.sklearn import LGBMClassifier
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, log_loss, accuracy_score
from load_data import load

train_values, test_values, clf_labels, clf_labels_r, clf_labels_2 = load()

train_num = 1000000

print(train_values.shape)

# Five-fold cross-validation could also be replaced by a single validation split, dividing
# train/validation by time to avoid leakage caused by the temporal ordering of the data.

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)  # 5-fold cross-validation

clf = LGBMClassifier(  # LightGBM classification model
    learning_rate=0.05,
    n_estimators=10000,
    subsample=0.8,
    subsample_freq=1,
    colsample_bytree=0.8)
Example #39
 def setUp(self):
     self.test_load = l.load('indicator gapminder gdp_per_capita_ppp.xlsx')
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS.batch_size
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.iteritems()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
print("Loading data...")
u, i, y = load("ml-100k")
# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
u_shuffled = u[shuffle_indices]
i_shuffled = i[shuffle_indices]
y_shuffled = y[shuffle_indices]
# Split train/test set
# TODO: This is very crude, should use cross-validation
u_train, u_dev = u_shuffled[:-1000], u_shuffled[-1000:]
i_train, i_dev = i_shuffled[:-1000], i_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]


print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
    n_folds = 10
    shuffle = False

    try:
        X_n, X_submission_n = pickle.load(open("X_n.pkl", "rb"))
    except (OSError, IOError) as e:
        X_n, X_submission_n = nn.load_data_for_decoder()

        # X_n = encode.fit_transform(X_n)
        # X_submission_n = encode.fit_transform(X_submission_n)
        X_n = encode.get_encoded_feature(X_n)
        X_submission_n = encode.get_encoded_feature(X_submission_n)

        pickle.dump((X_n, X_submission_n), open("X_n.pkl", "wb"))

    X, y, X_submission, Id = load_data.load()
    X, X_submission = X.values, X_submission.values

    y = y.values
    # include auto-encode features
    add_feats = False
    if (add_feats):
        X = np.append(X, X_n, axis=1)
        X_submission = np.append(X_submission, X_submission_n, axis=1)
    else:  # add data to the training dataset
        X_n, X_submission_n = nn.load_data_for_decoder()
        X_n = encode.fit_transform(X_n)
        # add data by fit-transforming the original into data with the same attributes, then append it to the data set
        X = np.append(X, X_n, axis=0)
        y = np.append(y, y, axis=0)
Example #42
                    type=int,
                    default=16,
                    help='Number of hidden units.')
parser.add_argument('--link-pred',
                    action='store_true',
                    default=False,
                    help='Enable Link Prediction Loss')

args = parser.parse_args()
device = "cuda" if not args.no_cuda and torch.cuda.is_available() else "cpu"
# np.random.seed(args.seed)
# torch.manual_seed(args.seed)
# if device == 'cuda':
#     torch.cuda.manual_seed(args.seed)

adj, features, labels = load_data.load()
max_num_nodes = max([g.shape[0] for g in adj])
labels = torch.from_numpy(labels).to(device)

idx = np.arange(600)
np.random.RandomState(seed=124).shuffle(idx)
idx_train, idx_test = idx[:480], idx[480:]

model = Model(pool_size=int(max_num_nodes * 0.25), device=device).to(device)
model.train()
# optimizer = optim.SGD(model.parameters(), lr=1e-5)
optimizer = optim.Adam(model.parameters())
for e in tqdm(range(args.epochs)):
    pred_labels = []
    for i, idx in enumerate(idx_train):
        adj_train = torch.from_numpy(adj[idx]).to(device).float()
Example #43
    n = 100
    ps = 10**(np.random.rand(n)*2.) # FIXME: yes Dan, these should be np.exp
    taus = 10**np.random.rand(n)
    myamp = 10**(np.random.rand(n)*1.18)
    names = ['010972873', '011137075', '011502597', '011717120', '005607242', \
             '006370489', '006442183', '006531928', '006603624', '009574283']

    m = 0
    for i in range(n):
        for c, KID in enumerate(names):
            m += 1
            star = m

            # load Kepler lcs, join quarters together
            lc_files = np.array(glob.glob("%s/kplr%s*"%(datadir, KID)))
            x, y, yerr = load(lc_files[0])
            for j in range(1, len(lc_files)):
                x = np.concatenate((x, load(lc_files[j])[0]))
                y = np.concatenate((y, load(lc_files[j])[1]))
                yerr = np.concatenate((yerr, load(lc_files[j])[2]))

            # generate simulated lcs
            doplot = True
            pars, ts = mklc(x, nspot, incl, fixedamp, taus[i], diffrot, \
                    dur, samp, noise, ps[i], Amplitude, doplot)

            # plot
            if doplot == True:
                pylab.savefig('%s/%s'%(savedir, (star)))

            print "saving data..."
Example #44
from load_data import load
from itertools import combinations, permutations

args = sys.argv
#choose the algorithm
#filename = args[1]
filename = 'SVD_reconUrban_F210.npz'
sig = float(args[1])
obs = float(args[2])  #fraction observed
#sig = .0003
#obs = 0.7
init_iter = 10
iterate = 100
#obs = 0.7
limit = 10000
full_data = load(filename)
train_count = 500
test_count = 100
total_data = train_count + test_count
full_data = full_data[:total_data, :30]
maxarray = np.amax(full_data)
scale = 1.0 / maxarray
full_data = scale * full_data
datapoints, dimension = full_data.shape
sig_w = 0.1
sig_test = sig
#full_data,Z_gen = construct_data(data_dim,data_count + held_out,sig,data_type)
if train_count + test_count > datapoints:
    data_count = datapoints

#indices = list(combinations([i for i in range(datapoints)],total_data))
                                  steps_per_epoch=X_train.shape[0] /
                                  BATCH_SIZE,
                                  validation_data=(X_val, Y_val),
                                  epochs=epochs,
                                  verbose=1,
                                  callbacks=[mcp_save],
                                  shuffle=True)

    return model


#### load data and reshape
import sys
# sys.argv[1] = './../myData/hw8_data/train.csv'
print('training file:', sys.argv[1])
D_SET = 10
X_train, Y_train = load_data.load(sys.argv[1], 1)
X_train = X_train.reshape(-1, 48, 48, 1)

#### rescale
X_train = X_train / 255.

print('X_train shape : ', X_train.shape)
print('Y_train shape : ', Y_train.shape)

#### convert class vectors to binary class matrices (one hot encoding vector)
Y_train = np_utils.to_categorical(Y_train, 7)

#### build model
for i in range(1):
    print('No.' + str(i))
    SaveModel_name = 'model_best_62998.h5'
Exemple #46
0
            wiki = reg1.sub(' (', wiki)
            wiki = reg2.sub(')', wiki)
            html = parse(wiki, showToc=False)
            self.load_html_string(html, 'file:///')
        else:
            self.open(os.path.join(self.runpath, 'err.html'))
        
    def _pic(self, m):
        i1, i2, i3 = m.group(1).split('|')
        f = '_'.join([cat[i1], i2, i3]) + '.gif'
        f = os.path.join(self.runpath, 'pic', f)
        return '<img src="%s"/>' % f    

__WV__ = WV()
import load_data
__ALL__, __BU__, __HUA__, __YIN__ = load_data.load()
__BLACK__, __BLACK_BU__ = load_data.black()
__BLACK_REF__ = None

class Name(gtk.VBox):
    '''Name'''
    def __init__(self):
        gtk.VBox.__init__(self)
        self.label = gtk.Label('×')
        self.label.set_use_markup(True)
        self.ev = gtk.EventBox()
        self.ev.add(self.label)
        self.ev.connect('button-press-event', 
            lambda o,e:__WV__.show(self.label.get_text().decode('utf8')[0]))
        self.b = gtk.Button('换字')
        self.b.connect('clicked', self.choose)
Exemple #47
0
import numpy
import cPickle
import theano
import theano.tensor as tensor
import theano.tensor.shared_randomstreams as RS
from theano import config as cf
import load_data

dataset = 'data/authors.pkl'
train, valid, test = load_data.load(dataset)


def ngram2(seqs, vocab_size=1000):
    freq_pair = numpy.zeros((vocab_size, vocab_size))
    freq_w = numpy.zeros((vocab_size, ))

    for seq in seqs:
        for i, w in enumerate(seq):
            if w >= vocab_size: w1 = 0
            else: w1 = w

            if i == len(seq) - 1:
                freq_w[w1] += 1
                continue

            if seq[i + 1] >= vocab_size: w2 = 0
            else: w2 = seq[i + 1]

            freq_pair[w1, w2] += 1
            freq_w[w1] += 1
    verbose = True
    shuffle = False
    
#    print("Reading the data")
#    dataTrain = cu.get_dataframe(train_file)
#
#    print("Extracting features")
#    fea = features.extract_features(feature_names, dataTrain)
#    X = fea
#    y = dataTrain["OpenStatus"]
#    
#    dataTest = cu.get_dataframe(test_file)
#    test_features = features.extract_features(feature_names, dataTest)
#    X_submission = test_features
    
    X, y, X_submission = load_data.load()

    if shuffle:
        idx = np.random.permutation(y.size)
        X = X[idx]
        y = y[idx]

    skf = list(StratifiedKFold(y, n_folds))

    clfs = [RandomForestClassifier(n_estimators=50, n_jobs=1, criterion='gini', verbose=2, compute_importances=True),
            RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
            ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
            ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
            GradientBoostingClassifier(learn_rate=0.05, subsample=0.5, max_depth=6, n_estimators=50)]

    print "Creating train and test sets for blending."
Exemple #49
0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Bidirectional, LSTM
from tensorflow.keras.optimizers import Adam
import load_data

grid_num = 1
X_train, y_train, X_test, y_test = load_data.load(grid_num)

model = Sequential()
model.add(
    Bidirectional(
        LSTM(units=128, input_shape=(X_train.shape[1], X_train.shape[2]))))
model.add(Dense(units=1, activation="relu"))
adam = Adam(learning_rate=0.1)
model.compile(
    optimizer=adam,
    loss="mse",
)
model.fit(X_train, y_train, epochs=15, validation_split=0.1)

print(model.evaluate(X_test, y_test))
Exemple #50
0
 def test_login_handler(self):
     load()
     response = self.testapp.get('/login')
     self.assertEquals(response.status_int, 302, msg="Not redirected.")
import pandas as pd
import numpy as np
from load_data import load
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


def execute(X_train, X_test, y_train, y_test):
    clf = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc_score = mean_squared_error(y_test, y_pred)
    print(np.sqrt(acc_score) * 48)


if __name__ == '__main__':
    X_train, y_train = load()
    X_train, X_test, y_train, y_test = train_test_split(X_train,
                                                        y_train,
                                                        test_size=0.33,
                                                        random_state=42)
    execute(X_train, X_test, y_train, y_test)
Exemple #52
0
    """Logloss, i.e. the score of the bioresponse competition.
    """
    attempt = np.clip(attempt, epsilon, 1.0 - epsilon)
    return -np.mean(actual * np.log(attempt) +
                    (1.0 - actual) * np.log(1.0 - attempt))
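# For reference, the quantity returned above is the binary log-loss
#   logloss = -(1/N) * sum_i [ y_i * log(p_i) + (1 - y_i) * log(1 - p_i) ]
# with the predictions p_i clipped to [epsilon, 1 - epsilon] so log(0) never occurs.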


if __name__ == '__main__':

    np.random.seed(0)  # seed to shuffle the train set

    n_folds = 10
    verbose = True
    shuffle = False

    X, y, X_submission = load_data.load()

    if shuffle:
        idx = np.random.permutation(y.size)
        X = X[idx]
        y = y[idx]

    skf = list(StratifiedKFold(y, n_folds))

    clfs = [
        RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
        RandomForestClassifier(n_estimators=100,
                               n_jobs=-1,
                               criterion='entropy'),
        ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
        ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
def train():
    with tf.Graph().as_default():
        # global_step = tf.contrib.framework.get_or_create_global_step()

        data, train_size, test_size, input_shape, nclass = load_data.load(
            FLAGS.data)
        [X_train, y_train, X_test, y_test] = data

        imgs_tr, _, imgs_te, _ = load_data.load_pure(FLAGS.data)

        global_step = tf.Variable(0, trainable=False)
        learning_rate_placeholder = tf.placeholder(tf.float32)
        if FLAGS.lr_strategy == 'exp':
            learning_rate = tf.train.exponential_decay(
                learning_rate_placeholder,
                global_step,
                FLAGS.decay_step,
                FLAGS.decay_rate,
                staircase=True)
        else:
            learning_rate = learning_rate_placeholder
        images = tf.placeholder(tf.float32, input_shape)
        labels = tf.placeholder(tf.int32, [None] + [nclass])
        is_training = tf.placeholder(tf.bool)

        with tf.variable_scope('Teacher'):
            t_logits = nets.get_modelf(FLAGS.t_model)(images,
                                                      is_training=is_training,
                                                      nclasses=nclass,
                                                      k=FLAGS.t_vggsize)
            t_loss = nets.teacher_cl_loss(labels, t_logits)

        t_train_op = nets.t_train_op(
            t_loss,
            utils.get_all_variables_from_scope('Teacher'),
            learning_rate=learning_rate_placeholder,
            global_step=global_step)

        if FLAGS.s_model != 'no':
            with tf.variable_scope('Student'):
                s_logits = nets.get_modelf(FLAGS.s_model)(
                    images,
                    is_training=is_training,
                    nclasses=nclass,
                    k=FLAGS.s_vggsize)
                s_loss = nets.student_cl_loss(t_logits, s_logits)

            s_train_op = nets.train_op(
                s_loss,
                utils.get_all_variables_from_scope('Student'),
                learning_rate=learning_rate_placeholder)
        else:
            s_train_op = tf.no_op()
            s_loss = t_loss
            s_logits = t_logits

        ds_logits = None

        if FLAGS.go_deeper:
            with tf.variable_scope('Deep-Student'):
                ds_logits = nets.get_modelf(FLAGS.s_model)(
                    images,
                    is_training=is_training,
                    nclasses=nclass,
                    k=FLAGS.s_vggsize)
                ds_loss = nets.student_cl_loss(s_logits, ds_logits)

            ds_train_op = nets.train_op(
                ds_loss,
                utils.get_all_variables_from_scope('Deep-Student'),
                learning_rate=learning_rate_placeholder)

        es_logits = None

        # if FLAGS.sgld_target:
        #     ensemble_logits = tf.placeholder(tf.float32, [None] + [nclass])
        #     with tf.variable_scope('Ensemble-Student'):
        #         es_logits = nets.get_modelf(FLAGS.s_model)(images,
        #                                                    is_training=is_training,
        #                                                    nclasses=nclass)
        #         es_loss = nets.student_cl_loss(ensemble_logits, es_logits)
        #     es_train_op = nets.train_op(es_loss,
        #                                 utils.get_all_variables_from_scope('Ensemble-Student'),
        #                                 learning_rate=learning_rate)

        session_config = tf.ConfigProto(allow_soft_placement=True)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.80

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session(config=session_config) as sess:
            n_iter_to_train = FLAGS.n_epochs * train_size / FLAGS.batch_size
            n_iter_per_epoch = train_size / FLAGS.batch_size
            ensemble = PseudoEnsemble(np.zeros_like(y_test))
            train_ensemble = PseudoEnsemble(np.zeros_like(y_train))

            eval_once = get_eval_once(X_test, y_test, imgs_te, sess,
                                      is_training, images, t_logits, s_logits,
                                      es_logits, ds_logits, ensemble)

            tr_eval_once = get_eval_once(X_train,
                                         y_train,
                                         imgs_tr,
                                         sess,
                                         is_training,
                                         images,
                                         t_logits,
                                         s_logits,
                                         es_logits,
                                         ds_logits,
                                         ensemble,
                                         mode='train')
            step = 0

            sess.run(init_op)

            es_loss_val = -1
            k = 0

            for k in range(FLAGS.n_epochs):
                for X_batch_aug, y_batch, idxs in load_data.batch_iterator_train_crop_flip(
                        X_train, y_train, FLAGS.batch_size):
                    step += 1
                    _lr = FLAGS.lr
                    if FLAGS.lr_strategy == 'linear':
                        _lr = FLAGS.lr if k < FLAGS.lr_to0_from else FLAGS.lr * (
                            n_iter_to_train -
                            step) / (n_iter_to_train -
                                     FLAGS.lr_to0_from * n_iter_per_epoch)
                        if k > 190:
                            _lr = 0.00001

                    if FLAGS.lr_strategy == 'step':
                        if k < 100:
                            _lr = 0.001
                        elif k < 200:
                            _lr = 0.0001
                        elif k < 300:
                            _lr = 1e-5
                        else:
                            _lr = 1e-6

                    if _lr < 0:
                        break

                    # print('learning rate', _lr)

                    t_loss_val, s_loss_val, _, _ = sess.run(
                        [t_loss, s_loss, t_train_op, s_train_op], {
                            images: X_batch_aug,
                            labels: y_batch,
                            is_training: True,
                            learning_rate_placeholder: _lr
                        })

                    # if step % FLAGS.s_step == 0 and k >= FLAGS.start_snn:
                    #     s_loss_val, _ = sess.run([s_loss, s_train_op], {
                    #         images: X_batch_aug,
                    #         labels: y_batch,
                    #         is_training: True,
                    #         learning_rate: _lr
                    #     })

                    if FLAGS.go_deeper and step % FLAGS.s_step == 0 and k >= FLAGS.start_snn2:
                        ds_loss_val, _ = sess.run(
                            [ds_loss, ds_train_op], {
                                images: X_batch_aug,
                                labels: y_batch,
                                is_training: True,
                                learning_rate_placeholder: _lr
                            })

                    # if FLAGS.sgld_target:
                    #     es_loss_val, _ = sess.run([es_loss, es_train_op], {
                    #         images: X_batch_aug,
                    #         ensemble_logits: train_ensemble._y[idxs],
                    #         is_training: True,
                    #         learning_rate: _lr
                    #     })

                    with open(FLAGS.train_dir + '/my_train_logs.csv',
                              'a') as f:
                        f.write('{},{},{},{},{}\n'.format(
                            k, step, t_loss_val, s_loss_val, es_loss_val))

                    if FLAGS.ensemble_step != -1 and step % FLAGS.ensemble_step == 0:
                        t_logits_val = get_logist(X_test, nclass, sess,
                                                  is_training, t_logits,
                                                  images)
                        ensemble.add_estimator(t_logits_val)

                        if FLAGS.sgld_target or FLAGS.make_sgld_target:
                            t_logits_val = get_logist(X_train, nclass, sess,
                                                      is_training, t_logits,
                                                      images)
                            train_ensemble.add_estimator(t_logits_val)
                            np.save(
                                FLAGS.train_dir + 'ensemble-train-logits.npy',
                                train_ensemble._y)

                    if step % FLAGS.eval_frequency == 0:
                        t_acc = eval_once(k, step)

                    if k % FLAGS.save_model_frequency == 0:
                        saver.save(sess, FLAGS.train_dir + '/model.ckpt')
Exemple #54
0
import load_data as ld
import create_model as cm  # assumed: companion module that builds the Keras model via create(); not shown in this excerpt

# File paths
CHECKPOINT_PATH = "../checkpoints"
WEIGHTS_FILE_NAME = '/weights'
MODEL_FILE_NAME = '/model.h5'

# Training parameters
epochs = 200  # Number of epochs
batch_size = 32  # Training batch size
validation_split = 0.1  # Fraction of training data for validation
verbose = 1  # Show progress bar

# Load training data
print("Loading training data...")
X_train, Y_train = ld.load(mode="train")

# Create model
print("Creating model...")
model = cm.create()

# Train model
print("Training model...")
model.fit(X_train,
          Y_train,
          epochs=epochs,
          batch_size=batch_size,
          validation_split=validation_split,
          verbose=verbose)

# Save model and model weights
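# A hedged sketch of the save step announced above, assuming the standard Keras
# Model API and reusing the path constants defined earlier in this snippet:
#   model.save_weights(CHECKPOINT_PATH + WEIGHTS_FILE_NAME)
#   model.save(CHECKPOINT_PATH + MODEL_FILE_NAME)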
Exemple #55
0

if __name__ == "__main__":

	# setting the hyper parameters
	parser = argparse.ArgumentParser()
	parser.add_argument('--batch_size', default=1, type=int, help='Batch size')
	parser.add_argument('--epochs', default=400, type=int, help='Number of epochs')
	parser.add_argument('--is_training', default=1, type=int, help='Training(1) or testing(0)')
	parser.add_argument('--data_path', default='data/',help='Path to data folder')
	parser.add_argument('--save_weights', default=1, type=int, help='Save weights (Yes=1, No=0)')
	parser.add_argument('--plot', default=1, type=int, help='Plot accuracy or loss curves (Yes=1, No=0)')
	args = parser.parse_args()
	
	#load data
	(X_train, target_input), (X_val, target_val), (X_test, true_labels) = ld.load()
	
	image_height, image_width, depth = 96, 96, 1
	input_dim = (image_height,image_width,depth)
	#define model
	model = model_fkpd(input_dim)

	# train or test
	if args.is_training:
		hist = train(model=model, data=((X_train, target_input), (X_val, target_val)), args=args)
		if args.plot:
			plot(hist)
	else:  # as long as weights are given, will run testing
		model.load_weights('model_weights.h5')
		error = test(model=model, data=((X_test, true_labels)))
		print("RMSE Value : ", np.sqrt(error/(30*X_test.shape[0])))
import sys
import load_data
from create_model import *

# -data: dataset
# -p: p
# -log: log & model saving file
# -dim: dimension of highway layers
# -shared: 1 if shared, 0 otherwise, 2 if both
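# A hedged invocation sketch (script name and values are illustrative only; the
# code below actually reads -data, -nlayers, -inpShape, -saving, -dim and -shared,
# assuming load_data.arg_passing takes "-flag value" pairs from argv):
#   python run_highway.py -data data/authors.pkl -nlayers 2 -inpShape 100 -saving authors_run -dim 50 -shared 1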

########################## LOAD DATA ###############################################
print('Loading data...')
arg = load_data.arg_passing(sys.argv)
dataset, nlayers, inpShape, saving, dim, shared = arg['-data'], arg['-nlayers'], arg['-inpShape'], arg['-saving'], arg['-dim'], arg['-shared']

train, valid, test = load_data.load(dataset)
log = 'log/' + saving + '.txt'

train_x, train_y = train[0], train[1]
valid_x, valid_y = load_data.shuffle(valid[0], valid[1])
test_x, test_y = load_data.shuffle(test[0], test[1])

n_classes = max(train_y)
if n_classes > 1: n_classes += 1

if n_classes == 1:
    loss = 'binary_crossentropy'
    metric = f1
    metric_str = 'f1'
else:
    loss = 'sparse_categorical_crossentropy'
import pandas
import load_data
import numpy as np
from models import stanfordnlp_model

print('Train data Analysis')
df = load_data.load('dataset/gap-development.tsv')
print("pronoun after A count",
      (df['A-offset'] > df['Pronoun-offset']).sum(axis=0))
print("pronoun after B count",
      (df['B-offset'] > df['Pronoun-offset']).sum(axis=0))
print("A is True count", df['A-coref'].sum(axis=0))
print("B is True count", df['B-coref'].sum(axis=0))
print("non both count",
      ((df['A-coref'] == False) & (df['B-coref'] == False)).sum(axis=0))
print("pronoun after A AND A is True count",
      (df['A-coref'] & (df['A-offset'] > df['Pronoun-offset'])).sum(axis=0))
print("pronoun Bfter B BND B is True count",
      (df['B-coref'] & (df['B-offset'] > df['Pronoun-offset'])).sum(axis=0))

a_df = df['Pronoun-offset'] - df['A-offset']
b_df = df['Pronoun-offset'] - df['B-offset']
print("A is near pronoun than B AND A is True count",
      (a_df.abs() < b_df.abs())[df['A-coref']].sum(axis=0))
print("A is near pronoun than B AND B is True count",
      (a_df.abs() < b_df.abs())[df['B-coref']].sum(axis=0))
print("A is near pronoun than B", (a_df.abs() < b_df.abs()).sum(axis=0))

print("Pronoun unique values", df['Pronoun'].unique())
print("A unique values", df['A'].unique())
print("B unique values", df['B'].unique())
Exemple #58
0
Copyright 2012, Emanuele Olivetti.
BSD license, 3 clauses.
"""

from __future__ import division
import numpy as np
import load_data
from sklearn.linear_model import LogisticRegression

def logloss(attempt, actual, epsilon=1.0e-15):
    """Logloss, i.e. the score of the bioresponse competition.
    """
    attempt = np.clip(attempt, epsilon, 1.0-epsilon)
    return - np.mean(actual * np.log(attempt) + (1.0 - actual) * np.log(1.0 - attempt))


if __name__ == '__main__':
    X, y, X_submission, y_real = load_data.load()
    
    clf = LogisticRegression()
    clf.fit(X, y)
    y_submission = clf.predict_proba(X_submission)[:,1]

    print "Linear stretch of predictions to [0,1]"
    y_submission = (y_submission - y_submission.min()) / (y_submission.max() - y_submission.min())
    
    print "Saving Results."
    np.savetxt(fname='../Submissions/blendML50v2_1749rf500_3.csv', X=y_submission, fmt='%0.9f')
    
Exemple #59
0
import math

def rmsle(y, y0):
    assert len(y) == len(y0)
    return np.sqrt(np.mean(np.power(np.log1p(y)-np.log1p(y0), 2)))
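# The metric above is the root mean squared logarithmic error:
#   rmsle = sqrt( (1/N) * sum_i (log(1 + y_i) - log(1 + y0_i))^2 )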

if __name__ == '__main__':
    
    # initialise the random seed and set up a 5-fold cross-validation

    np.random.seed(0)
    n_folds = 5
    
    # load the dataset

    X, X_submission, ys, ids, idx = load_data.load()    
    
    # avoid the log scale in the evaluation:
    ys = np.log(ys/500.0 + 1.0)      
    y_submission = np.zeros((X_submission.shape[0], 12))    

    # tried with n_estimators=1000 so it runs faster
    regs = [GradientBoostingRegressor(learning_rate=0.001, subsample=0.5, max_depth=6, n_estimators=10000)]

    dataset_blend_train = np.zeros((X.shape[0], 12*len(regs)), dtype=np.double)
    dataset_blend_submission = np.zeros((X_submission.shape[0], 12*len(regs), n_folds), dtype=np.double)
    
    
    for i in range(12):
        print "Month", i
        y = ys[:,i]