Example #1
def main():
    start = time.clock()
    k_s = 1000
    x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, k_s)
    end = time.clock()
    print "Time-sgd: ", end - start

    #convert list to numpy array and evaluate function value
    x_vals_a = np.asarray(x_vals_s)
    f_vals_s = fsum(x_vals_a)

    # plot showing f(xi) for each iteration of both methods
    plt.plot(range(k_s), f_vals_s, 'r-')
    plt.xlabel('Iteration')
    plt.ylabel('f(xi)')
    plt.xticks(np.arange(0, k_s, 100))
    plt.title('''f(xi) vs i of Stochastic Gradient Descent''')
    plt.show()
    data_750 = []
    data_1000 = []
    for i in range(30):
        x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, 1000)
        data_1000.append(fsum(x_vals_s[-1]))

    for i in range(30):
        x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, 750)
        data_750.append(fsum(x_vals_s[-1]))

    print 'SGD 750 iterations, mean: ', np.mean(
        data_750), 'variance: ', np.var(data_750)
    print 'SGD 1000 iterations, mean: ', np.mean(
        data_1000), 'variance: ', np.var(data_1000)
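Examples #1 and #10 rely on an sgd helper, called as sgd(maxi, fsum, fsumprime, fi, fiprime, x0, t, iterations), that returns one x value per iteration; its implementation is not included in this listing. The block below is only a minimal sketch, under the assumption that fi(x, i) is the i-th summand of fsum, fiprime(x, i) its derivative, and the step size decays as t/k.

import random

def sgd(maxi, fsum, fsumprime, fi, fiprime, x0, t, iterations):
    # Hypothetical sketch: minimize f(x) = sum_i fi(x, i) by sampling one
    # component i per step and stepping against its gradient fiprime(x, i).
    # fsum and fsumprime are accepted only to match the call sites above.
    x = x0
    x_vals = []
    for k in range(1, iterations + 1):
        i = random.randrange(maxi)
        x = x - (float(t) / k) * fiprime(x, i)
        x_vals.append(x)
    return x_vals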
Example #2
def main():
    df = pd.read_csv(DATA_FILEPATH)
    print(df.columns)

    x_data = df.drop(["clase"], axis=1).values  # .values works across pandas versions (as_matrix was removed)
    y_data = np.array(list(map(int, df["clase"])), dtype="int32")
    n_samples = x_data.shape[0]
    print(type(y_data))

    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")

    clf = lr.LogisticRegression(x, x_data.shape[1], 2)

    with_validation = True
    if with_validation:
        val_frac = 0.3
        val_samples = int(n_samples * val_frac)
        train_samples = n_samples - val_samples
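        # Note: this split is sequential (first 70% of rows for training, last
        # 30% for validation); shuffle the rows beforehand if the CSV is ordered.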
        x_tr, y_tr = x_data[:train_samples, :], y_data[:train_samples]
        x_tr_sh = theano.shared(x_tr, borrow=True)
        y_tr_sh = theano.shared(y_tr, borrow=True)
        x_val, y_val = (x_data[train_samples:(train_samples + val_samples), :],
                        y_data[train_samples:(train_samples + val_samples)])
        x_val_sh = theano.shared(x_val, borrow=True)
        y_val_sh = theano.shared(y_val, borrow=True)
        print("calling sgd_with_validation")
        sgd.sgd_with_validation(clf,
                                x_tr_sh,
                                y_tr_sh,
                                x_val_sh,
                                y_val_sh,
                                learning_rate=0.01,
                                reg_term=0.0001,
                                batch_size=32,
                                n_epochs=1000,
                                max_its=10000,
                                improv_thresh=0.01,
                                max_its_incr=4,
                                rel_val_tol=1e-3,
                                verbose=True)
    else:
        x_tr_sh = theano.shared(x_data, borrow=True)
        y_tr_sh = theano.shared(y_data, borrow=True)
        print("calling sgd")
        sgd.sgd(clf,
                x_tr_sh,
                y_tr_sh,
                y=y,
                learning_rate=0.01,
                reg_term=0.0001,
                batch_size=220,
                rel_tol=2e-3,
                n_epochs=256,
                verbose=True)

    acc = theano.function([x, y], clf.score(y))
    print("accuracy: %.2f%%" % (100 * acc(x_data, y_data)))
def buildClassifier(subset, name, cs, iterations=100, dataDir='data', det=False, verbose=0):
    if (verbose >= 1):
        print 'Enter buildClassifier'
    if det:
        random.seed(1)

    evalC=[]
    bC=0.0
    evbC=0.0
    
    mfccs, mfccMatching, _, _, _ = preprocess.preprocess(subset, dataDir=dataDir, verbose=verbose)
    y = buildLabels(name, mfccMatching)
    dicL, dicV, dicT = split(mfccMatching, name, verbose=verbose)
    # learning data
    xL=[mfccs[k] for k in np.concatenate(dicL.values())]
    yL=[y[k] for k in np.concatenate(dicL.values())]
    # validation data
    xV=[mfccs[k] for k in np.concatenate(dicV.values())]
    yV=[y[k] for k in np.concatenate(dicV.values())]
    # testing data
    xT=[mfccs[k] for k in np.concatenate(dicT.values())]
    yT=[y[k] for k in np.concatenate(dicT.values())]

    for c in cs:
        if (verbose >= 1):
            print 'Processing C: ', c
            print 'Learning...'
        w = sgd.sgd(xL, yL, np.zeros(len(xL[0])+1), iterations, 1, sgd.L, 0.01, c)
        if (verbose >= 1):
            print 'Evaluating...'
        ev=sgd.eval(xV, yV, w[:-1], w[-1])
        evalC.append(ev)
        if (ev > evbC):
            evbC=ev
            bC=c

    if (verbose >= 1):
        print 'Building classifier with C:', bC, '...'
    xL2=xL+xV
    yL2=yL+yV
    w = sgd.sgd(xL2, yL2, np.zeros(len(xL[0])+1), iterations, 1, sgd.L, 0.01, bC)
    if (verbose >= 1):
        print 'Evaluating classifier...'
    ev=sgd.eval(xT, yT, w[:-1], w[-1])
    def f(wavFile):
        x = preprocess.mfcc(wavFile)
        tot=len(x)
        ok=0.0
        for i in xrange(tot):
            ok += np.dot(w[:-1], x[i]) + w[-1]
        return int(ok/tot > 0)
    if (verbose >= 1):
        print 'Exit buildClassifier'
    return f, ev, evalC
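sgd.eval is not shown in this excerpt. A minimal sketch of what it might compute, assuming it returns the accuracy of the linear rule w.x + b > 0 against 0/1 labels (that label convention is an assumption, not confirmed by the source):

import numpy as np

def eval(x, y, w, b):
    # Hypothetical sketch of sgd.eval: fraction of samples whose predicted
    # class (1 if w.x + b > 0, else 0) matches the label.
    correct = sum(int((np.dot(w, xi) + b > 0) == bool(yi)) for xi, yi in zip(x, y))
    return float(correct) / len(y)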
Example #4
def main():
    x_750 = []
    x_1000 = []
    for i in range(0, 30):
        x_750.append(sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=750))
        x_1000.append(
            sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000))
    x_750 = np.matrix(x_750)
    x_1000 = np.matrix(x_1000)
    print "sgd complete"
    print "750  iterations and 30 times, mean = %.5f, var = %.5f" % (np.mean(
        x_750[..., -1]), np.var(x_750[..., -1]))
    print "1000 iterations and 30 times, mean = %.5f, var = %.5f" % (np.mean(
        x_1000[..., -1]), np.var(x_1000[..., -1]))
Example #5
def word2vec_model(args, dataset):
    tokens = dataset.tokens()
    nWords = len(tokens)

    startTime = time.time()
    wordVectors = np.concatenate(
        ((np.random.rand(nWords, args.vector_size) - 0.5) / args.vector_size,
         np.zeros((nWords, args.vector_size))),
        axis=0)
    wordVectors = sgd(
        lambda vec: word2vec_sgd_wrapper(
            skipgram, tokens, vec, dataset, args.window_size,
            negSamplingCostAndGradient), wordVectors, args.learning_rate,
        args.iterations, None, args.use_saved, args.save_every,
        args.vector_path)
    # Note that normalization is not called here. This is not a bug:
    # normalizing during training would lose the notion of length.

    logging.info("training took %d seconds" % (time.time() - startTime))

    # concatenate the input and output word vectors
    wordVectors = np.concatenate(
        (wordVectors[:nWords, :], wordVectors[nWords:, :]), axis=0)
    # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:]
    return wordVectors
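The sgd driver used by word2vec_model above is not part of this excerpt; it is called as sgd(f, x0, step, iterations, postprocessing, use_saved, save_every, vector_path), where f maps the current parameter matrix to (cost, gradient) for one sampled batch. The following is only a minimal sketch under those assumptions (checkpointing omitted); it is distinct from the scalar sgd helper used by the other examples in this listing.

def sgd(f, x0, step, iterations, postprocessing=None, use_saved=False,
        save_every=1000, save_path=None):
    # Hypothetical sketch of the plain SGD driver assumed above.
    x = x0
    for it in range(1, iterations + 1):
        cost, grad = f(x)          # loss and gradient for one sampled batch
        x = x - step * grad        # vanilla SGD step
        if postprocessing is not None:
            x = postprocessing(x)
        if it % 10000 == 0:
            step *= 0.5            # occasional learning-rate annealing
    return x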
def main():
    start = time.clock()
    x_SGD = sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000)
    end = time.clock()
    print "SGD    time: %f" % (end - start)
    print "fsum       = %f" % (fsum(x_SGD[-1]))

    start = time.clock()
    x_GD = GradientDescent(fsum, fsumprime, -5, epson=0.0001)
    end = time.clock()
    print "GD     time: %f" % (end - start)
    print "fsum       = %f" % (fsum(x_GD[-1]))

    start = time.clock()
    x_new = NewtonMethod(fsum, fsumprime, fsumprimeprime, -5, epson=0.0001)
    end = time.clock()
    print "Newton time: %f" % (end - start)
    print "fsum       = %f" % (fsum(x_new[-1]))

    plt.subplot(311)
    plt.plot(x_SGD, 'r')
    plt.subplot(312)
    plt.plot(x_GD, 'b')
    plt.subplot(313)
    plt.plot(x_new, 'm')
    plt.show()
Example #7
def task3():
    sgd_params = {
        'GMM': {
            'alpha': 1.0,
            'mb_num': 200
        },
        'Peaks': {
            'alpha': 0.1,
            'mb_num': 250
        },
        'SwissRoll': {
            'alpha': 0.08,
            'mb_num': 250
        }
    }

    for data_set, params in sgd_params.items():
        X_tr, y_tr, X_te, y_te = get_data(data_set)
        _ = sgd(X_tr,
                y_tr,
                X_te,
                y_te,
                alpha=params['alpha'],
                mb_num=params['mb_num'],
                max_epochs=200,
                data_set=data_set)
Example #8
def main():
    x = sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000)
    print "sgd complete"
    f = []
    print "Plotting, may take a while"
    for n in x:
        f.append(fsum(n))
    
    plt.plot(f)
    plt.xlabel("Number of iterations(i)")
    plt.ylabel("fsum(x_i)")
    plt.show()
Example #9
    def fit(self, X, qmatrix=None, learn_b=True):
        """ Normalization: want elements of Q-matrix be betw. 0 - 1 """
        S, P = X.shape
        C = self.concepts
        if qmatrix is None:
            qmatrix = np.random.normal(loc=0.5, scale=0.1, size=(C, P))
        skills = np.random.normal(scale=0.1, size=(S, C))
        mdat = np.ma.masked_array(X, np.isnan(X))
        b = np.mean(mdat, axis=0).filled(0) if learn_b else np.zeros(P)

        self.qmatrix, self.skills, self.b = sgd(X, self.alpha, int(C),
                                                self.n_iters, qmatrix,
                                                skills, b, learn_b)
        self.prediction = np.dot(self.skills, self.qmatrix) + self.b
        return nan_rmse(X, self.prediction)
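nan_rmse is not defined in this excerpt; a minimal sketch, assuming it is the root-mean-square error computed only over the observed (non-NaN) entries of X:

import numpy as np

def nan_rmse(X, pred):
    # Hypothetical helper: RMSE restricted to observed entries of X.
    mask = ~np.isnan(X)
    return float(np.sqrt(np.mean((X[mask] - pred[mask]) ** 2)))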
Example #10
def main():
    start = time.clock()
    x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, 1000)
    end = time.clock()
    print "Time-sgd: ", end - start

    start = time.clock()
    [x_vals_g, k_g] = gradientdescent(fsum, fsumprime, -5, 0.0001, 0.1, 0.6)
    end = time.clock()
    print "Time-gradient descent: ", end - start

    start = time.clock()
    [x_vals_n, k_n] = newtonsmethod(fsum, fsumprime, fsumprimeprime, -5,
                                    0.0001, 0.1, 0.6)
    end = time.clock()
    print "Time-newton's method: ", end - start

    print 'SGD fsum(x*): ', fsum(x_vals_s[-1])
    print 'Gradient Descent fsum(x*): ', fsum(x_vals_g[-1])
    print '''Newton's method fsum(x*): ''', fsum(x_vals_n[-1])
Example #11
def my_lslr(dataset, max_epochs, alpha):
    # Initialize local variables
    coeffs = [0.0 for i in range(len(dataset.iloc[0,:]))]
    losses = []
    epochs = 0

    # Iterate until max_epochs updates have been made (note: `epochs` is
    # incremented once per row, i.e. per SGD update, not per full pass).
    while epochs < max_epochs:
        for index, data in dataset.iterrows():
            # Run the SGD algorithm.
            coeffs, y_real, y_pred = sgd(data, coeffs, alpha, 0)

            # Record loss for this epoch.
            losses.append(utilities.loss(y_real, y_pred))

            # Stop conditions.
            epochs += 1
            if epochs >= max_epochs:
                break

    print(epochs)
    return coeffs, losses
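The per-row sgd step used by my_lslr is not included. The sketch below is only an assumption: it treats the last column of each row as the target, coeffs[0] as the intercept, and applies the standard least-mean-squares update (the fourth argument is ignored here, since its meaning is not shown):

import numpy as np

def sgd(row, coeffs, alpha, _flag):
    # Hypothetical per-row least-squares update (LMS rule).
    x = np.asarray(row.iloc[:-1], dtype=float)
    y_real = float(row.iloc[-1])
    y_pred = coeffs[0] + float(np.dot(coeffs[1:], x))
    error = y_pred - y_real
    new_coeffs = [coeffs[0] - alpha * error]
    new_coeffs += [c - alpha * error * xi for c, xi in zip(coeffs[1:], x)]
    return new_coeffs, y_real, y_pred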
Example #12
def train(train_x, train_y, val_x, val_y, d, hl, ol, config, lf):

    print("Function Invoked: train")

    epochs = config["epochs"]
    eta = config["learning_rate"]
    alpha = config["weight_decay"]
    init_strategy = config["init_strategy"]
    optimiser = config["optimiser"]
    batch_size = config["batch_size"]
    ac = config["ac"]

    if optimiser == "vgd":
        return vgd.vgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
                       epochs, eta, init_strategy, alpha)

    elif optimiser == "sgd":
        return sgd.sgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
                       epochs, eta, init_strategy, alpha)

    elif optimiser == "mgd":
        return mgd.mgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
                       epochs, eta, init_strategy, batch_size, alpha)

    elif optimiser == "nag":
        return nag.nag(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
                       epochs, eta, init_strategy, batch_size, alpha)

    elif optimiser == "adam":
        return adam.adam(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
                         epochs, eta, init_strategy, batch_size)

    elif optimiser == "rmsprop":
        return rmsprop.rmsprop(train_x, train_y, val_x, val_y, d, hl, ol, ac,
                               lf, epochs, eta, init_strategy, batch_size)

    elif optimiser == "nadam":
        return nadam.nadam(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
                           epochs, eta, init_strategy, batch_size)
Example #13
    for c in [0,1]:
        for i in xrange(N):
            for j in [0,1]:
                x[c][i][j] = averages[c] + random.uniform(-1,1)
    res = (np.concatenate([x[0],x[1]]),y)
    return res

iterations=800
eps = 0.01
eta = 1
C=1
averages = [1,3]
sample=100
(xApp, yApp) = genData(averages, sample)
k=np.dot
w1 = sgd.sgd(xApp, yApp, np.zeros(len(xApp[0])+1),iterations,eta,sgd.L,eps,C)
w,b = w1[:-1],w1[-1]


# w=np.dot(alp, xApp)
yPred=[np.dot(w, xApp[i])+b for i in xrange(2*sample)]
# print np.multiply(yApp, yPred)

def makeTitle(iterations,eta,eps):
    res = 'SGD with '
    res = res + str(iterations) + ' iterations, '
    res = res + 'eta=' + str(eta)+ ', epsilon=' + str(eps)
    res = res + '\n'
    return res
    
Example #14
for delta in [0.05, 0.1, 0.2]:
    for learning_rate in [0.005, 0.01, 0.02]:
        print("starting: " + str(delta) + ", " + str(learning_rate))
        noise_b = lambda x: np.random.normal(0,delta)
        exp_points = int(iters/(datapoints*100))
        xs_exact_loss = np.zeros((exp,exp_points+1))
        xs_noisy_loss = np.zeros((exp,exp_points+1))
        xs_exactm_loss = np.zeros((exp,exp_points+1))
        xs_noisym_loss = np.zeros((exp,exp_points+1))
        xs_exact = np.zeros((exp,exp_points+1,dims))
        xs_noisy = np.zeros((exp,exp_points+1,dims))
        xs_exactm = np.zeros((exp,exp_points+1,dims))
        xs_noisym = np.zeros((exp,exp_points+1,dims))
        AT = np.transpose(A)
        (xs_exact_n) = sgd.sgd(A, b, np.zeros((dims,)), learning_rate, iters, 21200)[1]
        for j in range(exp):
            sys.stdout.write("running: " + str(j) + '\r')
            sys.stdout.flush()
            bn = generate_noise(datapoints, noise_b)
            b2 = b + bn
            (xs_noisy_n) = sgd.sgd(A, b2, np.zeros((dims,)), learning_rate, iters, 21200)[1]
            xs_exact[j] = xs_exact_n
            xs_noisy[j] = xs_noisy_n
            for i in range(exp_points+1):
                xs_exactm[j,i] = np.mean(xs_exact[j,(i//2):i+1,:], axis=0)
                xs_noisym[j,i] = np.mean(xs_noisy[j,(i//2):i+1,:], axis=0)
            xs_exact_loss[j] = np.linalg.norm(np.dot(x-xs_exact[j],AT), axis=1)**2
            xs_noisy_loss[j] = np.linalg.norm(np.dot(x-xs_noisy[j],AT), axis=1)**2
            xs_exactm_loss[j] = np.linalg.norm(np.dot(x-xs_exactm[j],AT), axis=1)**2
            xs_noisym_loss[j] = np.linalg.norm(np.dot(x-xs_noisym[j],AT), axis=1)**2
Example #15
    def validate(self, epochs=200, **kwargs):
        """Validates all models for all polynomials and all parameters, and stores data in validation_errors.

        Creates and populates pandas.DataFrame validation_errors with MSE from bootstrap and kfold resampling techniques,
        as well as model bias and variance from bootstrap, for all combinations of hyperparameters.

        Parameters:
        -----------
        epochs:     int
                    Number of epochs. 200 by default.
        **kwargs:   keyword arguments
                    Passed to sgd.sgd
        """
        model_properties = [model.property_dict for model in self.models]
        model_uniques, model_common = helpers.filter_dicts(model_properties)

        for unique in model_uniques:
            assert len(unique) == len(model_uniques[0]), \
                "All models must have the same property types"
            assert len(unique) > 0, \
                "Two models with the same property_dict have been sent in"

        index_parameters = helpers.listify_dict_values(model_uniques)
        parameter_names = [key for key in index_parameters]
        model_parameters = [values for _, values in index_parameters.items()]
        metric_texts = [metric.__doc__ for metric in self._metrics]

        errors_index = pd.MultiIndex.from_product(
            [metric_texts, *model_parameters],
            names=['Metric', *parameter_names])
        self.errors_df = pd.DataFrame(dtype=float,
                                      index=errors_index,
                                      columns=range(1, epochs + 1))

        if self.polynomials is not None:
            X_train = linear_models.poly_design_matrix(self.polynomials,
                                                       self.data['x_train'])
            X_validate = linear_models.poly_design_matrix(
                self.polynomials, self.data['x_validate'])

        y_train, y_validate = self.data['y_train'], self.data['y_validate']

        idx = pd.IndexSlice

        for i, (model,
                model_unique) in enumerate(zip(self.models, model_uniques)):
            print(
                f"\r |{'='*(i*50//len(self.models))}{' '*(50-i*50//len(self.models))}| {i/len(self.models):.2%}",
                end="",
                flush=True)

            if model.name == 'OLS' or model.name == 'Ridge':
                x_train, x_validate = X_train, X_validate
            else:
                x_train, x_validate = self.data['x_train'], self.data[
                    'x_validate']

            model_indexes = [model_unique[key] for key in index_parameters]

            for j, metric in enumerate(self._metrics):
                model.compile()
                errors = sgd.sgd(model,
                                 x_train,
                                 x_validate,
                                 y_train,
                                 y_validate,
                                 epochs=epochs,
                                 metric=metric,
                                 **kwargs)[1]
                self.errors_df.loc[tuple(
                    pd.IndexSlice[s]
                    for s in [metric.__doc__, *model_indexes])] = errors

        print("")

        self.errors_df.dropna(thresh=2, inplace=True)
        self.errors_df.to_csv("../dataframes/tune.csv")
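A hypothetical follow-up, assuming an instance named tuner whose validate() method is shown above and that the metrics are error-style (lower is better): errors_df is indexed by (Metric, *model parameters) with one column per epoch, so the best epoch per row can be read off directly.

best_values = tuner.errors_df.min(axis=1)     # best value reached per row
best_epochs = tuner.errors_df.idxmin(axis=1)  # epoch at which it was reached
print(best_values.sort_values().head())
print(best_epochs.head())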
Example #16
def main():
    print("loading data...", end="", flush=True)
    data = load_data(DATA_FILEPATH)
    print(" done")

    train_set, cv_set, test_set = data

    x_tr, y_tr = train_set
    x_cv, y_cv = cv_set
    x_te, y_te = test_set

    """with open("/home/erik/db/data.pkl", "rb") as f:
        x, y = pickle.load(f)

    tr = 300
    x = np.array(x, dtype="float64")
    y = np.array(y, dtype="int32")
    x_tr, y_tr = x[:tr, :], y[:tr]
    x_cv, y_cv = x[tr:, :], y[tr:]
    x_te, y_te = x_cv, y_cv"""

    print("\ttrain:", x_tr.shape, y_tr.shape)
    print("\tcv:", x_cv.shape, y_cv.shape)
    print("\ttest:", x_te.shape, y_te.shape)

    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")

    layer_0_params = (
        {#conv
            "n_inp_maps": 1,
            "inp_maps_shape": (28, 28),
            #"inp_maps_shape": (48, 32),
            "n_out_maps": 5,
            #"n_out_maps": 4,
            "filter_shape": (7, 7),
        },
        {#pool
            "shape": (2, 2)
        }
    )

    layer_1_params = (
        {#conv
            #"n_in_maps": 4,
            "n_out_maps": 10,
            #"n_out_maps": 6,
            "filter_shape": (5, 5),
        },
        {#pool
            "shape": (2, 2)
        }
    )

    fully_connected_layer_params = {
        #"n_inp": 10*4*4,
        "n_hidden": 64,
        "n_out": 10
    }

    inp = x.reshape((x.shape[0], 1, 28, 28))
    #inp = x.reshape((x.shape[0], 1, 48, 32))

    load = False
    if load:
        print("loading model...")
        with open("cnn_model.pkl", "rb") as f:
            clf = pickle.load(f)
    else:
        clf = cnn.ConvolutionalNeuralNetwork(
            #inp=inp,
            inp=x,
            conv_pool_layers_params=[
                layer_0_params,
                layer_1_params],
            fully_connected_layer_params=fully_connected_layer_params)


        with_validation = True

        x_tr_sh = theano.shared(x_tr, borrow=True)
        y_tr_sh = theano.shared(y_tr, borrow=True)
        x_cv_sh = theano.shared(x_cv, borrow=True)
        y_cv_sh = theano.shared(y_cv, borrow=True)

        if with_validation:
            print("calling sgd_with_validation", flush=True)
            sgd.sgd_with_validation(clf,
                x_tr_sh, y_tr_sh, x_cv_sh, y_cv_sh,
                #learning_rate=0.003, reg_term=0.03, 95%
                learning_rate=0.003, reg_term=0.03,
                batch_size=100, n_epochs=32,
                max_its=20000, improv_thresh=0.01, max_its_incr=4,
                x=x,
                rel_val_tol=4e-3,
                val_freq="auto",
                verbose=True)
        else:
            print("calling sgd")
            sgd.sgd(clf, x_tr, y_tr,
                learning_rate=0.1,
                reg_term=1,
                batch_size=32,
                rel_tol=2e-3,
                n_epochs=128,
                verbose=True)

        print("saving model...")
        with open("cnn_model.pkl", "wb") as f:
            pickle.dump(clf, f)

    acc = theano.function([clf.inp, y], clf.score(y))
    te_len = x_te.shape[0]
    print("accuracy: %.2f%%" % (100*acc(
        np.reshape(x_te, (te_len, 1, 28, 28)),
        #np.reshape(x_te, (te_len, 1, 48, 32)),
        y_te)))
def main():

    print "############# Load Datasets ##############"

    import stanfordSentimentTreebank as sst

    skip_unknown_words = bool(args.get("--skip"))
    shuffle_flag = bool(args.get("--shuffle"))
    datatype = args.get("--datatype")
    if datatype == 5:
        # Fine-grained 5-class
        n_class = 5
    elif datatype == 2:
        # Binary 2-class
        n_class = 2

    # print "skip_unknown_words",skip_unknown_words
    vocab, index2word, datasets, datasets_all_sentences, funcs = sst.load_stanfordSentimentTreebank_dataset(normalize=True, skip_unknown_words=skip_unknown_words, datatype=datatype)
    train_set, test_set, dev_set  = datasets
    train_set_sentences, test_set_sentences, dev_set_sentences = datasets_all_sentences
    get, sentence2ids, ids2sentence = funcs  # load the helper functions
    scores, sentences = zip(*train_set_sentences)
    sentences = [[word for word in sentence.lower().split()] for sentence in sentences]
    vocab_size = len(vocab)

 
    dev_unknown_count  = sum([unknown_word_count for score,(ids,unknown_word_count) in dev_set])
    test_unknown_count = sum([unknown_word_count for score,(ids,unknown_word_count) in test_set])

    train_set = [(score, ids) for score,(ids,unknown_word_count) in train_set]
    test_set  = [(score, ids) for score,(ids,unknown_word_count) in test_set]
    dev_set   = [(score, ids) for score,(ids,unknown_word_count) in dev_set]

    print "train_size : ", len(train_set)
    print "dev_size   : ", len(dev_set)
    print "test_size  : ", len(test_set)
    print "-"*30
    print "vocab_size: ", len(vocab)
    print "dev_unknown_words  : ", dev_unknown_count
    print "test_unknown_words : ", test_unknown_count



    
    print args

    # EMB_DIM = 50
    EMB_DIM = args.get("--emb_size")
    vocab_size = len(vocab)


    feat_map_n_1 = args.get("--feat_map_n_1")
    feat_map_n_final = args.get("--feat_map_n_final")

    height = 1
    width1 = args.get("--width1")
    width2 = args.get("--width2")
    k_top  = args.get("--k_top")
    n_class = n_class
    alpha   = args.get("--alpha")
    n_epoch = args.get("--n_epoch")
    dropout_rate0 = args.get("--dropout_rate0")
    dropout_rate1 = args.get("--dropout_rate1")
    dropout_rate2 = args.get("--dropout_rate2")
    activation = args.get("--activation")
    learn      = args.get("--learn")
    number_of_convolutinal_layer = 2
    use_regular = bool(args.get("--use_regular"))
    regular_c   = args.get("--regular_c")

    pretrain = args.get('--pretrain')
    if pretrain == 'word2vec':
        print "*Using word2vec"
        embeddings_W, model = pretrained_embedding.use_word2vec(sentences=sentences, index2word=index2word, emb_dim=EMB_DIM)
        # initialized in the range -0.5 to 0.5
    elif pretrain == 'glove':
        print "*Using glove"
        embeddings_W = pretrained_embedding.use_glove(sentences=sentences, index2word=index2word, emb_dim=EMB_DIM, model_file='glove_model/glove_50_iter2900.model')
    else:
        embeddings_W = np.asarray(
            rng.normal(0, 0.05, size = (vocab_size, EMB_DIM)), 
            dtype = theano.config.floatX
        )
        embeddings_W[0,:] = 0

    print np.amax(embeddings_W)
    print np.amin(embeddings_W)
    # print "*embeddings"
    print embeddings_W
    # print bool(embeddings)

    # input_x = [1, 3, 4, 5, 0, 22, 4, 5]

    print "############# Model Setting ##############"    
    x = T.imatrix('x')
    length_x = T.iscalar('length_x')
    y = T.ivector('y') # the sentence sentiment label
    embeddings = WordEmbeddingLayer(rng=rng, 
                            input=x,
                            vocab_size=vocab_size, embed_dm=EMB_DIM, embeddings=embeddings_W)


    def dropout(X, p=0.5):
        if p > 0:
            retain_prob = 1 - p
            X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
            # X /= retain_prob
        return X
    # number_of_convolutinal_layer = theano.shared(number_of_convolutinal_layer)
    # dynamic_func = theano.function(inputs=[length_x], outputs=number_of_convolutinal_layer * length_x)

    # dynamic_func_test = theano.function(
    #     inputs = [length_x],
    #     outputs = dynamic_func(length_x),
    #     )
    # print dynamic_func(len([1,2,3]))

    l1 = DynamicConvFoldingPoolLayer(rng, 
                              input = dropout(embeddings.output, p=dropout_rate0), 
                              filter_shape = (feat_map_n_1, 1, height, width1),  # two feature map, height: 1, width: 2, 
                              k_top = k_top,
                              number_of_convolutinal_layer=number_of_convolutinal_layer,
                              index_of_convolitonal_layer=1,
                              length_x=length_x,
                              activation = activation
    )
    l1_no_dropout = DynamicConvFoldingPoolLayer(rng, 
                              input = embeddings.output,
                              W=l1.W * (1 - dropout_rate0),
                              b=l1.b,
                              filter_shape = (feat_map_n_1, 1, height, width1),  # two feature map, height: 1, width: 2, 
                              k_top = k_top,
                              number_of_convolutinal_layer=number_of_convolutinal_layer,
                              index_of_convolitonal_layer=1,
                              length_x=length_x,
                              activation = activation
    )


    l2 = DynamicConvFoldingPoolLayer(rng, 
                              input = dropout(l1.output, p=dropout_rate1), 
                              filter_shape = (feat_map_n_final, feat_map_n_1, height, width2),
                              # two feature map, height: 1, width: 2, 
                              k_top = k_top,
                              number_of_convolutinal_layer=number_of_convolutinal_layer,
                              index_of_convolitonal_layer=2,
                              length_x=length_x,
                              activation = activation
    )
    l2_no_dropout = DynamicConvFoldingPoolLayer(rng, 
                              input = l1_no_dropout.output,
                              W=l2.W * (1 - dropout_rate1),
                              b=l2.b,
                              filter_shape = (feat_map_n_final, feat_map_n_1, height, width2),
                              # two feature map, height: 1, width: 2, 
                              k_top = k_top,
                              number_of_convolutinal_layer=number_of_convolutinal_layer,
                              index_of_convolitonal_layer=2,
                              length_x=length_x,
                              activation = activation
    )


    # l2_output = theano.function(
    #     inputs = [x,length_x],
    #     outputs = l2.output,
    #     # on_unused_input='ignore'
    # ) 

    # TODO:
    # check the dimension
    # input: 1 x 1 x 6 x 4
    # out = l2_output(
    #     np.array([input_x], dtype = np.int32),
    #     len(input_x),
    # )


    # test = theano.function(
    #     inputs = [x],
    #     outputs = embeddings.output,
    # ) 


    # print "--input--"
    # print np.array([input_x], dtype = np.int32).shape
    # print "--input embeddings--"
    # a = np.array([input_x], dtype = np.int32)
    # print test(a).shape
    # print "-- output --"
    # print out
    # print out.shape



    # x = T.dscalar("x")
    # b = T.dscalar("b")
    # a = 1
    # f = theano.function(inputs=[x,b], outputs=b * x + a)
    # print f(2,2)


    # expected = (1, feat_map_n, EMB_DIM / 2, k)
    # assert out.shape == expected, "%r != %r" %(out.shape, expected)

    ##### Test Part Three ###############
    # LogisticRegressionLayer
    #################################

    # print "############# LogisticRegressionLayer ##############"

    l_final = LogisticRegression(
        rng, 
        input = dropout(l2.output.flatten(2), p=dropout_rate2),
        n_in = feat_map_n_final * k_top * EMB_DIM,
        # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2
        n_out = n_class, # five sentiment level
    )

    l_final_no_dropout = LogisticRegression(
        rng, 
        input = l2_no_dropout.output.flatten(2),
        W = l_final.W * (1 - dropout_rate2),
        b = l_final.b,
        n_in = feat_map_n_final * k_top * EMB_DIM,
        # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2
        n_out = n_class, # five sentiment level
    )


    print "n_in : ", feat_map_n_final * k_top * EMB_DIM
    # print "n_in = %d" %(2 * 2 * math.ceil(EMB_DIM / 2.))


    # p_y_given_x = theano.function(
    #     inputs = [x, length_x],
    #     outputs = l_final.p_y_given_x,
    #     allow_input_downcast=True,
    #     # mode = "DebugMode"
    # )

    # print "p_y_given_x = "
    # print p_y_given_x(
    #     np.array([input_x], dtype=np.int32),
    #     len(input_x)
    # )

    cost = theano.function(
        inputs = [x, length_x, y],
        outputs = l_final.nnl(y),
        allow_input_downcast=True,
        # mode = "DebugMode"
    )

    # print "cost:\n", cost(
    #     np.array([input_x], dtype = np.int32), 
    #     len(input_x),
    #     np.array([1], dtype = np.int32)
    # )

    
    print "############# Learning ##############"

    from sgd import sgd, rmsprop, adagrad, adadelta, adam
    from regularizer import regularize_l2

    layers = []
    layers.append(embeddings)
    layers.append(l1)
    layers.append(l2)
    layers.append(l_final)


    cost = l_final.nnl(y)
    params = [p for layer in layers for p in layer.params]
    param_shapes = [l.param_shapes for l in layers]
    param_grads = [T.grad(cost, param) for param in params]

    # regularizer setting
    regularizers = {}
    regularizers['c'] = regular_c # 2.0, 4.0, 15.0
    regularizers['func'] = [None for _ in range(len(params))]
    if use_regular:
        regularizers_func = []
        regularizers_func.append([regularize_l2(l=0.0001)]) # [embeddings]
        regularizers_func.append([regularize_l2(l=0.00003), None]) # [W, b]
        regularizers_func.append([regularize_l2(l=0.000003), None]) # [W, b]
        regularizers_func.append([regularize_l2(l=0.0001), None]) # [logreg_W, logreg_b]
        regularizers_func = [r_func for r in regularizers_func for r_func in r]
        regularizers['func'] = regularizers_func

    # if third conv layer: 1e-5
    
    print embeddings.params
    print l1.params
    print l2.params
    print l_final.params




    # updates = sgd(cost, l_final.params)
    # RegE = 1e-4
    # print param_grads
    if learn == "sgd":
        updates = sgd(cost, params, lr=0.05)
    elif learn == "adam":
        updates = adam(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers)
    elif learn == "adagrad":
        updates = adagrad(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers)
    elif learn == "adadelta":
        updates = adadelta(loss_or_grads=cost, params=params, regularizers=regularizers)
    elif learn == "rmsprop":
        updates = rmsprop(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers)


    train = theano.function(inputs=[x, length_x, y], outputs=cost, updates=updates, allow_input_downcast=True)
    # predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    predict = theano.function(
        inputs = [x, length_x],
        outputs = T.argmax(l_final_no_dropout.p_y_given_x, axis=1),
        allow_input_downcast=True,
        # mode = "DebugMode"
    )




    def b(x_data):
        return np.array(x_data, dtype=np.int32)


    def test(test_set):
        # print "############# TEST ##############"
        y_pred = []
        test_set_y = []
        # for train_x, train_y in zip(X_data, Y_data):
        # print test_set
        # Accuracy_count = 0
        for test_y,test_x in test_set:
            test_x = b([test_x])
            p = predict(test_x, len(test_x))[0]
            y_pred.append(p)
            test_set_y.append(test_y)

            # if test_y == p:
            #     Accuracy_count += 1

            # print "*predict :",predict(train_x, len(train_x)), train_y 
        # Accuracy = float(Accuracy_count) / len(test_set)
        # print "  accuracy : %f" % Accuracy, 
        return accuracy_score(test_set_y, y_pred)
        # print classification_report(test_set_y, y_pred)

    # train_set_rand = np.ndarray(train_set)
    train_set_rand = train_set[:]
    train_cost_sum = 0.0
    for epoch in xrange(n_epoch):
        print "== epoch : %d =="  % epoch
        if shuffle_flag:
            np.random.shuffle(train_set_rand)
            # train_set_rand = np.random.permutation(train_set)
        for i,x_y_set in enumerate(train_set_rand):
            train_y, train_x = x_y_set
            train_x = b([train_x])
            train_y = b([train_y])

            train_cost = train(train_x, len(train_x) , train_y)
            train_cost_sum += train_cost
            if i % 1000 == 0 or i == len(train_set)-1:
                print "i : (%d/%d)" % (i, len(train_set)) , 
                print " (cost : %f )" % train_cost
        
        print '  cost :', train_cost_sum
        print '  train_set : %f' % test(train_set)
        print '  dev_set   : %f' % test(dev_set)
        print '  test_set  : %f' % test(test_set)





Example #18
    # Training should happen here
    # Initialize parameters randomly
    # Construct the params
    input_dim = 50
    hidden_dim = 50
    output_dim = vocabsize
    dimensions = [input_dim, hidden_dim, output_dim]
    params = np.random.randn(
        (input_dim + 1) * hidden_dim + (hidden_dim + 1) * output_dim, )
    print(f"#params: {len(params)}")
    print(f"#train examples: {num_of_examples}")

    # run SGD
    params = sgd(
        lambda vec: lm_wrapper(in_word_index, out_word_index,
                               num_to_word_embedding, dimensions, vec), params,
        LEARNING_RATE, NUM_OF_SGD_ITERATIONS, None, True, 1000)

    print(f"training took {time.time() - startTime} seconds")

    # Evaluate perplexity with dev-data
    perplexity = eval_neural_lm('data/lm/ptb-dev.txt')
    print(f"dev perplexity : {perplexity}")

    # Evaluate perplexity with test-data (only at test time!)
    if os.path.exists('data/lm/ptb-test.txt'):
        perplexity = eval_neural_lm('data/lm/ptb-test.txt')
        print(f"test perplexity : {perplexity}")
    else:
        print("test perplexity will be evaluated only at test time!")
Example #19
                                    test_bs=args.test_batch_size)

# make sure to use cudnn.benchmark for second backprop
cudnn.benchmark = True

# get model and optimizer
model = resnet(num_classes=10, depth=args.depth).cuda()
print(model)
model = torch.nn.DataParallel(model)
print('    Total params: %.2fM' %
      (sum(p.numel() for p in model.parameters()) / 1000000.0))

criterion = nn.CrossEntropyLoss()
if args.optimizer == 'sgd':
    optimizer = sgd(model.parameters(),
                    lr=args.lr,
                    momentum=0.9,
                    weight_decay=args.weight_decay)
elif args.optimizer == 'adam':
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
elif args.optimizer == 'adamw':
    print(
        'For AdamW, we automatically correct the weight decay term for you! If this is not what you want, please modify the code!'
    )
    args.weight_decay = args.weight_decay / args.lr
    optimizer = optim.AdamW(model.parameters(),
                            lr=args.lr,
                            weight_decay=args.weight_decay)
elif args.optimizer == 'adahessian':
    print(
Example #20
    # generate test data
    A_test = np.random.randn(n, d)
    y_test = np.sign(np.dot(A_test, x_true))

    # preprocess data
    tmp = lil_matrix((n, n))
    tmp.setdiag(y)
    data = theano.shared(tmp * A)

    # define objective function and gradient via Theano
    l2 = 1e-2
    par = T.vector()
    loss = T.log(1 + T.exp(-T.dot(data, par))).mean() + l2 / 2 * (par**2).sum()
    func = theano.function(inputs=[par], outputs=loss)
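    # Written out, the objective above is
    #     L(w) = mean_i log(1 + exp(-y_i * a_i^T w)) + (l2 / 2) * ||w||^2,
    # i.e. L2-regularized logistic loss; rows of `data` are y_i * a_i thanks to
    # the diagonal preprocessing with lil_matrix above.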

    idx = T.ivector()
    grad = theano.function(inputs=[par, idx],
                           outputs=T.grad(loss, wrt=par),
                           givens={data: data[idx, :]})

    print('\nBegin to run SGD:')
    x = sgd(grad, 1e-3, n, d, phi=lambda k: k, func=func, max_epoch=50)
    y_predict = np.sign(np.dot(A_test, x))
    print('Test accuracy: %f' % (np.count_nonzero(y_test == y_predict) / n))

    print('\nBegin to run SGD-mom:')
    x = sgd_mom(grad, 1e-3, n, d, phi=lambda k: k, func=func, max_epoch=50)
    y_predict = np.sign(np.dot(A_test, x))
    print('Test accuracy: %f' % (np.count_nonzero(y_test == y_predict) / n))
Example #21
# Context size
C = 5

# Reset the random seed to make sure that everyone gets the same results
random.seed(31415)
np.random.seed(9265)

startTime = time.time()
wordVectors = np.concatenate(
    ((np.random.rand(nWords, dimVectors) - 0.5) /
     dimVectors, np.zeros((nWords, dimVectors))),
    axis=0)

wordVectors = sgd(
    lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
                                     negSamplingLossAndGradient),
    wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10)
# Note that normalization is not called here. This is not a bug:
# normalizing during training would lose the notion of length.

print("sanity check: cost at convergence should be around or below 10")
print("training took %d seconds" % (time.time() - startTime))

# concatenate the input and output word vectors
wordVectors = np.concatenate(
    (wordVectors[:nWords, :], wordVectors[nWords:, :]),
    axis=0)

visualizeWords = [
    "great", "cool", "brilliant", "wonderful", "well", "amazing",
    "worth", "sweet", "enjoyable", "boring", "bad", "dumb",
Example #22
    sum = 0
    for i in range(0, maxi):
        sum = sum + fiprimeprime(x, i)
    return sum


if __name__ == "__main__":
    # this is just to visualize the function; you don't have to use this plotting code
    xvals = np.arange(-10, 10, 0.01)  # Grid of 0.01 spacing from -10 to 10
    yvals = fsum(xvals)  # Evaluate function on xvals
    plt.plot(xvals, yvals)  # Create line plot with yvals against xvals

    #this is the timing code you should use
    start = time.clock()
    #my sgd code
    x = sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000)
    end = time.clock()
    print "Result from sgd = %.5f" % (x[-1])
    print "xval corresponding to the minimal yval = %.5f" % (
        xvals[np.argmin(yvals)])
    print "Time: ", end - start

    plt.show()  #show the plot

    # Plot how SGD progresses
    # plt.plot(x)
    # plt.plot([0,1000],[6.4,6.4])
    # plt.xlabel("Number of iterations")
    # plt.ylabel("x")
    # plt.show()
    sgd_accuracy_list = []
    svm_accuracy_list = []
    logreg_accuracy_list = []
    print("Generating accoracy values")
    for i in range(n_iter):

        print("Iteration", i)
        X_train, X_valid, y_train, y_valid = train_test_split(train_data_clean,
                                                              targets,
                                                              train_size=0.8,
                                                              test_size=0.2,
                                                              shuffle=True)

        var = sgd(max_df=0.5,
                  reset_train_test_split=True,
                  X_train_arg=X_train,
                  y_train_arg=y_train,
                  X_valid_arg=X_valid,
                  y_valid_arg=y_valid)
        sgd_accuracy_list.append(var)

        var = svm(max_df=0.5,
                  reset_train_test_split=True,
                  X_train_arg=X_train,
                  y_train_arg=y_train,
                  X_valid_arg=X_valid,
                  y_valid_arg=y_valid)
        svm_accuracy_list.append(var)

        var = log_reg(max_df=0.5,
                      reset_train_test_split=True,
                      X_train_arg=X_train,
Example #24
def trainSgd(name, dic, x, C, iterations=None):
    y = build_labels(name, dic)
    if iterations is None:
        iterations = 10
    w = sgd.sgd(x, y, np.zeros(len(x[0]) + 1), iterations, 1, sgd.L, 0.01, C)
    return w
Example #25
model1.addLayer(neural_model.Input(64))
model1.addLayer(neural_model.Dense(64, activations=activations.relus))
model1.addLayer(neural_model.Output(10, d_func=lambda a, y, _: y - a))
model1.compile()

model2.addLayer(neural_model.Input(64))
model2.addLayer(neural_model.Dense(32, activations=activations.relus))
model2.addLayer(neural_model.Output(10, d_func=lambda a, y, _: y - a))
model2.compile()

errors_1 = sgd(model1,
               x_train,
               x_test,
               y_train,
               y_test,
               epochs=5000,
               epochs_without_progress=500,
               mini_batch_size=40,
               metric=metrics.accuracy)[1]
errors_2 = sgd(model2,
               x_train,
               x_test,
               y_train,
               y_test,
               epochs=5000,
               epochs_without_progress=500,
               mini_batch_size=40,
               metric=metrics.accuracy)[1]

print("Model1: 64x64x10, ReLu activation")
Example #26
def main():
    print("loading data...", end="", flush=True)
    data = load_data(DATA_FILEPATH)
    print(" done")

    train_set, cv_set, test_set = data

    x_tr, y_tr = train_set
    x_cv, y_cv = cv_set
    x_te, y_te = test_set

    print("\ttrain:", x_tr.shape, y_tr.shape)
    print("\tcv:", x_cv.shape, y_cv.shape)
    print("\ttest:", x_te.shape, y_te.shape)

    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")

    clf = mlp.MultiLayerPerceptron(x,
                                   n_inp=x_tr.shape[1],
                                   n_hidden=64,
                                   n_out=10)

    acc = theano.function([x, y], clf.score(y))

    with_validation = True

    x_tr_sh = theano.shared(x_tr, borrow=True)
    y_tr_sh = theano.shared(y_tr, borrow=True)
    x_cv_sh = theano.shared(x_cv, borrow=True)
    y_cv_sh = theano.shared(y_cv, borrow=True)

    if with_validation:
        print("calling sgd_with_validation", flush=True)
        sgd.sgd_with_validation(clf,
                                x_tr_sh,
                                y_tr_sh,
                                x_cv_sh,
                                y_cv_sh,
                                learning_rate=0.01,
                                reg_term=0.00005,
                                batch_size=256,
                                n_epochs=1000,
                                max_its=5000,
                                improv_thresh=0.01,
                                max_its_incr=4,
                                rel_val_tol=5e-3,
                                val_freq="auto",
                                verbose=True)
        print("accuracy: %.2f%%" % (100 * acc(x_te, y_te)))
    else:
        print("calling sgd")
        sgd.sgd(clf,
                x_tr_sh,
                y_tr_sh,
                learning_rate=0.1,
                reg_term=1,
                batch_size=32,
                n_epochs=128,
                rel_tol=2e-3,
                verbose=True)
        print("accuracy: %.2f%%" % (100 * acc(x_tr, y_tr)))
Example #27
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in xrange(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization 

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 
        weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg" : regularization, 
        "weights" : weights, 
Example #28
from plot_tools import max_df_plot
from log_reg import log_reg
from svm import svm
from sgd import sgd
import pickle
import os
import numpy as np

max_df_list = np.linspace(0.05, 1, 20)

if not os.path.isfile("max_df_%d.cPickle" % len(max_df_list)):
    print("Generate ")
    max_df_logreg = []
    max_df_svm = []
    max_df_sgd = []
    counter = 0
    for max_df in max_df_list:
        print("Iteration", counter)
        max_df_logreg.append(log_reg(max_df))
        max_df_svm.append(svm(max_df))
        max_df_sgd.append(sgd(max_df))
        counter += 1
    pickle.dump((max_df_list, max_df_logreg, max_df_svm, max_df_sgd),
                open("max_df_%d.cPickle" % len(max_df_list), 'wb'))
else:
    max_df_list, max_df_logreg, max_df_svm, max_df_sgd = pickle.load(
        open("max_df_%d.cPickle" % len(max_df_list), 'rb'))

max_df_plot(max_df_list, max_df_logreg, max_df_svm, max_df_sgd)