Code example #1
File: Linear.py  Project: Kartepolo/Feature-Machine
def crossvalidate(data, output, lam, pd):
    """Runs 5-fold cross-validation of the NLayer/Regressor model and returns the mean held-out MSE."""
    # Folds are built over the sample axis (samples are stored as columns);
    # this uses the pre-0.18 scikit-learn KFold signature.
    kf = KFold(data.shape[1], n_folds= 5)
    # Layer spec: an NLayer sized to the number of features, followed by a single sigmoid Regressor output.
    L = []
    L.append({"type":'NLayer',"params":{"size": data.shape[0], "ac_func":'sigmoid', "lam": lam,"sparsity_param":0} })
    L.append({"type":'Regressor',"params":{"size": 1, "ac_func":'sigmoid', "lam": lam,"sparsity_param":0} })
    lr = model(Layers = L,lam = lam, beta = 0)
    err = []
    for train_index, test_index in kf:
        data_train, data_test = data[:,train_index], data[:,test_index]
        output_train, output_test = output[train_index], output[test_index]
        # Normalize the training and test splits independently with the project's norm() helper.
        data_train = norm(data_train,'norm')
        data_test = norm(data_test,'norm')
        result = lr.naive_SGD(data = data_train, output = output_train, **pd)
        pred = lr.predict(data_test)
        # Mean squared error on the held-out fold (samples are columns).
        err.append(1.0 * np.sum((pred - output_test) ** 2) / data_test.shape[1])
    return 1.0 * sum(err) / len(err)
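A minimal driver for crossvalidate() might look like the sketch below; the array shapes, random data, and parameter dict are assumptions for illustration, and the project-internal model/norm helpers plus the pre-0.18 scikit-learn KFold import are still required.

# Hypothetical usage sketch for crossvalidate(); shapes and settings are assumptions, not taken from the project.
import numpy as np

X = np.random.rand(15, 60)   # 15 features x 60 samples (samples are stored as columns)
y = np.random.rand(60)       # one regression target per sample
pd = {'decay': 0.99, 'l_rate': 0.5, 'batches': 1, 'Uafterbatch': True}

cv_mse = crossvalidate(X, y, lam=0, pd=pd)
print('Mean 5-fold CV MSE: %f' % cv_mse)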
Code example #2
File: Linear.py  Project: Kartepolo/Feature-Machine
def hundredcv():
    """Repeats training and 5-fold cross-validation 100 times and plots how the error
    responds to the decay and step-size (learning-rate) settings."""
    # Read the CSV: the first row holds the column headers, the remaining rows the samples.
    with open('mortality-dataset.csv','rU') as f:
        f_csv = csv.reader(f)
        headers = next(f_csv)
        result = []
        for row in f_csv:
            result.append(row)
    cols = len(headers)
    # Transpose so that samples become columns; row 0 is the target, the remaining rows are the features.
    result = np.array(result,dtype=np.float).transpose()
    data = result[1:,]
    output = result[0,].transpose()
    summary = []
    L = []
    L.append({"type":'NLayer',"params":{"size": cols - 1, "ac_func":'sigmoid', "lam": 0,"sparsity_param":0} })
    L.append({"type":'Regressor',"params":{"size": 1, "ac_func":'sigmoid', "lam": 0,"sparsity_param":0} })
    lr = model(Layers = L,lam = 0, beta = 0)
    err = []
    ndata = norm(data, 'norm')
    # Training error: fit on the full (z-score normalized) data 100 times and keep each run's final cost.
    for i in xrange(100):
        result0 = lr.naive_SGD(data = ndata, output = output, decay = 0.99,l_rate = 0.5,batches = 1)
        err.append(result0['cost'][-1])
    summary.append(err)
    # Baseline 5-fold CV error with the same settings, repeated 100 times.
    pd = {'decay' : 0.99, 'l_rate' : 0.5, 'batches' : 1, 'Uafterbatch' : True}
    err = []
    for i in xrange(100):
        err.append(crossvalidate(data,output, lam = 0, pd = pd))
    summary.append(err)
    #CV for decay
    for d in [0.7,0.75,0.8,0.85,0.9,0.95,0.99]:
        cv_error = []
        pd['decay'] = d
        for i in xrange(100):
            cv_error.append(crossvalidate(data,output, lam = 0, pd = pd))
        summary.append((cv_error, d))
    #CV for step
    pd['decay'] = 0.85
    for l in [0.1,0.3,0.5,0.7,0.9]:
        cv_error = []
        pd['l_rate'] = l
        for i in xrange(100):
            cv_error.append(crossvalidate(data,output, lam = 0, pd = pd))
        summary.append((cv_error, l))
    p0, = plt.plot(xrange(1, 101) , summary[0])
    p1, = plt.plot(xrange(1, 101) , summary[1])
    plt.legend([p0,p1],['Training Error','5-Fold CV Error'])
    plt.ylabel('Mean of Squared Error')
    plt.xlabel('Runs of Experiment')
    plt.title('Error Comparison with Decay = 0.99, Step = 0.5')
    plt.show()
    # Indices 2..8 of summary hold (cv_error list, decay) pairs from the decay sweep.
    mean = [1.0 * sum(x[0])/len(x[0]) for x in summary[2:9]]
    Decays = [x[1] for x in summary[2:9]]
    plt.plot(Decays, mean)
    plt.ylabel('Averaged MSE over 100 Experiments')
    plt.xlabel('Decay')
    plt.title('CV Error for different Decay')
    plt.show()
    # Indices 9..13 hold (cv_error list, l_rate) pairs from the step-size sweep.
    mean = [1.0 * sum(x[0])/len(x[0]) for x in summary[9:14]]
    Steps = [x[1] for x in summary[9:14]]
    plt.plot(Steps, mean)
    plt.ylabel('Averaged MSE over 100 Experiments')
    plt.xlabel('Step')
    plt.title('CV Error for different Step')
    plt.show()
    print 'End'
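hundredcv() only plots and prints, so summary is discarded when it returns: summary[0] is the training-error series, summary[1] the baseline CV series, summary[2:9] the (cv_error, decay) pairs and summary[9:14] the (cv_error, l_rate) pairs. A hedged sketch of how the best decay and step could be read off, assuming the function were changed to end with return summary:

# Hypothetical post-processing sketch; assumes hundredcv() is modified to `return summary`.
def best_settings(summary):
    decay_sweep = summary[2:9]    # (cv_error list, decay) pairs
    step_sweep = summary[9:14]    # (cv_error list, l_rate) pairs
    avg = lambda errs: sum(errs) / float(len(errs))
    best_decay = min(decay_sweep, key=lambda x: avg(x[0]))[1]
    best_step = min(step_sweep, key=lambda x: avg(x[0]))[1]
    return best_decay, best_step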
Code example #3
File: Linear.py  Project: Kartepolo/Feature-Machine
def test():
    """Compares normalization methods, batch counts, and a 16-combination grid of SGD settings
    on the mortality data set."""
    with open('mortality-dataset.csv','rU') as f:
        f_csv = csv.reader(f)
        headers = next(f_csv)
        result = []
        for row in f_csv:
            result.append(row)
    cols = len(headers)
    # Transpose so that samples become columns; row 0 is the target, the remaining rows are the features.
    result = np.array(result,dtype=np.float).transpose()
    data = result[1:,]
    output = result[0,].transpose()
    L = []
    L.append({"type":'NLayer',"params":{"size": cols - 1, "ac_func":'sigmoid', "lam": 1,"sparsity_param":0} })
    L.append({"type":'Regressor',"params":{"size": 1, "ac_func":'sigmoid', "lam": 1,"sparsity_param":0} })
    lr = model(Layers = L,lam = 0, beta = 0)
    # Compare 'max' scaling (53 batches) against z-score ('norm') normalization (single batch).
    result1 = lr.naive_SGD(data = data, output = output, decay = 0.99, norm_method='max', l_rate = 0.7,batches = 53, Uafterbatch= False)
    result2 = lr.naive_SGD(data = data, output = output, decay = 0.99, norm_method='norm', l_rate = 0.7,batches = 1, Uafterbatch= False)
    norm_max, = plt.plot(xrange(1, len(result1['cost']) + 1) , result1['cost'],'b')
    norm_norm, = plt.plot(xrange(1, len(result2['cost']) + 1) , result2['cost'],'g')
    plt.legend([norm_max,norm_norm],['Max','Z-score'])
    plt.ylabel('Average SSE')
    plt.xlabel('Runs of Batch Updating')
    plt.title("Comparison of Normalization Methods")
    plt.show()

    # Same z-score setup with 1, 5, and 53 batches to show the impact of batch/epoch size.
    result1 = lr.naive_SGD(data = data, output = output, decay = 0.8, norm_method='norm', l_rate = 0.1,batches = 1, Uafterbatch= True)
    result2 = lr.naive_SGD(data = data, output = output, decay = 0.8, norm_method='norm', l_rate = 0.1,batches = 5, Uafterbatch= True)
    result3 = lr.naive_SGD(data = data, output = output, decay = 0.8, norm_method='norm', l_rate = 0.1,batches = 53, Uafterbatch= True)
    b1, = plt.plot(xrange(1, len(result1['cost']) + 1) , result1['cost'],'b')
    b5, = plt.plot(xrange(1, len(result2['cost']) + 1) , result2['cost'],'g')
    b53, = plt.plot(xrange(1, len(result3['cost']) + 1) , result3['cost'],'r')
    plt.legend([b1,b5,b53],['Batch','Every 5','Each'])
    plt.ylabel('Average SSE')
    plt.xlabel('Runs of Batch Updating')
    plt.title("Impact of Epoch Size")
    plt.show()


    Decay = [0.99, 0.96]
    Norm = ['max','norm']
    Batch = [1, 5]
    Uafter = [True, False]
    l_rate = [0.3,0.7]
    # Mean_Max_min maps each of the 16 (decay, norm, l_rate, Uafterbatch) combinations to the
    # (mean, max, min) of the best cost over 100 runs; both batch settings share the same key.
    Mean_Max_min= {}
    for i in xrange(16):
        Mean_Max_min[i + 1] = []
    counter = 1
    for d in Decay:
        for n in Norm:
            for l in l_rate:
                for u in Uafter:
                    for b in Batch:
                        Experiments = []
                        for runs in xrange(100):
                            result = lr.naive_SGD(data = data, output = output, decay = d, norm_method=n, l_rate = l,batches = b, Uafterbatch= u)
                            Experiments.append(result['cost'])
                        # Best (lowest) cost reached in each of the 100 runs.
                        final = [min(s) for s in Experiments]
                        mean = sum(final) * 1.0 / len(final)
                        Mean_Max_min[counter].append([(mean, max(final), min(final)),(d,n,l,u)])
                    counter += 1

    print 'Grid search finished'
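Nothing in test() reports the contents of Mean_Max_min. A hedged sketch of how the grid results could be printed afterwards, assuming the dictionary structure built above:

# Hypothetical reporting helper for the grid search in test(); not part of the original project.
def report_grid(Mean_Max_min):
    for key in sorted(Mean_Max_min):
        for stats, settings in Mean_Max_min[key]:
            mean_cost, max_cost, min_cost = stats
            d, n, l, u = settings
            print('combo %2d: decay=%.2f norm=%s l_rate=%.1f Uafterbatch=%s '
                  'mean=%.4f max=%.4f min=%.4f'
                  % (key, d, n, l, u, mean_cost, max_cost, min_cost))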