Example #1
def doit(dic,priors,classes,K,diag):
    err = {'train':list(), 'test':list()}
    for k in K:
        print '*'*15,'K =',str(k),'*'*15
        nums, means, covs, nll  = {},{},{},{}
        # Build GMM models
        for dif in dic['train']:
            data = pack(dic['train'][dif])
            for i in xrange(6):
                _nums,_means,_covs,_nll = gmm.gmm(data, weights=None, K=k, hard=True, diagcov=diag)
                if(i != 0):
                    if(_nll > nll[dif]):
                        continue
                nums[dif],means[dif],covs[dif],nll[dif] =  _nums,_means,_covs,_nll
        
        criteria = [snll for dif in dic['train']]
        kwparams = [{'nums':nums[dif], 'means':means[dif], 'covs':covs[dif], 'prior':priors[dif]} for dif in dic['train']]
        
        # Evaluate
        for x in dic:
            labels, labels_est = [], []
            for dif in dic[x]:
                points = dic[x][dif]
                labels += [dif for i in xrange(len(points))]
                labels_est += optcriterion(points, classes, criteria, kwparams=kwparams, _max=False);
            e = 100.0*sum( np.array(labels) != np.array(labels_est) ) / len(labels)
            err[x].append( e )
            
            print 'Confusion matrix for', x, 'data', '(K={:},diagcov={:})'.format(k,diag)
            utils.confusion(labels, labels_est, True)
            print '% Error: ', e,'\n'
    if(len(K) > 1):
        pl.plot(K,err['train'],'--', label= 'Train'+(' (diagcov=True)' if diag else ''))
        pl.plot(K,err['test'], label= 'Test'+(' (diagcov=True)' if diag else ''))
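
Example #1 keeps, per class, the GMM with the lowest negative log-likelihood over six random restarts, then labels each point with the class that minimises the prior-weighted negative log-likelihood. Below is a minimal sketch of that decision rule in NumPy/SciPy, assuming each class model is stored as (weights, means, covs); the snll/optcriterion helpers used above are not shown in this listing, so classify_point is an illustration only.

import numpy as np
from scipy.stats import multivariate_normal

def classify_point(x, models, priors):
    """Return the class whose GMM gives the smallest negative log joint."""
    scores = {}
    for c, (weights, means, covs) in models.items():
        # p(x | c) = sum_k w_k * N(x; mu_k, Sigma_k)
        px = sum(w * multivariate_normal.pdf(x, mean=m, cov=S)
                 for w, m, S in zip(weights, means, covs))
        scores[c] = -(np.log(px) + np.log(priors[c]))  # negative log joint
    return min(scores, key=scores.get)
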
Example #2
def doit(dic, priors, classes, K, diag):
    err = {'train': list(), 'test': list()}
    for k in K:
        print '*' * 15, 'K =', str(k), '*' * 15
        nums, means, covs, nll = {}, {}, {}, {}
        # Build GMM models
        for dif in dic['train']:
            data = pack(dic['train'][dif])
            for i in xrange(6):
                _nums, _means, _covs, _nll = gmm.gmm(data,
                                                     weights=None,
                                                     K=k,
                                                     hard=True,
                                                     diagcov=diag)
                if (i != 0):
                    if (_nll > nll[dif]):
                        continue
                nums[dif], means[dif], covs[dif], nll[dif] = _nums, _means, _covs, _nll

        criteria = [snll for dif in dic['train']]
        kwparams = [{
            'nums': nums[dif],
            'means': means[dif],
            'covs': covs[dif],
            'prior': priors[dif]
        } for dif in dic['train']]

        # Evaluate
        for x in dic:
            labels, labels_est = [], []
            for dif in dic[x]:
                points = dic[x][dif]
                labels += [dif for i in xrange(len(points))]
                labels_est += optcriterion(points,
                                           classes,
                                           criteria,
                                           kwparams=kwparams,
                                           _max=False)
            e = 100.0 * sum(
                np.array(labels) != np.array(labels_est)) / len(labels)
            err[x].append(e)

            print 'Confusion matrix for', x, 'data', '(K={:},diagcov={:})'.format(k, diag)
            utils.confusion(labels, labels_est, True)
            print '% Error: ', e, '\n'
    if (len(K) > 1):
        pl.plot(K,
                err['train'],
                '--',
                label='Train' + (' (diagcov=True)' if diag else ''))
        pl.plot(K,
                err['test'],
                label='Test' + (' (diagcov=True)' if diag else ''))
			templistlen = []
			for c in range (len(keyList)):
				templistlen.append(hmm.negloglik(temp,trans = MarkovModel[c][0],dists = MarkovModel[c][1]))
			#classifiedtests.append(keyList[(np.argmin(templistlen))])
			TestClassification.append(keyList[(np.argmin(templistlen))])
			OriginalTestClassification.append(keyList[a])
		llist = getlistoflengths(traindataStack[a])
		for b in range (len(llist)):
			temp = np.array([traindataStack[a][b]])
			templistlen = []
			for c in range (len(keyList)):
				templistlen.append(hmm.negloglik(temp,trans = MarkovModel[c][0],dists = MarkovModel[c][1]))
			TrainClassification.append(keyList[(np.argmin(templistlen))])
			OriginalTrainClassification.append(keyList[a])
	print "Test Confusion Matrix"
	utils.confusion(OriginalTestClassification,TestClassification)
	print "Train Confusion Matrix"
	utils.confusion(OriginalTrainClassification,TrainClassification)
	print k
diagcov = True
print "Diagonal Covariance Matrix"
for k in range (1,7):
	MarkovModel = []
	for a in traindataStack:
		trans = hmm.lrtrans(k)
		llist = getlistoflengths(a)
		MarkovModel.append(hmm.hmm(np.column_stack(a),llist,trans,diagcov = diagcov))
#	print np.shape(testdataStack)
#	print np.shape(testdataStack[0][0])
	TestClassification = []
	OriginalTestClassification = []
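
The fragment above scores each test sequence under every class's HMM with hmm.negloglik and picks the class with the smallest value; hmm.lrtrans(k) presumably builds a left-right (Bakis) transition matrix with k states. A minimal sketch of such a matrix, offered only as an assumption since the course hmm module itself is not shown here:

import numpy as np

def left_right_trans(k, stay=0.5):
    """k emitting states; each state either self-loops or advances one step."""
    T = np.zeros((k, k + 1))          # extra column leads to a terminating state
    for i in range(k):
        T[i, i] = stay                # probability of staying in state i
        T[i, i + 1] = 1.0 - stay      # probability of moving on to state i+1
    return T
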
Example #4
'''
Tune:
spp layer
bilinear: consider both the symmetric case (kernel pooling) and the asymmetric case (ideally two CNNs learning different features?); many combinations of these ideas are still unexplored
SE-net
confusion (label smoothing; see the sketch after these notes)
lr

stn
    http://blog.csdn.net/xbinworld/article/details/69049680
    http://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html

kaggle
    EDA
        https://www.kaggle.com/muonneutrino/exploration-transforming-images-in-python
        https://www.kaggle.com/submarineering/submarineering-size-matters-0-75-lb
        https://www.kaggle.com/keremt/getting-color-composites
        https://www.kaggle.com/asindico/icebergs-and-ships-eda-and-augmentation  (PIL)
        https://www.kaggle.com/dimitrif/other-sentinel-data (what processing the data went through)
    background
        https://www.kaggle.com/devm2024/keras-model-for-beginners-0-210-on-lb-eda-r-d (how the data is collected)
        https://www.kaggle.com/jgroff/despeckling-synthetic-aperture-radar-sar-images (denoising)
        https://www.kaggle.com/dimitrif/domain-knowledge
        https://www.kaggle.com/plarmuseau/how-to-use-the-angle-imho
    angle
        https://www.kaggle.com/c/statoil-iceberg-classifier-challenge/discussion/46195#261600
        https://www.kaggle.com/brassmonkey381/viewing-leak-and-machine-images
    Pseudo-labeling
        https://towardsdatascience.com/simple-explanation-of-semi-supervised-learning-and-pseudo-labeling-c2218e8c769b
        https://www.kaggle.com/c/statoil-iceberg-classifier-challenge/discussion/45852
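
For the "confusion (label smoothing)" item above, here is a minimal PyTorch sketch of label-smoothed cross-entropy; it is an illustration only, not code from this listing.

import torch.nn.functional as F

def label_smoothing_ce(logits, target, eps=0.1):
    # Target distribution: (1 - eps) on the true class, eps spread uniformly.
    logp = F.log_softmax(logits, dim=-1)
    nll = -logp.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
    uniform = -logp.mean(dim=-1)
    return ((1.0 - eps) * nll + eps * uniform).mean()
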
    OriginalTestClassification = []
    TrainClassification = []
    OriginalTrainClassification = []
    for a in range(0, 5):
        for b in range(3, len(signatures[a])):
            OriginalTestClassification.append(a)
            templist = []
            for c in range(0, 5):
                #print np.shape(np.array([np.column_stack(signatures[a][b]).T]))
                templist.append(
                    hmm.negloglik(np.array(
                        [np.column_stack(signatures[a][b]).T]),
                                  trans=MarkovModel[c][0],
                                  dists=MarkovModel[c][1]))
            TestClassification.append(np.argmin(templist))
    utils.confusion(OriginalTestClassification, TestClassification)
    print "completed for k = %d" % k
diagcov = True
#print np.shape(signatures)
#print np.shape(np.column_stack(signatures[1][0:3]).T)
print "Diagonal Covariance Matrix"
for k in range(1, 7):
    MarkovModel = []
    for a in range(0, 5):
        trans = hmm.lrtrans(k)
        llist = []
        for b in range(0, 3):
            #                       print np.shape(signatures[a][b])
            llist.append([len(signatures[a][b])])
        MarkovModel.append(
            hmm.hmm(np.column_stack(signatures[a][0:3]), llist, trans, diagcov=diagcov))
Example #6
            hidden_num -= 2
            new_rnn = reduced_rnn_net(old_rnn, int(row['row']),
                                      int(row['col']), hidden_num)

            print(
                "\n======= RNN hidden size: {}==========\n".format(hidden_num))

            start_time = time.time()
            # Unsqueeze from 2 dimensions to 3 dimensions to match the RNN model.
            acc, pred = test_model(new_rnn, flat_input_test, y_test,
                                   test_seq_lens)
            stop_time = time.time()
            print("Execution time: %s ms" % ((stop_time - start_time) * 1000))
            times.append((stop_time - start_time) * 1000)

            mat = confusion(x_test.size(0), 3, pred, y_test)
            F1_score(mat)

            # Save the new network and evaluate its vector angle.
            rnns.append(new_rnn)
            old_rnn = new_rnn

            saveNNParas(new_rnn, x_test, hidden_num)
            vectors = pd.read_excel('vector_angle.xls', header=None)
            if (vectors.empty):
                cnt = 10
                print("\n Finished: Vectors are empty! \n")
                break

            df = pd.DataFrame({
                'row': vectors.iloc[:, 0],
# Various sequence lengths used for padded and packed sequences in the RNN model.
l = [1104, 1028, 980, 964, 960, 956, 956, 932, 868, 840, 836, 808]
train_seq_lens = np.zeros((12 * 12))
for i in range(len(l)):
    train_seq_lens[i * 12:(i + 1) * 12] = l[i]
test_seq_lens = np.zeros((4 * 12))
for i in range(len(l)):
    test_seq_lens[i * 4:(i + 1) * 4] = l[i]
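
The sequence-length arrays above exist so that padded batches can be packed before running through the RNN. train_model and test_model are not shown in this listing; the sketch below is only an assumption about how such lengths are typically consumed.

from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

def run_rnn_packed(rnn, padded, lengths):
    # padded: (batch, max_len, features); lengths: true length of each sample.
    packed = pack_padded_sequence(padded, lengths,
                                  batch_first=True, enforce_sorted=False)
    out, hidden = rnn(packed)
    out, _ = pad_packed_sequence(out, batch_first=True)
    return out, hidden
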

# Unsqueeze from 2 dimensions to 3 dimensions to match the RNN model.
flat_input_train = input_train.unsqueeze(-1)
train_model(rnn,
            flat_input_train,
            label_train,
            train_seq_lens,
            lr=lr,
            epochs=epochs)
flat_input_test = input_test.unsqueeze(-1)
acc, pred = test_model(rnn, flat_input_test, label_test, test_seq_lens)

# if accuracy > 40:
#    all_weight = rnn.rnn.all_weights.data
#    saveExcel(all_weight, 'all_weight.xls', u'sheet1')
#    saveDataset(input_test, label_test)

mat = confusion(input_test.size(0), output_dim, pred, label_test)
print("Confusion Matrix:")
print(mat)
F1_score(mat)
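
confusion and F1_score are project helpers that are not shown in this listing. A minimal sketch of per-class F1 computed from a confusion matrix, assuming rows are true labels and columns are predictions:

import numpy as np

def f1_from_confusion(mat):
    mat = np.asarray(mat, dtype=float)
    tp = np.diag(mat)
    precision = tp / np.maximum(mat.sum(axis=0), 1e-12)
    recall = tp / np.maximum(mat.sum(axis=1), 1e-12)
    return 2 * precision * recall / np.maximum(precision + recall, 1e-12)
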
#normalized = min_max_norm(raw_data, 14)
#normalized.to_excel('music-features-processed.xlsx')

X_train, Y_train, X_test, Y_test = load_data(
    'music-affect_v1/music-features-processed.xlsx',
    features_num,
    label_loc,
    features_selector=selector,
    spliting_ratio=0.8)

net = Net(features_num, hidden_num, classes_num)
train_model(net, X_train, Y_train, lr=learning_rate, epochs=epochs_num)

start_time = time.time()
accuracy, Y_pred = test_model(net, X_test, Y_test)
print("Execution time: %s ms" % ((time.time() - start_time) * 1000))

# Save relevant parameters for analysis.
if accuracy > 40:
    saveNNParas(net, X_test, hidden_num)
    torch.save(net.state_dict(), 'net_model.pt')
    saveDataset(X_train, Y_train, 'training')
    saveDataset(X_test, Y_test, 'testing')

mat = confusion(X_test.size(0), classes_num, Y_pred, Y_test)
print("Confusion Matrix:")
print(mat)
F1_score(mat)

print("\n========================== END ==================================")