Example #1
def main():

    # In our main method we load and scale the data, one-hot
    # encode the labels, and initialise our deep neural net.

    # Below we test the network with both activation functions,
    # 'ReLU' and 'Tanh'.

    train_data = pd.read_csv('mnist_train.csv')
    test_data = pd.read_csv('mnist_test.csv')

    train_labels = train_data.iloc[:, 0]
    test_labels = test_data.iloc[:, 0]  # kept as integers for the accuracy check
    train_labels = one_hot(train_labels, 10)

    train_data = train_data.iloc[:, 1:] / 255.00
    test_data = test_data.iloc[:, 1:] / 255.00

    train_data = train_data.to_numpy()
    test_data = test_data.to_numpy()

    dnn_relu = DeepNeuralNet(0.001, 784, 2, 28, 10, 5, "relu", "MEGATRON!")

    dnn_relu.fit(train_data, train_labels)

    predictions = []

    for item in test_data:
        p = list(dnn_relu.predict(item))
        predictions.append(p.index(max(p)))

    dnn_relu.accuracy_scores(predictions, test_labels)

    print("\n\n=============\n\n")

    dnn_tanh = DeepNeuralNet(0.001, 784, 2, 28, 10, 5, "tanh", "MEGATRON!")

    dnn_tanh.fit(train_data, train_labels)

    predictions = []

    for item in test_data:
        p = list(dnn_tanh.predict(item))
        predictions.append(p.index(max(p)))

    dnn_tanh.accuracy_scores(predictions, test_labels)
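The example above assumes `one_hot(labels, num_classes)` and `DeepNeuralNet` are defined elsewhere in the project. A minimal NumPy sketch of what such a `one_hot` helper might look like (an assumption about its behaviour, not the repository's actual code):

import numpy as np

def one_hot(labels, num_classes):
    # one row per label, with a single 1 at that label's class index
    labels = np.asarray(labels, dtype=int).ravel()
    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded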
Example #2
def one_hot_encode(messages, dimension):
    # Encode each message using the given vocabulary dimension.
    data = []
    for msg in messages:
        data.append(one_hot(msg, dimension))
    return data
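A hypothetical usage, assuming `one_hot` here is the Keras-style text encoder (`keras.preprocessing.text.one_hot`) that hashes each word of a message to an integer index below the given dimension:

messages = ["free prize inside", "meeting at noon"]
encoded = one_hot_encode(messages, 5000)
# e.g. [[412, 1287, 3954], [2210, 87, 4641]] -- exact indices depend on the hash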
Example #3
def getXY(input, algo, model, test=0):
	"""
	input: 预处理过的语料库
	algo: 使用的特征权重计算方法名
	model: 使用的模型名	

	test = 0 : 记录文件中出现的词汇并构造词汇表(训练集)
	test = 1 : 不构造词汇表,用已经构造好的(测试集)
	
	"""
	global package
	corpus = preprocess(input, package, test)
	labelset = package["labelset"]
	voca = package["voca"]
	
	level = 2
	mod = 0
	if algo == "tf_idf":
		weights = tf_idf(corpus,test,package)
		mod=1
	elif algo == "tf_dc":
		weights = tf_dc(corpus,test,package)
	elif algo == "tf_bdc":
		weights = tf_bdc(corpus,test,package)
	elif algo == "iqf_qf_icf":
		weights = iqf_qf_icf(corpus,test,package)
	elif algo == "tf_eccd":
		weights = tf_eccd(corpus,test,package)
	elif algo == "tf_ig":
		weights = tf_ig(corpus,test,package)
	elif algo == "tf_rf":
		weights = tf_rf(corpus,test,package)
		level = 3
	elif algo == "tf_chi":
		weights = tf_chi(corpus,test,package)
		level = 3
	X = []
	Y = []
	count = 0
	vocalen = len(voca)
	for doc in corpus:
		if count % 100 == 0:
			print(str(count) + "/" + str(len(corpus)))
		count += 1
		# process label: append to the fixed labelset so class positions stay
		# consistent, take the argmax of the final one-hot row, then restore
		labelset.append(doc["label"])
		Y.append(int(np.argmax(one_hot(labelset)[-1])))
		labelset = labelset[:-1]
		
		# process word
		temvocalist = voca + doc["split_sentence"]
		tem_one_hot = one_hot(temvocalist)[vocalen:]
		for word in range(len(tem_one_hot)):
			temlabel = doc["label"]
			temword = doc["split_sentence"][word]
			temdoc = doc["document"]
			if level == 2:
				if mod == 0:  # supervised weighting
					tem_one_hot[word] *= weights[temlabel][temword]
				else:  # unsupervised weighting
					tem_one_hot[word] *= weights[temdoc][temword]
			else:
				tem_one_hot[word] *= weights[temlabel][temdoc][temword]

		tem_one_hot = np.max(tem_one_hot, axis=0)  # keep only each column's maximum
		if model.lower() == "knn":
			tem_one_hot = preprocessing.normalize(np.array(tem_one_hot).reshape(1, -1), norm='l2')
		X.append(tem_one_hot)

	return np.squeeze(X), Y
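A hypothetical call for illustration; `train_docs` is an assumed name for a preprocessed corpus, not something defined in the original source:

X_train, Y_train = getXY(train_docs, "tf_idf", "knn", test=0)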
Example #4

# neural networks
# First convert y to one-hot form, i.e. (len(y), np.unique(y).shape[0]) ==> (30804, 3)
def expand(y, kind):
    res = []
    for y_i in y:
        y_array = np.zeros(kind)
        y_array[y_i + 1] = 1  # shift labels -1/0/1 to indices 0/1/2
        res.append(y_array)
    return np.array(res)


y_train_softmax = expand(y_train, 3)
print(y_train_softmax.shape)
# theta1(25, 101)  theta2(3, 26)
final_theta = NeuralNetworks.neural_network(x_train, y_train_softmax)
print('neural network accuracy: ',
      NeuralNetworks.accuracy(final_theta, x_test, y_test))

# Naive Bayes
word_vec = one_hot(sentiment)
x_train, x_test, y_train, y_test = train_test_split(word_vec,
                                                    y,
                                                    test_size=0.25,
                                                    stratify=y)
p_class_vect, p_class = NativeBayes.train_bayes(x_train, y_train)
y_pred = NativeBayes.predict_bayes(p_class_vect, p_class, x_test, y_test)
y_pred = y_pred - 1  # -1 0 1
print('Naive Bayes accuracy: ', NativeBayes.accuracy(y_pred, y_test))
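As a quick sanity check of `expand` above (the labels take the values -1, 0, 1, which the `y_i + 1` shift assumes):

print(expand(np.array([-1, 0, 1]), 3))
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]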
Example #5
#!/usr/bin/python3
from keras.datasets import cifar10
from model import Sequential
from layers.pool import MaxPool
from one_hot import one_hot

# Load CIFAR-10, scale pixel values into [0, 1], and one-hot encode the labels
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

x_train, x_test = x_train / 255, x_test / 255
y_train = one_hot(y_train)

model = Sequential(x_train, y_train)

# three Conv -> Activation -> Pool stages
model.add_Conv(32, (3, 3))
model.add_Activation()
model.add_Pool()

model.add_Conv(32, (3, 3))
model.add_Activation()
model.add_Pool()

model.add_Conv(64, (3, 3))
model.add_Activation()
model.add_Pool()

# fully connected head with a 10-way output
model.add_Dense(512)
model.out(10)

model.compile(1, 32)
Example #6
def getXY(input, algo, model, test=0, k=25):
    """
    input: 预处理过的语料库
    algo: 使用的特征权重计算方法名
    model: 使用的模型名

    test = 0 : 记录文件中出现的词汇并构造词汇表(训练集)
    test = 1 : 不构造词汇表,用已经构造好的(测试集)

    """
    global package

    corpus = preprocess(input, package, test, k)
    labelset = package["labelset"]  # 获得preprocess确定的package
    voca = package["voca"]

    level = 2
    mod = 0
    if algo == "tf_idf":
        weights = tf_idf(corpus, test, package)
        mod = 1
    elif algo == "tf_dc":
        weights = tf_dc(corpus, test, package)
    elif algo == "tf_bdc":
        weights = tf_bdc(corpus, test, package)
    elif algo == "iqf_qf_icf":
        weights = iqf_qf_icf(corpus, test, package)
    elif algo == "tf_eccd":
        weights = tf_eccd(corpus, test, package)
    elif algo == "tf_ig":
        weights = tf_ig(corpus, test, package)
    elif algo == "tf_rf":
        weights = tf_rf(corpus, test, package)
        level = 3
    elif algo == "tf_chi":
        weights = tf_chi(corpus, test, package)
        level = 3
    elif algo == "tf_mrf":
        weights = tf_mrf(corpus, test, package)
        level = 3
    elif algo == "tf_nrf":
        weights = tf_nrf(corpus, test, package)
        level = 3
    elif algo == "tf_vc":
        weights = tf_vc(corpus, test, package)

    X = []
    Y = []  # label list
    count = 0
    vocalen = len(voca)
    for doc in corpus:
        if count % 1000 == 0:
            print(str(count) + "/" + str(len(corpus)))
        count += 1

        # process label
        labelset.append(doc["label"])
        # append the label to the fixed labelset so class positions stay
        # consistent, then take the argmax of the final one-hot row
        # (np.argmax returns the index of the maximum)
        Y.append(int(np.argmax(one_hot(labelset)[-1])))
        labelset = labelset[:-1]  # reset labelset

        # process word
        temvocalist = list(voca) + list(
            doc["split_sentence"])  # same trick as for labels: voca fixes positions

        tem_one_hot = one_hot(temvocalist)[vocalen:]  # keep only the document's rows

        for word in range(tem_one_hot.shape[0]):
            temlabel = doc["label"]  # e.g. "earn"
            temword = doc["split_sentence"][word]
            temdoc = doc["document"]  # e.g. "earn638"

            # weights -- term frequency * weight
            if level == 2:
                if mod == 0:  # supervised weighting
                    tem_one_hot[word] *= weights[temlabel][temword]
                else:  # unsupervised weighting
                    tem_one_hot[word] *= weights[temdoc][temword]
            else:
                tem_one_hot[word] *= weights[temlabel][temdoc][temword]

        # guard against an empty array
        try:
            tem_one_hot = np.max(tem_one_hot, axis=0)  # drop extra rows, keep each column's maximum
        except ValueError:
            pass

        if model.lower() == "knn":
            tem_one_hot = preprocessing.normalize(
                np.array(tem_one_hot).reshape(1, -1), norm='l2')  # 转变为矩阵

        # print(tem_one_hot.toarray())
        # tem_one_hot = np.full(tem_one_hot)
        # print(tem_one_hot)  # 稀疏矩阵转化回原矩阵!
        # print(type(tem_one_hot.toarray()))

        X.append(np.squeeze(tem_one_hot.toarray().tolist()))
        # print(tem_one_hot.toarray().tolist())

        # X.append(tem_one_hot)

    return X, Y  # np.squeeze above collapses singleton dimensions (e.g. 2-D to 1-D)
Example #7
import one_hot
import numpy

labels = numpy.array([0, 1, 1, 2, 2, 3])
res = one_hot.one_hot(labels, 4)
print(res)
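Assuming `one_hot.one_hot(labels, width)` follows the usual convention of one row per label with a single 1 at the class index, the printed result would be a (6, 4) array along these lines (the exact dtype depends on the implementation):

# [[1 0 0 0]
#  [0 1 0 0]
#  [0 1 0 0]
#  [0 0 1 0]
#  [0 0 1 0]
#  [0 0 0 1]]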