Exemple #1
0
# Training set: every column except the last is a feature, the last column
# is the class label.
train_data = np.loadtxt("NoisyXORTrainingData.txt")
X_train, Y_train = train_data[:, :-1], train_data[:, -1]

# Distinct class labels found in the training labels
CLASSES = list(set(Y_train))

# Test set, sliced the same way as the training file
test_data = np.loadtxt("NoisyXORTestData.txt")
X_test, Y_test = test_data[:, :-1], test_data[:, -1]

# Build the classifier from hyperparameters defined outside this fragment
tm = MultiClassTsetlinMachine(
    NUM_CLAUSES, THRESHOLD, S, boost_true_positive_feedback=0)

# Train for a single epoch
tm.fit(X_train, Y_train, epochs=1)

# Percentage of test rows whose prediction equals the ground-truth label
print("Accuracy:", 100 * (tm.predict(X_test) == Y_test).mean())

print('Type II feedbacks on clauses: ', tm.typeII_feedback_clauses)

# Predictions for two hand-written 12-feature samples
print("Prediction: x1 = 1, x2 = 0, ... -> y = %d" % (tm.predict(
    np.array([[1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0]]))))
print("Prediction: x1 = 0, x2 = 1, ... -> y = %d" % (tm.predict(
    np.array([[0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0]]))))
print("Prediction: x1 = 0, x2 = 0, ... -> y = %d" %
feature_count_negated_negative = np.zeros(NUM_FEATURES)

# RUNS * NUM_CLAUSES rows, each with 2 * NUM_FEATURES + 1 columns
clauses = np.zeros((RUNS * NUM_CLAUSES, 2 * NUM_FEATURES + 1))

clause_dict = {}

for r in range(RUNS):
    print('Run:', r)
    x_train, x_test, y_train, y_test = train_test_split(data, labels)
    # The last column is peeled off into the *_ids arrays and removed from
    # the matrices handed to the classifier.
    x_train_ids, x_train = x_train[:, -1], x_train[:, :-1]
    x_test_ids, x_test = x_test[:, -1], x_test[:, :-1]

    # A new machine is trained from scratch on every run
    tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
    tm.fit(x_train, y_train, epochs=TRAIN_EPOCHS, incremental=True)
    print('\nfit done')
    result[r] = 100 * (tm.predict(x_test) == y_test).mean()
    feature_vector = np.zeros(2 * NUM_FEATURES)
    for cur_cls in CLASSES:
        for cur_clause in range(NUM_CLAUSES):
            # Even clause indices are tagged 'positive', odd ones 'negative'
            clause_type = 'positive' if cur_clause % 2 == 0 else 'negative'
            this_clause = ''
            for f in range(NUM_FEATURES):
                # Automaton action at index f, and at f offset by
                # NUM_FEATURES (presumably the negated literal, per the
                # variable name).
                action_plain = tm.ta_action(int(cur_cls), cur_clause, f)
                action_negated = tm.ta_action(
                    int(cur_cls), cur_clause, NUM_FEATURES + f)
# Training set: all columns but the last are features, the last is the label
train_data = np.loadtxt("NoisyXORTrainingData.txt")
X_train, Y_train = train_data[:, :-1], train_data[:, -1]

# Distinct class labels found in the training labels
CLASSES = list(set(Y_train))

# Test set, sliced the same way as the training file
test_data = np.loadtxt("NoisyXORTestData.txt")
X_test, Y_test = test_data[:, :-1], test_data[:, -1]

# Report the dimensions of the training features and labels
print(X_train.shape)
print(Y_train.shape)
# The training / save step below is disabled by wrapping it in a bare string
# literal; only the reload-and-evaluate path after it actually runs.
'''
#Initialize the Tsetlin Machine
tm = MultiClassTsetlinMachine(NUM_CLAUSES, THRESHOLD, S, boost_true_positive_feedback=0)

#Fit TM on training data
tm.fit(X_train, Y_train, epochs=1)

##save
tm.save_model('tm_model.npz', Y_train)

print("Accuracy:", 100*(tm.predict(X_test) == Y_test).mean())
'''
# Restore a classifier from the model file 'tm_model.npz'
tm2 = MultiClassTsetlinMachine.load_model('tm_model.npz')

# Predict on test data with the reloaded model, compare to ground truth and
# print the accuracy as a percentage.
print("Accuracy after saving:", 100 * (tm2.predict(X_test) == Y_test).mean())
# Binarize the test set with a helper defined outside this fragment
# (presumably the same transformation that produced Xtrain -- TODO confirm).
Xtest = binarize_selected(combo_test, list_of_uniques, cs)

print('binarized train', Xtrain.shape)
print('binarized test', Xtest.shape)

# Disabled: reshaping to a 4-D layout and saving the arrays to disk.
#X_train = Xtrain.reshape((Xtrain.shape[0],len(addendum_context),1,usum))
#X_test = Xtest.reshape((Xtest.shape[0],len(addendum_context),1,usum))

#print('reshaped train',X_train.shape)
#print('reshaped test',X_test.shape)

#np.save('x_train_conv', Xtrain)
#np.save('x_test_conv', Xtest)

# Setup
tm = MultiClassTsetlinMachine(CLAUSES, T, s, weighted_clauses=weighting)
# Index tuples of the samples whose label equals 1
labels_test_indx = np.where(labels_test == 1)
labels_train_indx = np.where(labels_train == 1)

# Lists declared for per-run accuracies; nothing in this fragment fills them
acc = []
acc_train = []
# Training
# The same machine is reused across runs; with incremental=True every run
# adds another 50 epochs on top of the previous ones.
for i in range(RUNS):
    print(i)
    start_training = time()
    tm.fit(Xtrain, labels_train, epochs=50, incremental=True)
    stop_training = time()

    # Predict on both splits; start_testing marks the start of this phase
    start_testing = time()
    res_test = tm.predict(Xtest)
    res_train = tm.predict(Xtrain)
Exemple #5
0
# Debug output of objects built earlier in the script
print(reverse_word_map)
print(word_idx)
print(sents[10], data[10])

x_train, x_test, y_train, y_test = train_test_split(data, labels)
# The last column is kept aside in the *_ids arrays (used below to index
# `sents`) and removed from the matrices passed to the classifier.
x_train_ids = x_train[:, -1]
x_test_ids = x_test[:, -1]
x_train = x_train[:, :-1]
x_test = x_test[:, :-1]

NUM_CLAUSES = 20
T = 15
s = 3.9

print('\nsplits ready:', x_train.shape, x_test.shape)
tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
tm.fit(x_train, y_train, epochs=200, incremental=True)
print('\nfit done')

# Predict once and reuse the result for both the accuracy figure and the
# per-sentence listing (the original ran the same prediction twice).
res = tm.predict(x_test)
result = 100 * (res == y_test).mean()
print(result)

# One line per test sample: the sentence looked up by its id, then the
# predicted class.
for i, sidx in enumerate(x_test_ids):
    print(sents[sidx], res[i])

# NOTE(review): NUM_CLAUSES is rebound to 10 here although `tm` above was
# built with 20 clauses -- confirm this is intended for the code that follows.
NUM_CLAUSES = 10
NUM_FEATURES = len(x_train[0])
CLASSES = list(set(y_train))

print('Num Clauses:', NUM_CLAUSES)
Exemple #6
0
s=3#s=10
weighting = True
training_epoch=5
RUNS=100
'''

# Hyperparameters for this experiment
CLAUSES = 250
T = 60
s = 37
weighting = True
training_epoch = 5
RUNS = 100

# Fixed 70/30 shuffled split (random_state pinned for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    featureset_transformed_X, featureset_transformed_y,
    test_size=0.30, random_state=42, shuffle=True)

tm = MultiClassTsetlinMachine(CLAUSES, T, s, weighted_clauses=weighting, append_negated=False)

# Test accuracy after each run; every run adds `training_epoch` more epochs
# to the same machine (incremental=True).
allacc = []
for i in range(RUNS):
	tm.fit(X_train, y_train, epochs=training_epoch, incremental=True)
	res_test = tm.predict(X_test)
	print(res_test)
	acc_test = 100 * (res_test == y_test).mean()

	allacc.append(acc_test)

	# precision_recall_fscore_support takes (y_true, y_pred). The original
	# call passed the predictions first, which swaps precision and recall.
	prf_test_macro = precision_recall_fscore_support(list(y_test), list(res_test), average='macro')

	# Drop the trailing support entry and format the rest to two decimals
	prf_test_macro = [str(round(p, 2)) for p in prf_test_macro[:-1]]
# Only the first two columns are used as features; the last column is the
# class label.
X_train = training[:, 0:2]
Y_train = training[:, -1]

X_test = test[:, 0:2]
Y_test = test[:, -1]

CLASSES = list(set(Y_train))  # distinct class labels
NUM_FEATURES = len(X_train[0])  # number of feature columns

print('Num Clauses:', NUM_CLAUSES)
print('Num Classes: ', len(CLASSES), ' : ', CLASSES)
print('Num Features: ', NUM_FEATURES)

tm = MultiClassTsetlinMachine(NUM_CLAUSES,
                              THRESHOLD,
                              S,
                              boost_true_positive_feedback=0)

tm.fit(X_train, Y_train, epochs=200)

# Average the per-sample matches to get an accuracy percentage. Without
# .mean() this printed an array of 0/100 values, one per test sample.
print("Accuracy:", 100 * (tm.predict(X_test) == Y_test).mean())

all_clauses = [[] for i in range(NUM_CLAUSES)]

for cur_cls in CLASSES:
    for cur_clause in range(NUM_CLAUSES):
        this_clause = ''
        for f in range(NUM_FEATURES * 2):
            action = tm.ta_action(int(cur_cls), cur_clause, f)
            if action == 1:
                if this_clause != '':
Exemple #8
0
# Flatten the label frame into a 1-D array with one entry per row
y = df2.iloc[:, :].values
y = np.reshape(y, len(y))

# Ordered (unshuffled) split: the last 8.4% of the rows become the test set
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.084,
                                                    shuffle=False)

print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

print('data fitting started..........................')

tm = MultiClassTsetlinMachine(2000, 40, 27, weighted_clauses=False)

# Single source of truth for the epoch count. The banner used to announce
# 1000 epochs while the loop ran 500.
TOTAL_EPOCHS = 500

print("\nAccuracy over %d epochs:\n" % TOTAL_EPOCHS)
tempAcc = []  # test accuracy after each epoch
for i in range(TOTAL_EPOCHS):
    # One incremental epoch per iteration, timed separately from evaluation
    start_training = time()
    tm.fit(x_train, y_train, epochs=1, incremental=True)
    stop_training = time()

    # The testing time covers prediction on both the test and training sets
    start_testing = time()
    result1 = 100 * (tm.predict(x_test) == y_test).mean()
    result2 = 100 * (tm.predict(x_train) == y_train).mean()
    stop_testing = time()
    tempAcc.append(result1)
    print(
        "#%d AccuracyTrain: %.2f%% AccuracyTest: %.2f%% Training: %.2fs Testing: %.2fs"
Exemple #9
0
# Assemble the full feature matrix by concatenating the parts column-wise
X_final1 = np.concatenate((X_text, X_target), axis=1)
X_final2 = np.concatenate((Loc_vec1, Loc_vec2), axis=1)

X_final3 = np.concatenate((X_final1, X_final2), axis=1)
X_final4 = np.concatenate((X_final3, restSC), axis=1)

# Split training and testing samples.
# The first TRAIN_SIZE rows are used for training, the rest for testing.
# For the Laptop dataset change this from 3608 to 2328.
TRAIN_SIZE = 3608
X_train = X_final4[0:TRAIN_SIZE, :]
X_test = X_final4[TRAIN_SIZE:, :]
ytrain = y[0:TRAIN_SIZE]
ytest = y[TRAIN_SIZE:]

#%%%%%%%%%%%%%%%%%% Initialize Tsetlin Machine %%%%%%%%%%%%%%%%%%%%%%%
tm1 = MultiClassTsetlinMachine(
    700, 90 * 100, 15,
    weighted_clauses=True)  #number of clause= 700, T = 90*100 and s = 15
#tm1.fit(X_train, ytrain, epochs=0)
print("\nTraining Classification Layer...")

# Single source of truth for the epoch count. The banner used to announce
# 1000 epochs while the loop ran 500.
TOTAL_EPOCHS = 500

print("\nAccuracy over %d epochs:\n" % TOTAL_EPOCHS)
# NOTE(review): `max` shadows the builtin. It is left as-is because code
# after this fragment may read or update it; rename it where the whole
# script is visible.
max = 0
for i in range(TOTAL_EPOCHS):
    start_training = time()
    tm1.fit(X_train, ytrain, epochs=1, incremental=True)
    stop_training = time()

    start_testing = time()
    result2 = 100 * (tm1.predict(X_train) == ytrain).mean()
    # Predict on the test set once and reuse it for the accuracy figure
    # (the original ran the same prediction twice).
    y_pred = tm1.predict(X_test)
    result1 = 100 * (y_pred == ytest).mean()
Exemple #10
0
	CLAUSES=33
	T=28
	s=12.5
	weighting = True
	training_epoch=5
	RUNS=100
	
# Load the saved feature set that sits next to the input text file
featureset = np.load(fname.replace('.txt', '') + '_featureset.npy')

# All columns but the last are features; the last column is the label,
# converted to plain ints.
X = featureset[:, :-1]
y = featureset[:, -1]
y = [int(yy) for yy in y]

# Ordered (unshuffled) 70/30 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42, shuffle=False)

tm = MultiClassTsetlinMachine(CLAUSES, T, s, weighted_clauses=weighting, append_negated=False)

# Test accuracy after each run; every run adds `training_epoch` more epochs
# to the same machine (incremental=True).
allacc = []
for i in range(RUNS):
	tm.fit(X_train, y_train, epochs=training_epoch, incremental=True)
	res_test = tm.predict(X_test)

	acc_test = 100 * (res_test == y_test).mean()

	allacc.append(acc_test)
	# precision_recall_fscore_support takes (y_true, y_pred). The original
	# calls passed the predictions first, which swaps precision and recall.
	# The trailing support entry is dropped before formatting.
	prf_test_macro = precision_recall_fscore_support(y_test, res_test, average='macro')
	prf_test_macro = [str(round(p, 2)) for p in prf_test_macro[:-1]]

	prf_test_micro = precision_recall_fscore_support(y_test, res_test, average='micro')
	prf_test_micro = [str(round(p, 2)) for p in prf_test_micro[:-1]]
	
# NOTE(review): the line right after this fragment is a lone ''' -- this code
# may sit inside a block string opened above the visible excerpt; confirm
# before relying on it being executed.
# Load the whole data set from the file named "NoisyXORTestData.txt"
data = np.loadtxt("NoisyXORTestData.txt")

# Shuffle rows in place, then use the first 4000 rows for training and the
# remainder for testing.
np.random.shuffle(data)
training, test = data[:4000, :], data[4000:, :]

# Only the first two columns are used as features; the last column is the label
X_train = training[:, 0:2]
Y_train = training[:, -1]

X_test = test[:, 0:2]
Y_test = test[:, -1]

CLASSES = list(set(Y_train))  #list of classes
NUM_FEATURES = len(X_train[0])  #number of features

tm = MultiClassTsetlinMachine(NUM_CLAUSES,
                              THRESHOLD,
                              S,
                              boost_true_positive_feedback=0)

#Fit TM on training data
tm.fit(X_train, Y_train, epochs=1)
'''
print('Train: ',len(X_train))
print('\nTest: ', len(X_test))
print('\nNum Clauses:', NUM_CLAUSES)
print('\nNum Classes: ', len(CLASSES),' : ', CLASSES)
print('\nNum Features: ', NUM_FEATURES)

print("Accuracy:", 100*(tm.predict(X_test) == Y_test).mean())

for cur_cls in CLASSES:
	for cur_clause in range(NUM_CLAUSES):
Exemple #12
0
# Per-feature counters, one slot per feature, all starting at zero
feature_count_ignore = np.zeros(NUM_FEATURES)
feature_count_contradiction = np.zeros(NUM_FEATURES)
feature_count_negated_negative= np.zeros(NUM_FEATURES)

# RUNS*NUM_CLAUSES rows, each with NUM_FEATURES*2+1 columns
clauses=np.zeros((RUNS*NUM_CLAUSES,NUM_FEATURES*2+1))

clause_dict={}

# Each run draws a new split and trains a new machine from scratch
for r in range(RUNS):
	print('Run:',r)
	x_train, x_test, y_train, y_test = train_test_split(data, labels)
	# The last column is kept aside in the *_ids arrays and removed from the
	# matrices passed to the classifier.
	x_train_ids=x_train[:,-1]
	x_test_ids=x_test[:,-1]
	x_train=x_train[:,:-1]
	x_test=x_test[:,:-1]
	tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
	tm.fit(x_train, y_train, epochs=TRAIN_EPOCHS, incremental=True)
	print('\nfit done')
# NOTE(review): the statements below sit at column 0, so as written they run
# once after the loop, using the machine and split from the last run. The
# indentation of this fragment looks damaged -- confirm the intended nesting.
tm.save_model('causal_model.npz')
res=tm.predict(x_test)
print('\n\nFull result average=',100*(res == y_test).mean())
  
# For every test sample: the sentence looked up by its id, the predicted and
# actual labels, then the model's own per-sample printout.
for i in range(len(x_test_ids)):
	sidx=x_test_ids[i]
	print(sents[sidx], '\nPredicted:',res[i],'\nActual',y_test[i])
	tm.predict_and_print(np.array([x_test[i]]))
	result[r] = 100*(tm.predict(x_test) == y_test).mean()
	feature_vector=np.zeros(NUM_FEATURES*2)
	for cur_cls in CLASSES:
		for cur_clause in range(NUM_CLAUSES):
			if cur_clause%2==0:
Exemple #13
0
# Remaining header fields followed by the tab-separated column titles,
# assembled in order and emitted with a single write.
fo.write(''.join([
    '\ns: ' + str(s),
    '\nNum Features: ' + str(NUM_FEATURES),
    '\nTotal Runsper Step: ' + str(RUNS),
    '\nTotal Steps: ' + str(STEPS),
    '\nSTEP Size: ' + str(STEP_SIZE),
    '\nTrain Epochs: ' + str(TRAIN_EPOCHS) + '\n\n',
    'Num_CLAUSES\tMean\tMax\tAll\n',
]))

# Per-step summaries: mean and best accuracy, and the clause count used
result_mean=np.zeros(STEPS)
result_max=np.zeros(STEPS)
clausesizes=np.zeros(STEPS)

# The loop variable is `step`, not `s`. The original reused `s`, which
# overwrote the specificity hyperparameter, so the step index (0 on the
# first step) was passed to the Tsetlin Machine in its place.
for step in range(STEPS):
	lr=np.zeros(RUNS)  # accuracy of each run within this step
	NUM_CLAUSES+=STEP_SIZE
	# NOTE(review): one machine is shared by all runs of a step and trained
	# incrementally on a fresh split each time, so rows tested in a later
	# run may already have been trained on in an earlier one -- confirm
	# this is intended.
	tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
	for r in range(RUNS):
		x_train, x_test, y_train, y_test = train_test_split(data, labels)
		# The last column is kept aside as ids and removed from the features
		x_train_ids=x_train[:,-1]
		x_test_ids=x_test[:,-1]
		x_train=x_train[:,:-1]
		x_test=x_test[:,:-1]
		print ('Step '+str(step)+' Run '+str(r)+' num_clause '+str(NUM_CLAUSES))
		tm.fit(x_train, y_train, epochs=TRAIN_EPOCHS, incremental=True)
		lr[r]=100*(tm.predict(x_test) == y_test).mean()
	result_mean[step] = lr.mean()
	result_max[step] = lr.max()
	clausesizes[step]=NUM_CLAUSES

	# One tab-separated row per step: clause count, mean, max, all run scores
	fo.write(str(NUM_CLAUSES)+'\t'+str(result_mean[step])+'\t'+str(result_max[step])+'\t'+str(lr)+'\n')