#Load Training Data train_data = np.loadtxt("NoisyXORTrainingData.txt") X_train = train_data[:, 0:-1] #last column is class labels Y_train = train_data[:, -1] #last column is class labels CLASSES = list(set(Y_train)) #list of classes #Load Testing Data test_data = np.loadtxt("NoisyXORTestData.txt") X_test = test_data[:, 0:-1] #last column is class labels Y_test = test_data[:, -1] #last column is class labels #Initialize the Tsetlin Machine tm = MultiClassTsetlinMachine(NUM_CLAUSES, THRESHOLD, S, boost_true_positive_feedback=0) #Fit TM on training data tm.fit(X_train, Y_train, epochs=1) #Predict on test data, compare to ground truth, calculate accuracy0 print("Accuracy:", 100 * (tm.predict(X_test) == Y_test).mean()) print('Type II feedbacks on clauses: ', tm.typeII_feedback_clauses) #Prediction on some random data print("Prediction: x1 = 1, x2 = 0, ... -> y = %d" % (tm.predict(np.array([[1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0]])))) print("Prediction: x1 = 0, x2 = 1, ... -> y = %d" % (tm.predict(np.array([[0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0]])))) print("Prediction: x1 = 0, x2 = 0, ... -> y = %d" %
# Repeated-run experiment: every run draws a fresh random split, trains a new
# Tsetlin Machine on it, stores the test accuracy and then walks the automata
# of every clause of every class.
feature_count_negated_negative = np.zeros(NUM_FEATURES)
# Sized for RUNS * NUM_CLAUSES rows of 2 * NUM_FEATURES + 1 columns.
clauses = np.zeros((RUNS * NUM_CLAUSES, NUM_FEATURES * 2 + 1))
clause_dict = {}

for r in range(RUNS):
    print('Run:', r)
    x_train, x_test, y_train, y_test = train_test_split(data, labels)

    # The last column is split off into the *_ids arrays and is not used
    # for training.
    x_train_ids, x_train = x_train[:, -1], x_train[:, :-1]
    x_test_ids, x_test = x_test[:, -1], x_test[:, :-1]

    tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
    tm.fit(x_train, y_train, epochs=TRAIN_EPOCHS, incremental=True)
    print('\nfit done')

    # Test accuracy of this run, in percent.
    result[r] = 100 * (tm.predict(x_test) == y_test).mean()

    feature_vector = np.zeros(NUM_FEATURES * 2)
    for cur_cls in CLASSES:
        for cur_clause in range(NUM_CLAUSES):
            # Even clause indices are labelled 'positive', odd ones 'negative'.
            clause_type = 'positive' if cur_clause % 2 == 0 else 'negative'
            this_clause = ''
            for f in range(NUM_FEATURES):
                # Automaton f is read as the plain feature, automaton
                # f + NUM_FEATURES as its negated counterpart.
                action_plain = tm.ta_action(int(cur_cls), cur_clause, f)
                action_negated = tm.ta_action(
                    int(cur_cls), cur_clause, f + NUM_FEATURES)
# Load the training set: every column but the last is a feature, the last
# column holds the class label.
train_data = np.loadtxt("NoisyXORTrainingData.txt")
X_train, Y_train = train_data[:, :-1], train_data[:, -1]
CLASSES = list(set(Y_train))  # distinct class labels seen in training

# Load the test set (same column layout as the training set).
test_data = np.loadtxt("NoisyXORTestData.txt")
X_test, Y_test = test_data[:, :-1], test_data[:, -1]

print(X_train.shape)
print(Y_train.shape)

# The training/saving step below is disabled; the model is read back from
# 'tm_model.npz' instead.
'''
#Initialize the Tsetlin Machine
tm = MultiClassTsetlinMachine(NUM_CLAUSES, THRESHOLD, S, boost_true_positive_feedback=0)
#Fit TM on training data
tm.fit(X_train, Y_train, epochs=1)
##save
tm.save_model('tm_model.npz', Y_train)
print("Accuracy:", 100*(tm.predict(X_test) == Y_test).mean())
'''
tm2 = MultiClassTsetlinMachine.load_model('tm_model.npz')

# Predict on test data, compare to ground truth, calculate accuracy:
# print("Accuracy after saving:", 100 * (tm2.predict(X_test) == Y_test).mean())
# Binarize the test combination with the same helper arguments
# (list_of_uniques, cs) and report both binarized shapes.
Xtest = binarize_selected(combo_test, list_of_uniques, cs)
print('binarized train', Xtrain.shape)
print('binarized test', Xtest.shape)

# Setup
acc, acc_train = [], []
# Positions of the samples labelled 1 in each split.
labels_test_indx = np.where(labels_test == 1)
labels_train_indx = np.where(labels_train == 1)
tm = MultiClassTsetlinMachine(CLAUSES, T, s, weighted_clauses=weighting)

# Training: each of the RUNS rounds continues training the same machine for
# another 50 epochs (incremental=True), then predicts on both splits.
for i in range(RUNS):
    print(i)

    start_training = time()
    tm.fit(Xtrain, labels_train, epochs=50, incremental=True)
    stop_training = time()

    start_testing = time()
    res_test = tm.predict(Xtest)
    res_train = tm.predict(Xtrain)
# Train one Tsetlin Machine on a single random split and print the predicted
# class next to every test sentence.
print(reverse_word_map)
print(word_idx)
print(sents[10], data[10])

x_train, x_test, y_train, y_test = train_test_split(data, labels)

# The last column is used below to index `sents`, so it is split off and kept
# out of the training features.
x_train_ids = x_train[:, -1]
x_test_ids = x_test[:, -1]
x_train = x_train[:, :-1]
x_test = x_test[:, :-1]

NUM_CLAUSES = 20
T = 15
s = 3.9
print('\nsplits ready:', x_train.shape, x_test.shape)

tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
tm.fit(x_train, y_train, epochs=200, incremental=True)
print('\nfit done')

# Predict once and reuse the result for both the accuracy figure and the
# per-sentence listing (the original ran the same prediction twice).
res = tm.predict(x_test)
result = 100 * (res == y_test).mean()
print(result)

for sidx, predicted in zip(x_test_ids, res):
    print(sents[sidx], predicted)

# NOTE(review): the machine above was built with 20 clauses; NUM_CLAUSES is
# reset to 10 here, so code that follows reports/iterates only 10 — confirm
# this is intentional.
NUM_CLAUSES = 10
NUM_FEATURES = len(x_train[0])
CLASSES = list(set(y_train))
print('Num Clauses:', NUM_CLAUSES)
s=3#s=10 weighting = True training_epoch=5 RUNS=100 ''' CLAUSES=250 T=60 s=37 weighting = True training_epoch=5 RUNS=100 X_train, X_test, y_train, y_test = train_test_split(featureset_transformed_X, featureset_transformed_y, test_size=0.30, random_state=42, shuffle=True) tm = MultiClassTsetlinMachine(CLAUSES, T, s, weighted_clauses=weighting,append_negated=False) allacc=[] for i in range(RUNS): tm.fit(X_train, y_train, epochs=training_epoch, incremental=True) res_test=tm.predict(X_test) print(res_test) acc_test = 100*(res_test == y_test).mean() allacc.append(acc_test) #print(type(y_test), type(res_test), len(y_test), len(res_test)) prf_test_macro=precision_recall_fscore_support(list(res_test), list(y_test), average='macro') prf_test_macro=[str(round(p,2)) for p in prf_test_macro[:-1]]
X_train = training[:, 0:2] Y_train = training[:, -1] X_test = test[:, 0:2] Y_test = test[:, -1] CLASSES = list(set(Y_train)) #list of classes NUM_FEATURES = len(X_train[0]) #number of features print('Num Clauses:', NUM_CLAUSES) print('Num Classes: ', len(CLASSES), ' : ', CLASSES) print('Num Features: ', NUM_FEATURES) tm = MultiClassTsetlinMachine(NUM_CLAUSES, THRESHOLD, S, boost_true_positive_feedback=0) tm.fit(X_train, Y_train, epochs=200) print("Accuracy:", 100 * (tm.predict(X_test) == Y_test)) all_clauses = [[] for i in range(NUM_CLAUSES)] for cur_cls in CLASSES: for cur_clause in range(NUM_CLAUSES): this_clause = '' for f in range(NUM_FEATURES * 2): action = tm.ta_action(int(cur_cls), cur_clause, f) if action == 1: if this_clause != '':
y = df2.iloc[:, :].values y = np.reshape(y, len(y)) x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.084, shuffle=False) print(x_train.shape) print(x_test.shape) print(y_train.shape) print(y_test.shape) print('data fitting started..........................') tm = MultiClassTsetlinMachine(2000, 40, 27, weighted_clauses=False) print("\nAccuracy over 1000 epochs:\n") tempAcc = [] for i in range(500): start_training = time() tm.fit(x_train, y_train, epochs=1, incremental=True) stop_training = time() start_testing = time() result1 = 100 * (tm.predict(x_test) == y_test).mean() result2 = 100 * (tm.predict(x_train) == y_train).mean() stop_testing = time() tempAcc.append(result1) print( "#%d AccuracyTrain: %.2f%% AccuracyTest: %.2f%% Training: %.2fs Testing: %.2fs"
# Assemble the final feature matrix by column-wise concatenation of the text,
# target, location and remaining feature groups.
X_final1 = np.concatenate((X_text, X_target), axis=1)
X_final2 = np.concatenate((Loc_vec1, Loc_vec2), axis=1)
X_final3 = np.concatenate((X_final1, X_final2), axis=1)
X_final4 = np.concatenate((X_final3, restSC), axis=1)

# Split training and testing samples.
# The first NUM_TRAIN_SAMPLES rows are the training set, the rest the test
# set. For the Laptop dataset change this single value from 3608 to 2328.
NUM_TRAIN_SAMPLES = 3608
X_train = X_final4[:NUM_TRAIN_SAMPLES, :]
X_test = X_final4[NUM_TRAIN_SAMPLES:, :]
ytrain = y[:NUM_TRAIN_SAMPLES]
ytest = y[NUM_TRAIN_SAMPLES:]

#%%%%%%%%%%%%%%%%%% Initialize Tsetlin Machine %%%%%%%%%%%%%%%%%%%%%%%
# number of clauses = 700, T = 90*100 and s = 15
tm1 = MultiClassTsetlinMachine(700, 90 * 100, 15, weighted_clauses=True)

# One epoch is trained per loop iteration, so this is the total epoch count.
NUM_EPOCHS = 500

print("\nTraining Classification Layer...")
# The banner previously claimed 1000 epochs while the loop ran 500.
print("\nAccuracy over %d epochs:\n" % NUM_EPOCHS)

# NOTE(review): `max` shadows the builtin; the name is kept because code
# outside this section presumably reads it — rename in a follow-up.
max = 0
for i in range(NUM_EPOCHS):
    start_training = time()
    tm1.fit(X_train, ytrain, epochs=1, incremental=True)
    stop_training = time()

    start_testing = time()
    result2 = 100 * (tm1.predict(X_train) == ytrain).mean()
    # Predict the test set once; the accuracy is derived from the same
    # predictions instead of running a second, identical prediction.
    y_pred = tm1.predict(X_test)
    result1 = 100 * (y_pred == ytest).mean()
# Hyperparameters of the Tsetlin Machine and of the evaluation loop.
CLAUSES = 33
T = 28
s = 12.5
weighting = True
training_epoch = 5
RUNS = 100

# The feature set is stored next to the input text file; the last column is
# the class label, everything before it the features.
featureset = np.load(fname.replace('.txt', '') + '_featureset.npy')
X = featureset[:, :-1]
y = featureset[:, -1]
y = [int(yy) for yy in y]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42, shuffle=False)

tm = MultiClassTsetlinMachine(
    CLAUSES, T, s, weighted_clauses=weighting, append_negated=False)

allacc = []
for i in range(RUNS):
    # Continue training the same machine for another `training_epoch` epochs.
    tm.fit(X_train, y_train, epochs=training_epoch, incremental=True)
    res_test = tm.predict(X_test)
    acc_test = 100 * (res_test == y_test).mean()
    allacc.append(acc_test)

    # precision_recall_fscore_support expects (y_true, y_pred). Passing the
    # predictions first, as before, swaps precision and recall in the
    # macro-averaged result.
    prf_test_macro = precision_recall_fscore_support(
        y_test, res_test, average='macro')
    # Drop the trailing support entry and keep precision, recall, f-score.
    prf_test_macro = [str(round(p, 2)) for p in prf_test_macro[:-1]]

    prf_test_micro = precision_recall_fscore_support(
        y_test, res_test, average='micro')
    prf_test_micro = [str(round(p, 2)) for p in prf_test_micro[:-1]]
data = np.loadtxt("NoisyXORTestData.txt") np.random.shuffle(data) training, test = data[:4000, :], data[4000:, :] X_train = training[:, 0:2] Y_train = training[:, -1] X_test = test[:, 0:2] Y_test = test[:, -1] CLASSES = list(set(Y_train)) #list of classes NUM_FEATURES = len(X_train[0]) #number of features tm = MultiClassTsetlinMachine(NUM_CLAUSES, THRESHOLD, S, boost_true_positive_feedback=0) #Fit TM on training data tm.fit(X_train, Y_train, epochs=1) ''' print('Train: ',len(X_train)) print('\nTest: ', len(X_test)) print('\nNum Clauses:', NUM_CLAUSES) print('\nNum Classes: ', len(CLASSES),' : ', CLASSES) print('\nNum Features: ', NUM_FEATURES) print("Accuracy:", 100*(tm.predict(X_test) == Y_test).mean()) for cur_cls in CLASSES: for cur_clause in range(NUM_CLAUSES):
feature_count_ignore = np.zeros(NUM_FEATURES) feature_count_contradiction = np.zeros(NUM_FEATURES) feature_count_negated_negative= np.zeros(NUM_FEATURES) clauses=np.zeros((RUNS*NUM_CLAUSES,NUM_FEATURES*2+1)) clause_dict={} for r in range(RUNS): print('Run:',r) x_train, x_test, y_train, y_test = train_test_split(data, labels) x_train_ids=x_train[:,-1] x_test_ids=x_test[:,-1] x_train=x_train[:,:-1] x_test=x_test[:,:-1] tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s) tm.fit(x_train, y_train, epochs=TRAIN_EPOCHS, incremental=True) print('\nfit done') tm.save_model('causal_model.npz') res=tm.predict(x_test) print('\n\nFull result average=',100*(res == y_test).mean()) for i in range(len(x_test_ids)): sidx=x_test_ids[i] print(sents[sidx], '\nPredicted:',res[i],'\nActual',y_test[i]) tm.predict_and_print(np.array([x_test[i]])) result[r] = 100*(tm.predict(x_test) == y_test).mean() feature_vector=np.zeros(NUM_FEATURES*2) for cur_cls in CLASSES: for cur_clause in range(NUM_CLAUSES): if cur_clause%2==0:
# Finish the report header, then sweep the number of clauses: each step adds
# STEP_SIZE clauses and records mean/max test accuracy over RUNS random splits.
fo.write('\ns: '+str(s))
fo.write('\nNum Features: '+ str(NUM_FEATURES))
fo.write('\nTotal Runsper Step: '+str(RUNS))
fo.write('\nTotal Steps: '+str(STEPS))
fo.write('\nSTEP Size: '+str(STEP_SIZE))
fo.write('\nTrain Epochs: '+str(TRAIN_EPOCHS)+'\n\n')
fo.write('Num_CLAUSES\tMean\tMax\tAll\n')

result_mean = np.zeros(STEPS)
result_max = np.zeros(STEPS)
clausesizes = np.zeros(STEPS)

# The loop index must not be called `s`: that name holds the specificity
# hyperparameter written to the header above and passed to the machine below.
# Reusing it made the machine run with s = 0, 1, 2, ... instead.
for step in range(STEPS):
    lr = np.zeros(RUNS)  # test accuracy (percent) of every run in this step
    NUM_CLAUSES += STEP_SIZE
    # NOTE(review): the machine is created once per step and trained
    # incrementally across runs that each draw a new random split, so later
    # runs may be tested on samples seen in training — confirm intended.
    tm = MultiClassTsetlinMachine(NUM_CLAUSES, T, s)
    for r in range(RUNS):
        x_train, x_test, y_train, y_test = train_test_split(data, labels)
        # The last column is split off into the *_ids arrays.
        x_train_ids = x_train[:, -1]
        x_test_ids = x_test[:, -1]
        x_train = x_train[:, :-1]
        x_test = x_test[:, :-1]
        print('Step '+str(step)+' Run '+str(r)+' num_clause '+str(NUM_CLAUSES))
        tm.fit(x_train, y_train, epochs=TRAIN_EPOCHS, incremental=True)
        lr[r] = 100*(tm.predict(x_test) == y_test).mean()
    result_mean[step] = lr.mean()
    result_max[step] = lr.max()
    clausesizes[step] = NUM_CLAUSES
    fo.write(str(NUM_CLAUSES)+'\t'+str(result_mean[step])+'\t'+str(result_max[step])+'\t'+str(lr)+'\n')