def getInputData(batchsize):
    """Read one training batch and encode its characters as integers.

    The training files are fed through ``ReadData.input_pipeline``, the
    resulting batch is flattened, every string is split into single
    characters, and each lower-cased character is looked up in ``dict1``.

    Args:
        batchsize: Batch size forwarded to ``ReadData.input_pipeline``.

    Returns:
        list: One ``dict1`` entry per character of the batch, in order.

    Note:
        ``dict1`` is not defined in this function and has to exist in the
        enclosing scope. ``.eval()`` is called without a session argument,
        so presumably a default TensorFlow session must be active -- confirm
        against the caller.
    """
    reader = ReadData()
    # Only the training file list is needed here; the test list is ignored.
    training_paths, _ = reader.filePathConstructor()
    batch = reader.input_pipeline(training_paths, batchsize)

    flat_batch = tf.reshape(batch, [-1])
    # An empty delimiter splits every string into its individual characters.
    split_chars = tf.string_split(flat_batch, delimiter="").values.eval()

    encoded = []
    for raw_char in split_chars:
        encoded.append(dict1[raw_char.decode().lower()])
    return encoded
def _saveEMDCache(data, filename):
    """Pickle *data* to *filename*, creating the cache directory if needed."""
    cacheDir = os.path.dirname(filename)
    if cacheDir and not os.path.isdir(cacheDir):
        os.makedirs(cacheDir)
    with open(filename, 'wb') as f:
        pickle.dump(data, f)


def DenoiseByEMD():
    """Denoise the original signals by dropping the first n IMF levels.

    ``n`` is ``Settings.denoiseLevel``. For every key of the data returned
    by ``ReadData()`` the IMF columns ``n .. NLevels - 1`` (as produced by
    ``GetImfs``) are summed row-wise into a single-column DataFrame named
    after the first column of the original frame.

    The result is cached in ``./Cashe/globalData_EMD_<n>.pickle``. With
    ``n < 1`` no denoising takes place: the raw data is written to the cache
    file and returned unchanged.

    Returns:
        The raw data when ``n < 1``, otherwise a dict mapping each key to its
        reconstructed (denoised) single-column DataFrame.
    """
    removeLevel = Settings.denoiseLevel
    filename = './Cashe/globalData_EMD_{}.pickle'.format(removeLevel)

    # Denoising switched off (removeLevel = 0): cache and return the raw data.
    if removeLevel < 1:
        globalDataEMD = ReadData()
        _saveEMDCache(globalDataEMD, filename)
        return globalDataEMD

    # Cache hit: return early, before the raw data is read at all.
    if os.path.exists(filename):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # the cache directory must only ever contain files written by us.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    globalData = ReadData()
    globalDataImfs = GetImfs(globalData)

    globalDataEMD = {}
    for key in globalData:
        df = globalData[key]
        dfImfs = globalDataImfs[key]
        NLevels = dfImfs.shape[1]
        # Sum every IMF column except the first `removeLevel` ones.
        tsReons = dfImfs[range(removeLevel, NLevels)].sum(axis=1)
        tsReons.name = df.columns[0]
        globalDataEMD[key] = pd.DataFrame(tsReons)

    # Save the data to the filename as a pickle file
    _saveEMDCache(globalDataEMD, filename)
    return globalDataEMD
def __init__(self):
    """Load the data and set up empty containers for three clusters."""
    self.data = ReadData().please_read_data()
    # Cluster member lists, so elements can be assigned to them.
    self.cluster1, self.cluster2, self.cluster3 = [], [], []
    # Centroid lists, kept as attributes so they can be updated.
    self.centroid1, self.centroid2, self.centroid3 = [], [], []
def main(args):
    """Train (or resume) a face liveness model on NUAA data and evaluate it.

    Args:
        args: Parsed options; the attributes read here are ``model``,
            ``lr``, ``epoch`` and ``resume``.
    """
    model_name = args.model
    learning_rate = args.lr
    epoch = args.epoch

    # NOTE(review): `path` is not assigned in this function; it has to be
    # provided by the enclosing module.
    with open(path) as config_file:
        print("Reading from json ... ")
        model_config = json.load(config_file)[model_name]
    accuracy = model_config['accuracy']
    model_file = model_config['file']

    # --- paths of the training and test data (NUAA face liveness dataset) ---
    print("Reading input from the NUAA dataset ... ")
    reader = ReadData()
    clientdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ClientNormalized/'
    imposterdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ImposterNormalized/'
    client_train_list = '/content/drive/MyDrive/NormalizedFace_NUAA/client_train_normalized.txt'
    imposter_train_list = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_train_normalized.txt'
    client_test_list = '/content/drive/MyDrive/NormalizedFace_NUAA/client_test_normalized.txt'
    imposter_test_list = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_test_normalized.txt'

    # --- read training and test data ---
    train_images, train_labels = reader.read_data(
        clientdir, imposterdir, client_train_list, imposter_train_list)
    test_images, test_labels = reader.read_data(
        clientdir, imposterdir, client_test_list, imposter_test_list)

    for i in range(1):
        # Pick the model used for face liveness detection.
        if model_name == 'CNN':
            print("Selected CNN")
            cnn = CNNModel()  # simple CNN model
        else:
            print("Selected Inception")
            cnn = InceptionV4Model()  # Inception model

        if not args.resume:
            print("Starting from scratch by creating a new model")
            model = cnn.create_model(learning_rate)  # create a new model
        else:
            print("Resuming from the best model")
            model = cnn.load_model(model_file)  # use the pretrained model

        print("Starting training ...")
        model = cnn.train_model(
            model, train_images, train_labels, test_images, test_labels,
            epoch, accuracy, model_file, model_name)
        test_loss, test_acc = cnn.evaluate(model, test_images, test_labels)
        print('iteration = ' + str(i) + ' ---------------------------------------------========')

    print("**************************************Done***************************************")
optimizer=optimizer, metrics=['accuracy']) else: model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) model.summary() if args.check_build: exit() embedding = {'type': args.embedding_type, 'path': args.embedding_path} if args.model == 'sentence_pair': reader = ReadData(path_file=args.dataset, embedding_config=embedding, data_shape=inputs, train_val_split=args.train_val_split, sentence_pair=True) else: reader = ReadData(path_file=args.dataset, embedding_config=embedding, data_shape=inputs, train_val_split=args.train_val_split, sentence_pair=False) print('Reading Validation Data ..') val_x, val_y = reader.read_val() train_generator = reader.generator() log_dir = args.model
inputs = [(args.no_comments, 512), (30, 1536)] model = RecurrentCNN(no_filters=hidden_size, no_classes=args.no_classes) elif args.model == 'bilstm_rcnn': inputs = [(args.no_comments, 512), (30, 1536)] model = BiLSTMRecurrentCNN(hidden_size, no_classes=args.no_classes) model = model.build(inputs) model.load_weights(args.weights) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.summary() reader = ReadData(dataset=args.dataset, text_embedding_path=args.text_embedding, video_feature_path=args.video_features, data_shape=inputs, train_val_split=1.) results = [] labels = [] prog_bar = tqdm(total=int(reader.val_size / args.batch_size)) num_batches = int(reader.val_size / args.batch_size) i = 0 for x, y in reader.generator_val(batch_size=args.batch_size): label = list(y) result = list(model.predict(x))
__version__ = "0.0.1" __maintainer__ = "Agniv Sen" __email__ = "*****@*****.**" __status__ = "Protoyping" """ # **************************************************** # This is the entry point of this entire project. # For someone who wants to understand the code flow, please start from this point # **************************************************** rd = ReadData(); #Initializing File Reader Class featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES,2)) featureMapProj = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES,param.PARTICLE_COUNT,3)) featureStore = np.zeros((param.FEATURE_SIZE)) world = np.zeros((param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z)); stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE + param.ANGULAR_VELOCITY_VECTOR_SIZE) cameraState = np.zeros((stateVectorSize)) # Variables for archiving position vector, quaternion and features _x = [] _y = [] _z = []
'''Runs training until cost values converge to within some interval''' val = self.linreg(learning_rate, ind, dep) old_val = 0 #Can change this variable to decide how much convergence is wanted while np.absolute(val-old_val) > 1: old_val = val val = self.linreg(learning_rate, ind, dep) self.getTheta() def getTheta(self): '''Prints out Value for current weight and bias variables''' print "Weight Bias" print self.weight, self.bias if __name__ == '__main__': #command line to run this properly #python NiceLinReg.py data.csv [2,3] 1 np.random.seed(42) loader = ReadData() loader.load(sys.argv[1], sys.argv[2], int(sys.argv[3])) print "Temp Only" tempOnly = NiceLinReg() dailyTemp = loader.getInd(0) DOJIA = loader.getDep() tempOnly.train(.000005, dailyTemp, DOJIA) print "\nDiff in Temp and avg highest recorded temp" diff = NiceLinReg() diffList = loader.diff(0,1) diff.train(0.000000000049, diffList, DOJIA)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sat May 5 01:20:32 2018 @author: computer """ from ReadData import ReadData from catboost import Pool, CatBoostRegressor from sklearn.model_selection import train_test_split import pandas as pd import gc data = ReadData() data.X_train.drop(data.X_train.index[:int( 1e+8 )], inplace=True) data.y_train.drop(data.y_train.index[:int( 1e+8 )], inplace=True) X_train, X_valid, y_train, y_valid = train_test_split(data.X_train, data.y_train, test_size=0.1) cat_features = X_train.columns.get_indexer_for(data.CATEGORICAL_FEATURES) del data; gc.collect() train_pool = Pool(X_train, y_train, cat_features=cat_features) del X_train; del y_train; gc.collect()
def mainFunc():
    """Load the hard-coded CSV path and print ``data.keys()`` of the reader."""
    reader = ReadData('D:/Code/readfile/data.csv')
    print(reader.data.keys())
if args.model.endswith('lstm'): model = ConvLSTMModel1(hidden_states, classes, attention_size=attention_size, use_attention=args.use_attention) elif args.model.endswith('deep'): model = ConvLSTMModel2(hidden_states, classes, attention_size=attention_size, use_attention=args.use_attention) else: model = ConvModel(classes) reader = ReadData(args.training_csv, args.embedding, args.classes, batch_size=args.batch_size, no_samples=args.no_samples, train_val_split=args.train_val_split) print('Reading Validation data.') val_x, val_y = reader.read_all_val() if args.model.startswith('cnn'): val_x = np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1)) with tf.name_scope('Model'): prediction = model.model(x) with tf.name_scope('Loss'): crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y) cost_func = (tf.reduce_mean(crossent)) / args.batch_size
import matplotlib.pyplot as plt import matplotlib.image as mpimg import numpy as np import math ############ from ReadData import ReadData from SHE import SHE #####Parameters of experimental device######### global pixel_width #pixel width,unit:nm pixel_width = 320e3 lam_x = 0.124 # X-ray wavelength, unit:nm dis_s2d = 5300e6 #sample-to-detector distance, unit:nm ############constant definition################ pi = math.pi ############################################### init_data = ReadData("data/1.4/50k-132-1.401286.asc") px_min = 122 px_max = 362 pz_min = 166 pz_max = 326 cen_px = (px_max + px_min) / 2 cen_pz = (pz_max + pz_min) / 2 I = init_data[pz_min:(pz_max + 1), px_min:(px_max + 1)] plt.imshow(I[:, 40:200]) plt.axis('off') plt.colorbar() #EI = np.sqrt(I) #I_max = 0.05*np.max(I) ######################### #x = pixel_width*np.arange(px_min-cen_px,px_max-cen_px+1) #z = pixel_width*np.arange(pz_min-cen_pz,pz_max-cen_pz+1)
H = np.dot(np.linalg.pinv(R), P).T x_est = np.dot(H, y) err = x - x_est g = lambda x: np.dot(H, x) R = np.dot(err, err.T) / l return g, R if os.name == 'posix': trainPath = os.path.abspath('.') + '/Data/train.txt' testPath = os.path.abspath('.') + '/Data/test.txt' elif os.name == 'nt': trainPath = os.path.abspath('.') + '\\Data\\train.txt' testPath = os.path.abspath('.') + '\\Data\\test.txt' TrainX, TrainY = ReadData(trainPath, 'train') # TestX, TestY = ReadData(testPath, 'test') # two types of Test DataSet TrainX = TrainX[:, 0:-500] TrainY = TrainY[:, 0:-500] TestX = TrainX[:, -500:] TestY = TrainY[:, -500:] xTrainDim, TrainLen = np.shape(TrainX) print(str(xTrainDim) + " " + str(TrainLen)) yTrainDim, TrainLen = np.shape(TrainY) xTestDim, TestLen = np.shape(TestX) print(str(yTrainDim) + " " + str(TestLen)) # kalman filter
def get_HvM(self, ):
    """Load the HvM neural data, store per-feature statistics, return the data.

    Reads the metadata and neural recordings, maps ``IT[1:]`` through
    ``Mapping.get_Neu_trial_V36``, draws ``ni`` random indices (with
    replacement) along axis 1 and swaps axes 0 and 1, so the returned array
    is indexed as ``D[image, feature, trial]`` in the statistics loop below.
    The mean and standard deviation over the last axis are written to
    ``HvM_stats.h5`` in ``resultdir`` as datasets ``mu`` and ``sd``, each of
    shape ``(self.nf, self.ni)``.

    Returns:
        The resampled, axis-swapped data array ``D``.
    """
    # Read Meta
    Meta = ReadMeta(neuralfeaturesdir)
    Meta.get_DF_img()  # result unused; call kept in case it has side effects
    DF_neu = Meta.get_DF_neu()
    times = Meta.get_times()

    # Read Neural data (only the first value returned by get_data is used)
    Data = ReadData(datadir, DF_neu)
    IT, _ = Data.get_data()

    D = Mapping.get_Neu_trial_V36(IT[1:], [70, 170], times)
    # NOTE(review): the sample count uses the bare name `ni` while the loops
    # below use `self.ni` -- confirm both always hold the same value.
    image_indices = np.random.randint(low=0, high=D.shape[1], size=ni)
    D = D[:, image_indices, :]
    D = np.swapaxes(D, 0, 1)

    mu = np.zeros((self.nf, self.ni))
    sd = np.zeros((self.nf, self.ni))
    for f in range(self.nf):
        for i in range(self.ni):
            mu[f, i] = D[i, f, :].mean()
            sd[f, i] = D[i, f, :].std()

    # The context manager closes the file even if writing a dataset fails.
    with h5py.File(resultdir + 'HvM_stats.h5', 'w') as hf:
        hf.create_dataset('mu', data=mu)
        hf.create_dataset('sd', data=sd)

    # #test synthetic as HvM
    # nf = 168
    # nt = 46
    # noise_dist = 'poisson'
    # sds = np.logspace(-1, 1, num=int(nf))
    # D = np.zeros((ni, nf, nt))
    # D_mean = np.random.rand(ni, nf)
    # for tr in range(nt):
    #     D[:, :, tr] = D_mean
    #
    # noise1 = np.zeros((ni, nf, int(nt * splitfract)))
    # noise2 = np.zeros((ni, nf, int(nt * splitfract)))
    # for i in range(ni):
    #     if noise_dist == 'normal':
    #         n = np.random.rand()
    #         noise1[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
    #         noise2[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
    #     elif noise_dist == 'poisson':
    #         n = np.random.rand()
    #         noise1[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
    #         noise2[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
    #
    # D[:, :, :int(nt * splitfract)] = D[:, :, :int(nt * splitfract)] + noise1
    # D[:, :, int(nt * splitfract):] = D[:, :, int(nt * splitfract):] + noise2

    # to test HvM as syntheic
    # hf = h5py.File(resultdir+'D.h5', 'w')
    # hf.create_dataset('D', data=D)
    # hf.close()

    return D
from GA import GA
from ReadData import ReadData
from utils import *

# Run a genetic algorithm on the problem stored in "easy_01_tsp.txt"
# (presumably a TSP instance, judging by the file name) and report the best
# chromosome of every generation as well as the best one overall.
data = ReadData("easy_01_tsp.txt")
params = {'popSize': 100, 'noGen': 100}

ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

# Best individual seen so far across all generations.
bestFitness = 0
bestDist = 0
bestChromoOverallRepres = None

for g in range(ga.getParam()['noGen']):
    # Alternative schemes: ga.oneGeneration(), ga.oneGenerationSteadyState()
    ga.oneGenerationElitism()

    bestChromo = ga.bestChromosome()
    if bestFitness < bestChromo.fitness:
        bestChromoOverallRepres = bestChromo.repres
        bestFitness = bestChromo.fitness
        bestDist = str(dist(bestChromo.repres, ga.getProblParam()))

    print("".join([
        'Best solution in generation ',
        str(g),
        ' is: ',
        str(bestChromo.repres),
        ' fitness = ',
        str(bestChromo.fitness),
        ' dist: ',
        str(dist(bestChromo.repres, ga.getProblParam())),
    ]))

print("\n")
print("".join([
    'Best solution overall is: ',
    str(bestChromoOverallRepres),
    ' fitness = ',
    str(bestFitness),
    ' dist: ',
    str(bestDist),
]))
import matplotlib.pyplot as plt import numpy as np import scipy as sp from math import sqrt from ReadData import ReadData from ExtractSamples import ExtractSamples from ColorModels import ColorModels from CrossValidation import CrossValidation from RunKMeans import RunKMeans from RunEM_GMM import RunEM_GMM from RunCommands import RunFCM, RunPCM from sklearn.metrics import confusion_matrix, roc_curve from scipy.stats import multivariate_normal data_train, labels, locations = ReadData() # Load all data Data, ObjLabels = ExtractSamples(data_train, labels, locations) # extract objects plt.close("all") # close all image plots Data = Data / 255 #Normalize pixel values to be between 0 and 1 Data_HSV, Data_YIQ, Data_HLS = ColorModels( Data, ObjLabels) # Transform RGB to different color spaces DTrain, DVal, labelsTrain, labelsVal = CrossValidation( Data, ObjLabels, 0.8, 'RGB') #80% of data for training and 20% for testing DTrain_HSV, DVal_HSV, labelsTrain_HSV, labelsVal_HSV = CrossValidation( Data_HSV, ObjLabels, 0.8, 'HSV') #80% of data for training and 20% for testing DTrain_YIQ, DVal_YIQ, labelsTrain_YIQ, labelsVal_YIQ = CrossValidation( Data_YIQ, ObjLabels, 0.8, 'YIQ') #80% of data for training and 20% for testing
5, 1, stride=2, activation_fn=tf.nn.sigmoid)).tensor def get_generator_loss(D2): '''Loss for the genetor. Maximize probability of generating images that discrimator cannot differentiate. Returns: see the paper ''' return tf.reduce_mean( tf.nn.relu(D2) - D2 + tf.log(1.0 + tf.exp(-tf.abs(D2)))) if __name__ == "__main__": rd = ReadData() input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, 32 * 32]) with pt.defaults_scope(activation_fn=tf.nn.elu, batch_normalize=True, learned_moments_update_rate=0.0003, variance_epsilon=0.001, scale_after_normalization=True): with tf.variable_scope("model"): D1 = discriminator(input_tensor) # positive examples D_params_num = len(tf.trainable_variables()) G = generator() with tf.variable_scope("model", reuse=True): D2 = discriminator(G) # generated examples
# ,test_size=test_size) #--------DROP ID column from train and test #if ISTRAIN == 1: tmpModel,df = trainingAlgo(X_train,y_train,X_test,y_test) model = tmpModel """ _,acc,rocScore = models.evaluateModel(X_test,y_test,tmpModel) if roc < rocScore: roc = rocScore model = tmpModel print("Accurachy %f, ROC Score %f" % (acc,roc)) """ return model,df #------Get feature set and create classes readData = ReadData(".","HomeCredit","sa","Pass@123") models = Models() featureSet = readData.getData("dbo.FeatureSet") featureSet = models.convertCategoricalVaribalesToOneHotEncoding(featureSet) featureSet = models.addFeatures(featureSet) train = featureSet[featureSet["TARGET"] != -1] test = featureSet[featureSet["TARGET"] == -1] test_ids = test["SK_ID_CURR"] test.drop(["TARGET","SK_ID_CURR"],axis = 1,inplace = True) train.drop(["SK_ID_CURR"],axis = 1,inplace = True) train["TARGET"] = train["TARGET"].astype("category")
def main():
    """Read one batch of training text and report its character statistics.

    Builds the input pipeline over the training files, starts the queue
    runners inside a session, splits the batch into single characters and
    maps each lower-cased character to an integer id: ``a``-``z`` map to
    1-26, space to 0, and ``;`` and ``-`` to -1. The number of characters
    and the size of the lookup table are printed.

    Note:
        ``batch_size`` is not defined in this function and has to be
        provided by the enclosing module.
    """
    readdata = ReadData()
    # Only the training file list is used; the test list is ignored.
    trainingFiles, _ = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batch_size)

    with tf.Session() as sess:
        # Create the graph, etc.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            dict1 = {
                letter: index + 1
                for index, letter in enumerate(string.ascii_lowercase)
            }
            dict1[' '] = 0
            dict1[';'] = -1
            dict1['-'] = -1
            vocab_size = len(dict1)

            example_batch = tf.reshape(features, [-1])
            # An empty delimiter splits each string into single characters.
            item = tf.string_split(example_batch, delimiter="").values.eval()
            chars = [dict1[alp.decode().lower()] for alp in item]
            data_size = len(chars)
            print('Data has %d characters, %d unique.' % (data_size, vocab_size))

            # # Hyper-parameters
            # hidden_size = 100  # hidden layer's size
            # seq_length = 25  # number of steps to unroll
            # learning_rate = 1e-1
            # inputs = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="inputs")
            # targets = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")
            # init_state = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")
            # intializer = tf.random_normal_initializer(stddev=1.0)
            # with tf.variable_scope("RNN") as scope:
            #     hs_t = init_state
            #     ys = []
            #     for t,xs_t in enumerate(tf.split(inputs,seq_length,axis=0)):
            #         if t > 0:scope.reuse_variables()
            #         Wxh = tf.get_variable("Wxh",shape=[vocab_size,hidden_size],dtype=tf.float32,intializer=intializer)
            #         Whh = tf.get_variable("Whh",shape=[hidden_size,hidden_size],dtype=tf.float32,intializer=intializer)
            #         Why = tf.get_variable("Why",shape=[hidden_size,vocab_size],dtype=tf.float32,intializer=initializer)
            #         bh = tf.get_variable("bh",shape=[hidden_size],intializer=intializer)
            #         by = tf.get_variable("by",shape=[vocab_size],initializer=intializer)
            #         hs_t = tf.tanh(tf.matmul(xs_t,Wxh) + tf.matmul(hs_t,Whh) + bh)
            #         ys_t = tf.matmul(hs_t,Why) + by
            #         ys.append(ys_t)
            #     h_prev = hs_t
            # output_softmax = tf.nn.softmax(ys[-1])
            # outputs = tf.concat(ys,axis=0)
            # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets,logits=outputs))
            # #optimizer
            # minimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            # grad_and_vars = minimizer.compute_gradients(loss)
            # pred = RNN(chars,weights,biases)
            # # Loss and optimizer
            # # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
            # # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
            # # # Model evaluation
            # # correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
            # # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            # # print(example_batch)
        finally:
            # Always stop and join the queue-runner threads, even when the
            # body above raises; otherwise they keep running after the error.
            coord.request_stop()
            coord.join(threads)
''' Created on 5 apr. 2020 @author: Alexandraah ''' from GA import GA from ReadData import ReadData from utils import * import matplotlib.pyplot as plt data = ReadData("C:\\@Alexandra\\anul2\\semestrul2\\ai\\lab\\laborator4\\berlin.txt") params = {'popSize': 500, 'noGen': 1000} ga = GA(params, data.problParams) ga.initialisation() ga.evaluation() res=[] res1=[] for i in range(params['noGen']): #ga.oneGeneration() ga.oneGenerationElitism() #ga.oneGenerationSteadyState() best = ga.bestChromosome() fitnesses = [c.fitness for c in ga.population] avgFitness = sum(fitnesses) / len(fitnesses) res.append(avgFitness) for c in ga.population: res1.append(c.fitness) print("Fiteness:"+str(c.fitness)+"\n") print('Generation: ' + str(i) + '\nBest chromosome: ' + str(best.repres) + '\nLocal best fitness: ' + str(best.fitness)
atom_close = self.dynamic(atom_close,"close",index) similar_value = 0.5 * self.calc_pearson(mul_open,atom_open) + 0.5 * self.calc_pearson(mul_close,atom_close) if (result["pearson_index"] < similar_value): result = { "start_time": temp_compare[0]["trade_date"], "end_time": temp_compare[-1]["trade_date"], "pearson_index": similar_value } # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S") # print("Calc Cost: {}".format(str((end_time - start_time).seconds))) return result if __name__ =='__main__': read_data = ReadData() ts_code_list = read_data.mysql_read_ts_code()[:] print("----------------Complete ts_code reading--------------------:{}") start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) print("--------Start cal----------:{0}".format(start_time)) results_dict = {} source_data = read_data.mysql_read_data("000009.SZ").iloc[-60:] for ts_code in ts_code_list: #print(ts_code) compare_data = read_data.mysql_read_data(ts_code).iloc[:-60] if len(compare_data) < 60: print("该股票数据不足") else: compare = CompareSimilarKDynamic(source_data, compare_data, 60) result = compare.compare_dynamic() results_dict[ts_code] = result
default='nmt_logs') parser.add_argument( '--inference', action="store_true", help='Whether to run inference or simply train the network') parser.add_argument('--pretrained_path', help='Path to Pre-trained Weights') args = parser.parse_args() assert args.dataset.endswith('csv'), "Dataset File needs to be in CSV format" assert 0. <= args.train_val_split < 1., "Train-vs-Validation Split need to be between [0, 1)" latent_dim = args.latent_dim # Reading and Preparing Training/Validation Dataset reader = ReadData(args.dataset, args.train_val_split, args.language_1, args.language_2) (X_train, y_train), (X_val, y_val) = reader.prep_data() train_samples = len(X_train) val_samples = len(X_val) num_encoder_tokens = reader.num_encoder_tokens num_decoder_tokens = reader.num_decoder_tokens # Loading Embedding Matrix lang1_embedding = Word2Vec.load(args.lang1_embedding) lang1_tok = Tokenizer() lang1_tok.fit_on_texts(reader.language_1_text) encoder_embedding_matrix = np.zeros((num_encoder_tokens, latent_dim)) for word, i in lang1_tok.word_index.items(): try: embedding_vector = lang1_embedding[word]
metrics=['accuracy']) else: model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.load_weights(args.weights) print('Model Loaded from {}.'.format(args.weights)) model.summary() embedding = {'type': args.embedding_type, 'path': args.embedding_path} if args.model == 'sentence_pair': reader = ReadData(path_file=None, embedding_config=embedding, data_shape=inputs, sentence_pair=True) else: reader = ReadData(path_file=None, embedding_config=embedding, data_shape=inputs, sentence_pair=False) test_data = pd.read_excel(args.dataset, sheet_name=None)['Sheet1'] test_data = test_data.sample(frac=1.0).reset_index(drop=True) test_data = test_data.head(int(len(test_data) * args.size)) print(test_data.columns) assert len(test_data.columns) > 1, "Labels of Test set not available."
def main(args):
    """Build, prune and evaluate decision trees from command-line arguments.

    Two trees are built from the training set, one with the
    ``'information_gain'`` heuristic and one with ``'variance_impurity'``.
    Each is pruned against the validation set, and the accuracy of all four
    trees on the test set is printed.

    Args:
        args: Exactly six strings: ``L`` and ``K`` (integers forwarded to
            ``DecisionTree.pruneTree``), the training, validation and test
            set file names (resolved under ``./<DATA_DIRECTORY>/``), and
            ``yes`` (case-insensitive) to also print the two unpruned trees.

    Raises:
        SystemExit: With status 1 when ``args`` does not hold six items.
        ValueError: When ``L`` or ``K`` cannot be converted to ``int``.
    """
    # judge input arguments length
    if len(args) != 6:
        print('Should Have Six Input Arguments')
        # A usage error must not report success (status 0) to the caller.
        raise SystemExit(1)

    # input parameters
    L = int(args[0])
    K = int(args[1])
    training_set_file_name = args[2]
    validation_set_file_name = args[3]
    test_set_file_name = args[4]
    to_print = args[5].lower() == 'yes'

    path = './' + DATA_DIRECTORY + '/'

    # read data from training set, test set, and validation set
    # NOTE(review): `labels` is overwritten by each call, so the labels of
    # the test set are the ones used below -- presumably all three files
    # share the same header; confirm.
    rd = ReadData()
    labels, training_set = rd.createDataSet(path + training_set_file_name)
    labels, validation_set = rd.createDataSet(path + validation_set_file_name)
    labels, test_set = rd.createDataSet(path + test_set_file_name)

    # build tree (buildDT receives a copy of the labels each time)
    dt = DecisionTree()
    info_gain_tree_root = dt.buildDT(training_set, labels.copy(),
                                     'information_gain')
    pruned_info_gain_tree_root = dt.pruneTree(info_gain_tree_root, L, K,
                                              validation_set, labels)
    variance_impurity_tree_root = dt.buildDT(training_set, labels.copy(),
                                             'variance_impurity')
    pruned_variance_impurity_tree_root = dt.pruneTree(
        variance_impurity_tree_root, L, K, validation_set, labels)

    print()
    info_accuracy = dt.calAccuracy(test_set, info_gain_tree_root, labels)
    print('Accuracy of decision tree constructed using information gain: %s'
          % info_accuracy)
    variance_accuracy = dt.calAccuracy(test_set, variance_impurity_tree_root,
                                       labels)
    print('Accuracy of decision tree constructed using variance impurity: %s'
          % variance_accuracy)
    prune_info_accuracy = dt.calAccuracy(test_set, pruned_info_gain_tree_root,
                                         labels)
    print(
        'Accuracy of pruned decision tree constructed using information gain: %s'
        % prune_info_accuracy)
    pruned_variance_accuracy = dt.calAccuracy(
        test_set, pruned_variance_impurity_tree_root, labels)
    print(
        'Accuracy of pruned decision tree constructed using variance impurity: %s'
        % pruned_variance_accuracy)

    if to_print:
        print()
        print('Build Decision Tree By Using Information Gain')
        info_gain_tree_root.printTree()
        print()
        print()
        print('Build Decision Tree By Using Variance Impurity')
        variance_impurity_tree_root.printTree()
        print()