def apply(df, config, header, dataset_features):
    debug = config['debug']

    #------------------------

    rows = df.shape[0]
    columns = df.shape[1]

    final_predictions = pd.DataFrame(np.zeros([rows, 1]), columns=['prediction'])

    worksheet = df.copy()
    worksheet['weight'] = 1  #/ rows

    tmp_df = df.copy()
    tmp_df['Decision'] = worksheet['weight'] * tmp_df['Decision']  #normal distribution

    for i in range(0, 1):
        root = 1
        file = "outputs/rules/rules_" + str(i) + ".py"
        if debug == False:
            functions.createFile(file, header)

        #print(tmp_df)
        Training.buildDecisionTree(tmp_df, root, file, config, dataset_features)

    #print(final_predictions)
    """for row, instance in final_predictions.iterrows():
def main():
    auto = Auto()

    if USER_SETTINGS.get('daily_bonus', 'on'):
        print_d("> Daily Bonus <")
        auto.get_daily_bonus()

    if USER_SETTINGS.get('bonus_from_partners', 'on'):
        print_d("> Bonus from Partners <")
        auto.get_bonus_from_partners()

    if USER_SETTINGS.get('club_sales', 'on'):
        print_d("> Club Sales <")
        auto.get_club_sales()

    if USER_SETTINGS.get('get_training_points', 'on'):
        print_d("> Training Points <")
        TP_settings = USER_SETTINGS.get_section_items('get_training_points')
        auto.get_training_points(**TP_settings)

    if USER_SETTINGS.get('morale', 'on'):
        print_d("> Morale <")
        morale_boost = MoraleBoost()
        morale_boost()

    if USER_SETTINGS.get('training', 'on'):
        print_d("> Training <")
        training = Training()
        training()

    if USER_SETTINGS.get('extra_training', 'on'):
        print_d("> Extra Training <")
        extra_training = ExtraTraining()
        extra_training()
def __init__(self, file_name, file_to_dump):
    try:
        self.file_name = "files/" + file_name
        self.file_to_dump = file_to_dump
        self.iterations = 2
        self.skipGram = Training(self.file_name)
        self.lexicon = FinanceLexicon()
        self.word_vectors = self.skipGram.load_vectors(self.file_to_dump)
    except:
        # do nothing
        print()
def add_training(self, t_day, t_time, t_instructor):
    t = Training(day=t_day, time=t_time, instructor=t_instructor)
    self.response.write("""
        <p>
        <b>Training added:</b><br/>
        <b>Day:</b> %s - %s <br/>
        <b>Instructor:</b> %s
        </p>
    """ % (t.day, t.time, t.instructor))
    t.put()
    Mailer.send_mail(self, "*****@*****.**", "New training session",
                     "New training session registered:\n\tDay: %s - %s\n\tInstructor: %s"
                     % (t_day, t_time, t_instructor))
def handleOnKeyPress(event):
    if event.key == "enter":
        # initialize and run the training algorithm
        if len(neuronas) > 0:
            algoritmo = Training(neuronas, ETA, TOLERANCIA, LIMITE_EPOCAS)
            algoritmo.run()
        else:
            print("[!] ERROR: there are no patterns in the list.\n")
    elif event.key == "backspace":
        # clear the plot and the list of patterns
        neuronas.clear()
        plt.cla()
        plt.title("Adaline")
        plt.grid(True)
        plt.xlim([-1, 4])
        plt.ylim([-1, 4])
        plt.draw()
def apply(df, config, header, dataset_features):
    debug = config['debug']
    num_of_trees = config['num_of_trees']

    for i in range(0, num_of_trees):
        subset = df.sample(frac=1 / num_of_trees)

        root = 1
        file = "outputs/rules/rule_" + str(i) + ".py"
        if debug == False:
            functions.createFile(file, header)

        Training.buildDecisionTree(subset, root, file, config, dataset_features)
def learn():
    train = Training.readFromFile("training/AND")
    graph = train.train(0.5)
    print(repr(graph))
    print("[0,0]", graph.activate([0, 0], 0.5))
    print("[1,0]", graph.activate([1, 0], 0.5))
    print("[0,1]", graph.activate([0, 1], 0.5))
    print("[1,1]", graph.activate([1, 1], 0.5))
    return graph
def display_page(pathname):
    if pathname == '' or pathname == '/' or pathname == config.login_url:
        # print('--> login')
        return Login()
    elif pathname == config.home_url:
        # print('--> home')
        return Homepage()
    elif pathname == config.training_url:
        # print('--> training')
        return Training()
    elif pathname == config.classification_url:
        # print('--> classification')
        return Classification()
def training():
    try:
        run_time_logger.add_in_logs("Started Model Training")
        global overall_accuracy
        overall_accuracy = Training().train(path, data_base_name, data_base_table_name)
        overall_accuracy = round(overall_accuracy, 2)
        return render_template("done.html", text="Training completed successfully")
    except Exception as e:
        run_time_logger.add_in_logs("Faced an error")
        run_time_logger.add_in_logs("Training")
        run_time_logger.add_in_logs("Error on line number : {}".format(sys.exc_info()[-1].tb_lineno))
        run_time_logger.add_in_logs(str(e))
        return render_template("error.html", text=str(e))
def home_page():
    try:
        run_time_logger.add_in_logs("Started Model Training")
        global overall_accuracy
        overall_accuracy = Training().train(path, data_base_name, data_base_table_name)
        overall_accuracy = round(overall_accuracy, 2)
        run_time_logger.add_in_logs("Log in to application")
        return render_template("home_page.html")
    except Exception as e:  # bind the exception so the handler below can log it
        run_time_logger.add_in_logs("Faced an error")
        run_time_logger.add_in_logs("Home Page module")
        run_time_logger.add_in_logs("Error on line number : {}".format(sys.exc_info()[-1].tb_lineno))
        run_time_logger.add_in_logs(str(e))
        return render_template("error.html", text=str(e))
def train(self):
    X_train, y_train, X_test, y_test = self.dataset_ops()
    self.mdl = self.baseline_model()
    # print(self.mdl.summary())
    Training(model=self.mdl,
             X_train=X_train, Y_train=y_train,
             X_test=X_test, Y_test=y_test,
             optimizer=keras.optimizers.RMSprop(lr=1e-4),
             loss='categorical_crossentropy',
             metrics=['acc', self.f1],
             epochs=3,
             summaries_directory="./summaries",
             tensorboard_write_grad=True).train()
    return self.mdl
def main(f=None):
    args = parser.parse_args()

    if args.cmd_type == "embeddings":
        build_embeddings(args.data, args.output)
    elif args.cmd_type == "train":
        print("Loading embeddings ...")
        embeddings = EmbeddingsData.load(args.embeddings_data_file)

        print("Loading the data ...")
        the_data = Data.make_data(args.train_file, args.dev_file, args.batch_size)

        print("Building the model ...")
        model = build_model(embeddings, args.batch_size)

        train_sess = Training.make_training(model, the_data, args.epoch_count)
        while train_sess.has_more_epochs():
            print("Next epoch ...")
            train_sess.next_epoch()
        # TODO save model
    else:
        # ?
        exit(1)

    return 0
def fit(df, config):
    target_label = df.columns[len(df.columns) - 1]
    if target_label != 'Decision':
        print("Expected: Decision, Existing: ", target_label)
        raise ValueError(
            'Please confirm that the target column is named "Decision" and is the rightmost column of the pandas data frame'
        )

    #------------------------
    #initialize params and folders
    config = functions.initializeParams(config)
    functions.initializeFolders()
    #------------------------

    algorithm = config['algorithm']

    valid_algorithms = ['ID3', 'C4.5', 'CART', 'Regression']
    if algorithm not in valid_algorithms:
        raise ValueError('Invalid algorithm passed. You passed ', algorithm,
                         " but valid algorithms are ", valid_algorithms)

    #------------------------

    enableRandomForest = config['enableRandomForest']
    num_of_trees = config['num_of_trees']
    enableMultitasking = config['enableMultitasking']
    enableGBM = config['enableGBM']
    epochs = config['epochs']
    learning_rate = config['learning_rate']
    enableAdaboost = config['enableAdaboost']

    #------------------------

    raw_df = df.copy()
    num_of_rows = df.shape[0]
    num_of_columns = df.shape[1]

    if algorithm == 'Regression':
        if df['Decision'].dtypes == 'object':
            raise ValueError(
                'Regression trees cannot be applied to nominal target values! You can either change the algorithm or the data set.'
            )

    if df['Decision'].dtypes != 'object':
        #this must be a regression tree even if it is not mentioned in algorithm
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'
        global_stdev = df['Decision'].std(ddof=0)

    if enableGBM == True:
        print("Gradient Boosting Machines...")
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'

    if enableAdaboost == True:
        for j in range(0, num_of_columns):
            column_name = df.columns[j]
            if df[column_name].dtypes == 'object':
                raise ValueError(
                    'Adaboost must be run on a numeric data set for both features and target'
                )

    #-------------------------

    print(algorithm, " tree is going to be built...")

    #dataset_features is used to check whether features are numeric or nominal.
    #numeric features should be transformed to nominal values based on scales.
    dataset_features = dict()

    header = "def findDecision("
    header = header + "obj"
    header = header + "): #"

    num_of_columns = df.shape[1] - 1
    for i in range(0, num_of_columns):
        column_name = df.columns[i]
        dataset_features[column_name] = df[column_name].dtypes
        header = header + "obj[" + str(i) + "]: " + column_name
        if i != num_of_columns - 1:
            header = header + ", "

    header = header + "\n"

    #------------------------

    begin = time.time()

    trees = []
    alphas = []

    if enableAdaboost == True:
        trees, alphas = adaboost.apply(df, config, header, dataset_features)
    elif enableGBM == True:
        if df['Decision'].dtypes == 'object':
            #transform the classification problem into regression
            trees, alphas = gbm.classifier(df, config, header, dataset_features)
            classification = True
        else:
            #regression
            trees = gbm.regressor(df, config, header, dataset_features)
            classification = False
    elif enableRandomForest == True:
        trees = randomforest.apply(df, config, header, dataset_features)
    else:
        #regular decision tree building
        root = 1
        file = "outputs/rules/rules.py"
        functions.createFile(file, header)
        trees = Training.buildDecisionTree(df, root, file, config, dataset_features)

    print("finished in ", time.time() - begin, " seconds")

    obj = {"trees": trees, "alphas": alphas, "config": config}
    return obj
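# Illustration (an assumed example, not part of the original snippet): for a data
# frame whose feature columns are Outlook, Humidity and Wind followed by the
# Decision column, the header string built by the loop above would read:
#   def findDecision(obj): #obj[0]: Outlook, obj[1]: Humidity, obj[2]: Wind
# i.e. the generated rules file takes a single list-like argument and the comment
# documents which list index maps to which original feature.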
from training import Training
from database import Database

workout = Database()
workout.import_data()

first_try = Training(90, "ENG")  #set duration and language ("PL"/"ENG")
pattern = (first_try.import_pattern())[0]
first_try.generate_plan(pattern)
first_try.print_training()

#saving (line below) is optional, it stores the instructions in a txt file
#first_try.save_training()
def main():
    X = []
    Y = []
    char2intDict = None
    int2charDict = None
    vocabulary = None

    config = FileHelper.load_config('config.json')
    seq_length = config['preprocessing']['sequence_chars_length']

    # Load data or preprocess
    if not config['preprocessing']['exec_preprocessing']:
        X = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['X_file'])
        Y = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['Y_file'])
        char2intDict = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['char2intDict_file'])
        int2charDict = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['int2charDict_file'])
    else:
        preprocessing = Preprocessing(config)
        X, Y, char2intDict, int2charDict = preprocessing.preprocess()
        FileHelper.save_object_to_file(
            config['preprocessing']['checkpoints']['X_file'], X)
        FileHelper.save_object_to_file(
            config['preprocessing']['checkpoints']['Y_file'], Y)

    vocabulary = FileHelper.load_object_from_file(
        config['preprocessing']['checkpoints']['vocabulary_file'])

    # Save the unshaped version of X because it is needed for generation later
    X_unshaped = X

    # Reshape the data into the format the LSTM expects: [samples, timesteps, features]
    X = numpy.reshape(X, (len(X), seq_length, 1))

    # Normalize/rescale all integers to the range 0-1
    X = X / float(len(vocabulary))

    # As usual, one-hot encode the categorical output variables
    # (vector of zeros with a single 1 --> 0..N-1 categories)
    Y = np_utils.to_categorical(Y)

    training = Training(config)

    # Define the model
    model = training.define_model(X, Y)

    if config['training']['exec_training']:
        # Train the model
        model = training.train(X, Y, char2intDict, vocabulary, model)
    else:
        # Just set the previously trained weights for the model
        model.load_weights(config['training']['load_weights_filename'])
        model.compile(loss='categorical_crossentropy', optimizer='adam')

    if config['generation']['exec_generation']:
        # Generate the random seed used as the starting value for text generation
        seed = generate_random_seed(X_unshaped)
        generatedText = generate_text(
            config['generation']['text_chars_length'], int2charDict,
            vocabulary, seed, model)

        # Save the generated text to file
        outputFilename = config['generation']['foldername'] + '/' + \
            datetime.datetime.now().strftime('%Y%m%d_%H_%M_%S') + '.txt'
        FileHelper.write_data(outputFilename, generatedText)
def apply(df, config, header, dataset_features):
    models = []
    alphas = []

    initializeAlphaFile()

    num_of_weak_classifier = config['num_of_weak_classifier']

    #------------------------

    rows = df.shape[0]
    columns = df.shape[1]
    final_predictions = pd.DataFrame(np.zeros([rows, 1]), columns=['prediction'])

    worksheet = df.copy()
    worksheet['Weight'] = 1 / rows  #uniform distribution initially

    final_predictions = pd.DataFrame(np.zeros((df.shape[0], 2)),
                                     columns=['Prediction', 'Actual'])
    final_predictions['Actual'] = df['Decision']

    #for i in range(0, num_of_weak_classifier):
    pbar = tqdm(range(0, num_of_weak_classifier), desc='Adaboosting')
    for i in pbar:
        worksheet['Decision'] = worksheet['Weight'] * worksheet['Decision']

        root = 1
        file = "outputs/rules/rules_" + str(i) + ".py"

        functions.createFile(file, header)

        #print(worksheet)
        Training.buildDecisionTree(worksheet.drop(columns=['Weight']), root,
                                   file, config, dataset_features)

        #---------------------------------------

        moduleName = "outputs/rules/rules_" + str(i)
        fp, pathname, description = imp.find_module(moduleName)
        myrules = imp.load_module(moduleName, fp, pathname, description)
        models.append(myrules)

        #---------------------------------------

        df['Epoch'] = i
        worksheet['Prediction'] = df.apply(findPrediction, axis=1)
        df = df.drop(columns=['Epoch'])

        #---------------------------------------

        worksheet['Actual'] = df['Decision']
        worksheet['Loss'] = abs(worksheet['Actual'] - worksheet['Prediction']) / 2
        worksheet['Weight_Times_Loss'] = worksheet['Loss'] * worksheet['Weight']

        epsilon = worksheet['Weight_Times_Loss'].sum()
        #use alpha to update weights in the next round
        alpha = math.log((1 - epsilon) / epsilon) / 2
        alphas.append(alpha)

        #-----------------------------
        #store alpha
        addEpochAlpha(i, alpha)
        #-----------------------------

        worksheet['Alpha'] = alpha
        worksheet['New_Weights'] = worksheet['Weight'] * (
            -alpha * worksheet['Actual'] * worksheet['Prediction']).apply(math.exp)

        #normalize
        worksheet['New_Weights'] = worksheet['New_Weights'] / worksheet['New_Weights'].sum()
        worksheet['Weight'] = worksheet['New_Weights']
        worksheet['Decision'] = df['Decision']

        final_predictions['Prediction'] = final_predictions['Prediction'] + \
            worksheet['Alpha'] * worksheet['Prediction']
        #print(final_predictions)

        worksheet = worksheet.drop(columns=[
            'New_Weights', 'Prediction', 'Actual', 'Loss', 'Weight_Times_Loss',
            'Alpha'
        ])

        mae = (np.abs(final_predictions['Prediction'].apply(functions.sign) -
                      final_predictions['Actual']) / 2).sum() / final_predictions.shape[0]
        #print(mae)
        pbar.set_description("Epoch %d. Loss: %d. Process: " % (i + 1, mae))

    #------------------------------

    final_predictions['Prediction'] = final_predictions['Prediction'].apply(functions.sign)
    final_predictions['Absolute_Error'] = np.abs(
        final_predictions['Actual'] - final_predictions['Prediction']) / 2
    #print(final_predictions)
    mae = final_predictions['Absolute_Error'].sum() / final_predictions.shape[0]

    print("Loss (MAE) found ", mae, " with ", num_of_weak_classifier, ' weak classifiers')

    return models, alphas
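# Worked illustration of the AdaBoost update above (an assumption for clarity, not
# part of the original module): with a weighted error of epsilon = 0.25, the round's
# classifier weight is alpha = 0.5 * ln((1 - 0.25) / 0.25) ~= 0.549, and a correctly
# classified instance (actual * prediction = +1) has its weight scaled by
# exp(-alpha) ~= 0.577 before the re-normalization step.
import math

epsilon_example = 0.25
alpha_example = math.log((1 - epsilon_example) / epsilon_example) / 2  # ~0.549
correct_scale = math.exp(-alpha_example * 1 * 1)                       # ~0.577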
## Set configuration options ##
config = Config(sys.argv[1])
vb.level = config.verbose_level
vb.initialize()

if not config.runTraining and not config.runInference:
    vb.ERROR("RUN : No configuration set ")
    vb.ERROR("RUN : Please set the arguments 'runTraining' or 'runInference' to define the workflow ")
    vb.ERROR("RUN : Exiting.")
    sys.exit(1)

## Setup Deep Learning class
dnn = Training()

dnn.variable_labels = plb.variable_labels()
dnn.sample_labels = plb.sample_labels()

dnn.hep_data = config.hep_data
dnn.model_name = config.dnn_data
dnn.msg_svc = vb
dnn.treename = config.treename
dnn.useLWTNN = True
dnn.dnn_name = "dnn"
dnn.output_dim = config.output_dim
dnn.loss = config.loss
dnn.init = config.init
dnn.nNodes = config.nNodes
dnn.dropout = None
    os.path.join(args.input, args.save_data + ".unlabel.pth"))

# Reading the word vocab file
with open(os.path.join(args.input, args.save_data + '.vocab.pickle'), 'rb') as f:
    id2w = pickle.load(f)

# Reading the label vocab file
with open(os.path.join(args.input, args.save_data + '.label.pickle'), 'rb') as f:
    id2label = pickle.load(f)

args.id2w = id2w
args.n_vocab = len(id2w)
args.id2label = id2label
args.num_classes = len(id2label)

object = Training(args, logger)

logger.info('Corpus: {}'.format(args.corpus))
logger.info('Pytorch Model')
logger.info(repr(object.embedder))
logger.info(repr(object.encoder))
logger.info(repr(object.clf))
logger.info(repr(object.clf_loss))
if args.lambda_ae:
    logger.info(repr(object.ae))

# Train the model
object(train_data, dev_data, test_data, unlabel_data)
def __init__(self):
    Training.__init__(self)
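# Equivalent, more idiomatic spelling (a sketch, assuming Training is the only base
# class and no constructor arguments need to be forwarded; the subclass name below
# is hypothetical, for illustration only):
class TrainingSubclass(Training):
    def __init__(self):
        super().__init__()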
def RunTraining(patterns):
    if len(patterns) > 0:
        algoritmo = Training(patterns)
        algoritmo.run()
    else:
        print("[!] ERROR. There are no patterns in the list.\n")
def fit(df, config):
    target_label = df.columns[len(df.columns) - 1]
    if target_label != 'Decision':
        print("Expected: Decision, Existing: ", target_label)
        raise ValueError('Data error: please convert the data to the expected format!')

    #------------------------
    #initialize params and folders
    config = functions.initializeParams(config)
    functions.initializeFolders()

    algorithm = config['algorithm']
    RandomForest = config['RandomForest']
    num_of_trees = config['num_of_trees']

    #------------------------

    raw_df = df.copy()
    num_of_rows = df.shape[0]
    num_of_columns = df.shape[1]

    if algorithm == 'Regression':
        if df['Decision'].dtypes == 'object':
            raise ValueError('Data error: a regression tree cannot be built on a nominal target')

    if df['Decision'].dtypes != 'object':
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'
        global_stdev = df['Decision'].std(ddof=0)

    #-------------------------

    print(algorithm, ": building the decision tree...")

    dataset_features = dict()  # dictionary

    header = "def findDecision("
    header = header + "obj"
    header = header + "): #"

    num_of_columns = df.shape[1] - 1
    for i in range(0, num_of_columns):
        column_name = df.columns[i]
        dataset_features[column_name] = df[column_name].dtypes
        header = header + "obj[" + str(i) + "]: " + column_name
        if i != num_of_columns - 1:
            header = header + ", "

    header = header + "\n"

    #------------------------

    begin = time.time()

    trees = []
    alphas = []

    if RandomForest == True:
        trees = randomforest.apply(df, config, header, dataset_features)
    else:
        root = 1
        file = "outputs/rules/rules.py"
        functions.createFile(file, header)
        trees = Training.buildDecisionTree(df, root, file, config, dataset_features)

    print("The algorithm finished in ", time.time() - begin, " seconds")

    obj = {"trees": trees, "alphas": alphas, "config": config}
    return obj
preprocessor = Preprocessor()

if not os.path.exists(WORD2VEC_FILE):
    preprocessor.generateFixedLength(RAW_DATA_FILE, SEQUENCE_SIZE,
                                     TRAINING_DATE_RATE, TRAIN_FIXED_FILE,
                                     TEST_FIXED_FILE)
    W2V = preprocessor.makeW2Vfile(RAW_DATA_FILE, WORD2VEC_FILE, VECTOR_SIZE,
                                   SEQUENCE_SIZE, 0)
else:
    W2V = gensim.models.Word2Vec.load(WORD2VEC_FILE)

LEARNING_RATE = 0.01
BATCH_SIZE = 2059
ITER_NUM = 5
DROPOUT_RATE = 0.7
EARLY_STOP_COUNT = 3

""" training
X_DATA, Y_DATA = preprocessor.getVectorData(TRAIN_FIXED_FILE, W2V, SEQUENCE_SIZE)
training = Training(LEARNING_RATE, BATCH_SIZE, ITER_NUM, SEQUENCE_SIZE, VECTOR_SIZE,
                    DROPOUT_RATE, EARLY_STOP_COUNT)
training.train(X_DATA, Y_DATA)
"""

sentence = "그책에는이별이야기가있을까어쩌면네가지금막귀퉁이를접고있는페이지에"
X_DATA = preprocessor.getXVectorData(sentence, W2V, SEQUENCE_SIZE)

predicting = Training(LEARNING_RATE, 1, ITER_NUM, SEQUENCE_SIZE, VECTOR_SIZE,
                      drop_out_rate=1.0)
predicted_sentence = predicting.predict(X_DATA, sentence)
print(predicted_sentence)
def classifier(df, config, header, dataset_features):
    models = []

    print("gradient boosting for classification")

    epochs = config['epochs']

    temp_df = df.copy()
    original_dataset = df.copy()
    worksheet = df.copy()

    classes = df['Decision'].unique()

    boosted_predictions = np.zeros([df.shape[0], len(classes)])

    pbar = tqdm(range(0, epochs), desc='Boosting')

    #store the actual set, we will use this to calculate loss
    actual_set = pd.DataFrame(np.zeros([df.shape[0], len(classes)]), columns=classes)
    for i in range(0, len(classes)):
        current_class = classes[i]
        actual_set[current_class] = np.where(df['Decision'] == current_class, 1, 0)
    actual_set = actual_set.values  #transform it to a numpy array

    #for epoch in range(0, epochs):
    for epoch in pbar:
        for i in range(0, len(classes)):
            current_class = classes[i]

            if epoch == 0:
                temp_df['Decision'] = np.where(df['Decision'] == current_class, 1, 0)
                worksheet['Y_' + str(i)] = temp_df['Decision']
            else:
                temp_df['Decision'] = worksheet['Y-P_' + str(i)]

            predictions = []

            #change the data type of the decision column
            temp_df[['Decision']].astype('int64')

            root = 1
            file = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch) + ".py"

            functions.createFile(file, header)

            Training.buildDecisionTree(temp_df, root, file, config, dataset_features)
            #decision rules created

            #----------------------------

            #dynamic import
            moduleName = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch)
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description)  #rules0

            models.append(myrules)

            num_of_columns = df.shape[1]

            for row, instance in df.iterrows():
                features = []
                for j in range(0, num_of_columns - 1):  #iterate on features
                    features.append(instance[j])

                actual = temp_df.loc[row]['Decision']
                prediction = myrules.findDecision(features)

                predictions.append(prediction)

            #----------------------------
            if epoch == 0:
                worksheet['F_' + str(i)] = 0
            else:
                worksheet['F_' + str(i)] = pd.Series(predictions).values

            boosted_predictions[:, i] = boosted_predictions[:, i] + \
                worksheet['F_' + str(i)].values.astype(np.float32)

            #print(boosted_predictions[0:5,:])

            worksheet['P_' + str(i)] = 0

            #----------------------------
            temp_df = df.copy()  #restoration

        for row, instance in worksheet.iterrows():
            f_scores = []
            for i in range(0, len(classes)):
                f_scores.append(instance['F_' + str(i)])

            probabilities = functions.softmax(f_scores)

            for j in range(0, len(probabilities)):
                instance['P_' + str(j)] = probabilities[j]

            worksheet.loc[row] = instance

        for i in range(0, len(classes)):
            worksheet['Y-P_' + str(i)] = worksheet['Y_' + str(i)] - worksheet['P_' + str(i)]

        prediction_set = np.zeros([df.shape[0], len(classes)])
        for i in range(0, boosted_predictions.shape[0]):
            predicted_index = np.argmax(boosted_predictions[i])
            prediction_set[i][predicted_index] = 1

        #----------------------------
        #find the loss for this epoch: prediction_set vs actual_set
        classified = 0
        for i in range(0, actual_set.shape[0]):
            actual = np.argmax(actual_set[i])
            prediction = np.argmax(prediction_set[i])
            #print("actual: ",actual," - prediction: ",prediction)
            if actual == prediction:
                classified = classified + 1

        accuracy = str(100 * classified / actual_set.shape[0]) + "%"

        #----------------------------

        #print(worksheet.head())
        #print("round ",epoch+1)
        pbar.set_description("Epoch %d. Accuracy: %s. Process: " % (epoch + 1, accuracy))

    return models, classes
def regressor(df, config, header, dataset_features):
    models = []

    algorithm = config['algorithm']
    enableRandomForest = config['enableRandomForest']
    num_of_trees = config['num_of_trees']
    enableMultitasking = config['enableMultitasking']
    enableGBM = config['enableGBM']
    epochs = config['epochs']
    learning_rate = config['learning_rate']
    enableAdaboost = config['enableAdaboost']

    #------------------------------

    boosted_from = 0
    boosted_to = 0

    #------------------------------

    base_df = df.copy()  #gbm will manipulate the actuals. store its raw version.

    target_values = base_df['Decision'].values
    num_of_instances = target_values.shape[0]

    root = 1
    file = "outputs/rules/rules0.py"
    functions.createFile(file, header)

    Training.buildDecisionTree(df, root, file, config, dataset_features)  #generate rules0

    df = base_df.copy()

    base_df['Boosted_Prediction'] = 0

    #------------------------------

    pbar = tqdm(range(1, epochs + 1), desc='Boosting')

    #for index in range(1,epochs+1):
    #for index in tqdm(range(1,epochs+1), desc='Boosting'):
    for index in pbar:
        #print("epoch ",index," - ",end='')
        loss = 0

        #run data(i-1) and rules(i-1), save data1

        #dynamic import
        moduleName = "outputs/rules/rules%s" % (index - 1)
        fp, pathname, description = imp.find_module(moduleName)
        myrules = imp.load_module(moduleName, fp, pathname, description)  #rules0

        models.append(myrules)

        new_data_set = "outputs/data/data%s.csv" % (index)
        f = open(new_data_set, "w")

        #put the header in the following file
        columns = df.shape[1]

        mae = 0

        #----------------------------------------

        df['Epoch'] = index
        df['Prediction'] = df.apply(findPrediction, axis=1)

        base_df['Boosted_Prediction'] += df['Prediction']

        loss = (base_df['Boosted_Prediction'] - base_df['Decision']).pow(2).sum()

        if index == 1:
            boosted_from = loss / num_of_instances
        elif index == epochs:
            boosted_to = loss / num_of_instances

        df['Decision'] = int(learning_rate) * (df['Decision'] - df['Prediction'])
        df = df.drop(columns=['Epoch', 'Prediction'])

        #---------------------------------

        df.to_csv(new_data_set, index=False)
        #data(i) created

        #---------------------------------

        file = "outputs/rules/rules" + str(index) + ".py"
        functions.createFile(file, header)

        current_df = df.copy()
        Training.buildDecisionTree(df, root, file, config, dataset_features)
        #numeric features require this restoration to apply the findDecision function
        df = current_df.copy()

        #rules(i) created

        loss = loss / num_of_instances
        #print("epoch ",index," - loss: ",loss)
        #print("loss: ",loss)
        pbar.set_description("Epoch %d. Loss: %d. Process: " % (index, loss))

    #---------------------------------

    print(num_of_instances, " instances are boosted from ", boosted_from, " to ",
          boosted_to, " in ", epochs, " epochs")

    return models
np.random.seed(7)

if len(sys.argv) < 3:
    print("Usage blindspot data-file parameter-file test-suffix")
    exit()

mode = sys.argv[1]

if mode == "train" or mode == "onlyexport":
    data_file = sys.argv[2]
    parameter_file = sys.argv[3]
    examples_file = sys.argv[4]

    with codecs.open(parameter_file, 'r', 'utf-8') as f:
        params = json.load(f)

    with codecs.open(examples_file, 'r', 'utf-8') as f:
        examples = json.load(f)

    t = Training(data_file=data_file, examples=examples)

    for p in params:
        print("Trying:{}".format(p))
        p["only_export"] = (mode == "onlyexport")
        t.train(**p)

if mode == "predict":
    vocab_file = sys.argv[2]
    model_file = sys.argv[3]
    text = sys.argv[4]

    p = Prediction(model_file, vocab_file)
    print(p.predict(text))

if mode == "predict_server":
    data_file = sys.argv[2]
def fit(df, config):
    #config parameters
    debug = config['debug']
    algorithm = config['algorithm']
    enableRandomForest = config['enableRandomForest']
    num_of_trees = config['num_of_trees']
    enableMultitasking = config['enableMultitasking']
    enableGBM = config['enableGBM']
    epochs = config['epochs']
    learning_rate = config['learning_rate']
    enableAdaboost = config['enableAdaboost']

    #------------------------

    if algorithm == 'Regression':
        if df['Decision'].dtypes == 'object':
            raise ValueError(
                'Regression trees cannot be applied to nominal target values! You can either change the algorithm or the data set.'
            )

    if df['Decision'].dtypes != 'object':
        #this must be a regression tree even if it is not mentioned in algorithm
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'
        global_stdev = df['Decision'].std(ddof=0)

    if enableGBM == True:
        debug = False  #gbm needs the rules files to iterate
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'

    #-------------------------

    print(algorithm, " tree is going to be built...")

    #dataset_features is used to check whether features are numeric or nominal.
    #numeric features should be transformed to nominal values based on scales.
    dataset_features = dict()

    if (True):  #header of rules files
        header = "def findDecision("

        num_of_columns = df.shape[1] - 1
        for i in range(0, num_of_columns):
            if debug == True:
                if i > 0:
                    header = header + ","
                header = header + df.columns[i]

            column_name = df.columns[i]
            dataset_features[column_name] = df[column_name].dtypes

        if debug == False:
            header = header + "obj"

        header = header + "):\n"

        if debug == True:
            print(header, end='')

    #------------------------

    begin = time.time()

    if enableAdaboost == True:
        adaboost.apply(df, config, header, dataset_features)
    elif enableGBM == True:
        if df['Decision'].dtypes == 'object':
            #transform the classification problem into regression
            gbm.classifier(df, config, header, dataset_features)
        else:
            #regression
            gbm.regressor(df, config, header, dataset_features)
    elif enableRandomForest == True:
        randomforest.apply(df, config, header, dataset_features)
    else:
        #regular decision tree building
        root = 1
        file = "outputs/rules/rules.py"
        if debug == False:
            functions.createFile(file, header)
        Training.buildDecisionTree(df, root, file, config, dataset_features)

    print("finished in ", time.time() - begin, " seconds")
mischievous_p = Perceptron(mischievous_weights, 3)
angry_p = Perceptron(angry_weights, 4)

perceptron_array = [happy_p, sad_p, mischievous_p, angry_p]

# The training starts.
percentage = 0

# Trains as long as the score is lower than 80%
while percentage < 0.8:
    print("Training...")
    total_training_result = []

    for i in range(len(image_array)):
        for j in range(len(perceptron_array)):
            # Calculates the output for every perceptron on every image, and trains it.
            perceptron_array[j].activate_1(image_array[i])
            session = Training(image_array[i], perceptron_array[j], facit[i])
            session.train()

        # Look up which perceptron was most active.
        winner = get_winner(perceptron_array)
        total_training_result.append(winner)

    # Calculates the percentage of correct answers.
    percentage = calc_points(total_training_result, facit)
    percentage = percentage / len(image_array)
    print("I got %.2f percent correct this training round." % (percentage * 100))
    time.sleep(1)

print("Let's do the test!")
print("________________________________")
def apply(df, config, header, dataset_features):
    models = []

    num_of_trees = config['num_of_trees']

    pbar = tqdm(range(0, num_of_trees), desc='Bagging')
    for i in pbar:
        #for i in range(0, num_of_trees):
        pbar.set_description("Sub decision tree %d is processing" % (i + 1))
        subset = df.sample(frac=1 / num_of_trees)

        root = 1

        moduleName = "outputs/rules/rule_" + str(i)
        file = moduleName + ".py"

        functions.createFile(file, header)

        Training.buildDecisionTree(subset, root, file, config, dataset_features)

        #--------------------------------

        fp, pathname, description = imp.find_module(moduleName)
        myrules = imp.load_module(moduleName, fp, pathname, description)
        models.append(myrules)

    #-------------------------------
    #check regression or classification
    if df['Decision'].dtypes == 'object':
        problem_type = 'classification'
    else:
        problem_type = 'regression'

    actual_values = df['Decision'].values
    num_of_features = df.shape[1] - 1  #discard Decision
    number_of_instances = df.shape[0]

    global_predictions = []

    #if classification, take the majority vote over the trees' predictions
    if problem_type == 'classification':
        for i in range(0, num_of_trees):
            moduleName = "outputs/rules/rule_" + str(i)
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description)

            predictions = []

            for index, instance in df.iterrows():
                params = []
                for j in range(0, num_of_features):
                    params.append(instance[j])  #index-th row, j-th column

                prediction = myrules.findDecision(params)
                predictions.append(prediction)
                #print(i,"th tree prediction: ",prediction)

            #print(predictions)
            global_predictions.append(predictions)

        #-------------------------------

        classified = 0
        for index, instance in df.iterrows():
            actual = actual_values[index]

            predictions = []
            for i in range(0, num_of_trees):
                prediction = global_predictions[i][index]
                if prediction != None:  #why does None exist in some cases?
                    predictions.append(prediction)

            predictions = np.array(predictions)
            unique_values = np.unique(predictions)

            if unique_values.shape[0] == 1:
                prediction = unique_values[0]
            else:
                counts = []
                for unique in unique_values:
                    count = 0
                    for j in predictions:
                        if unique == j:
                            count = count + 1
                    counts.append(count)

                #print("unique: ",unique_values)
                #print("counts: ",counts)

                prediction = None

                if len(counts) > 0:
                    max_index = np.argmax(np.array(counts))
                    prediction = unique_values[max_index]

            #print(index,". actual: ",actual," - prediction: ", prediction)
            if actual == prediction:
                classified = classified + 1

        print("Accuracy: ", 100 * classified / number_of_instances, "% on ",
              number_of_instances, " instances")

    return models
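# Optional equivalent for the manual vote-counting loop above (an illustration, not
# part of the original module): collections.Counter returns the most common
# non-None prediction directly.
from collections import Counter


def majority_vote(predictions):
    """Return the most frequent non-None prediction, or None if there is none."""
    votes = [p for p in predictions if p is not None]
    if not votes:
        return None
    return Counter(votes).most_common(1)[0][0]


# e.g. majority_vote(['Yes', 'No', 'Yes']) -> 'Yes'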
def collect_gesture(self, capture, ges, photo_num):
    photo_num = photo_num
    vedeo = False
    predict = False
    count = 0

    # open the requested camera (default webcam)
    cap = cv2.VideoCapture(capture)
    # set the capture mode
    cap.set(10, 200)
    # create and initialize the background subtractor
    bgModel = cv2.createBackgroundSubtractorMOG2(0, self.bgSubThreshold)

    while True:
        # read a video frame
        ret, frame = cap.read()
        # mirror the frame
        frame = cv2.flip(frame, 1)
        cv2.imshow('Original', frame)
        # bilateral filtering
        frame = cv2.bilateralFilter(frame, 5, 50, 100)
        # draw the rectangle: first point is the top-left corner (x, y), second is the bottom-right corner
        # rec = cv2.rectangle(frame, (220, 50), (450, 300), (255, 0, 0), 2)
        rec = cv2.rectangle(frame, (self.x1, self.y1), (self.x2, self.y2), (255, 0, 0), 2)
        # define the ROI region: the first slice is the y range, the second is the x range
        # frame = frame[50:300, 220:450]
        frame = frame[self.y1:self.y2, self.x1:self.x2]
        # motion detection via background subtraction
        bg = bgModel.apply(frame, learningRate=0)
        # show the background-subtraction window
        cv2.imshow('bg', bg)
        # edge processing of the image -- erosion
        fgmask = cv2.erode(bg, self.skinkernel, iterations=1)
        # show the eroded image
        cv2.imshow('erode', fgmask)
        # bitwise-AND the original frame with the background-subtracted and eroded mask
        bitwise_and = cv2.bitwise_and(frame, frame, mask=fgmask)
        # show the result of the AND operation
        cv2.imshow('bitwise_and', bitwise_and)
        # convert to grayscale
        gray = cv2.cvtColor(bitwise_and, cv2.COLOR_BGR2GRAY)
        # Gaussian blur
        blur = cv2.GaussianBlur(gray, (self.blurValue, self.blurValue), 2)
        # cv2.imshow('GaussianBlur', blur)
        # adaptive threshold segmentation (adaptiveThreshold)
        thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 11, 2)
        cv2.imshow('th3', thresh)
        Ges = cv2.resize(thresh, (100, 100))
        # thresholding with Otsu (kept for reference)
        # _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        # cv2.imshow('threshold1', thresh)

        if predict == True:
            # img = cv2.resize(thresh, (100, 100))
            img = np.array(Ges).reshape(-1, 100, 100, 1) / 255
            prediction = p_model.predict(img)
            final_prediction = [result.argmax() for result in prediction][0]
            ges_type = self.gesture[final_prediction]
            print(ges_type)
            cv2.putText(rec, ges_type, (self.x1, self.y1),
                        fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=2,
                        thickness=3, color=(0, 0, 255))
            # cv2.putText(rec, ges_type, (150, 220), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, thickness=3, color=(0, 0, 255))
            cv2.imshow('Original', rec)

        if vedeo is True and count < photo_num:
            # record the training set
            cv2.imencode('.jpg', Ges)[1].tofile(
                self.train_path + '{}_{}.jpg'.format(
                    str(random.randrange(1000, 100000)), str(ges)))
            count += 1
            print(count)
        elif count == photo_num:
            print('{} gesture images recorded; the test set for this gesture will be '
                  'recorded in 3 seconds, {} images in total'.format(
                      photo_num, photo_num * 0.43 - 1))
            time.sleep(3)
            count += 1
        elif vedeo is True and photo_num < count < photo_num * 1.43:
            cv2.imencode('.jpg', Ges)[1].tofile(
                self.predict_path + '{}_{}.jpg'.format(
                    str(random.randrange(1000, 100000)), str(ges)))
            count += 1
            print(count)
        elif vedeo is True and count == photo_num * 1.43:
            vedeo = False
            ges += 1
            print('Recording of this gesture is complete. Press l to record the '
                  'next gesture, or t to stop recording and start training')

        k = cv2.waitKey(1)
        if k == 27:
            break
        elif k == ord('l'):
            # record a gesture
            vedeo = True
            count = 0
        elif k == ord('p'):
            # predict gestures
            predict = True
            while True:
                model_name = input('Enter the name of the model\n')
                if model_name == 'exit':
                    break
                if model_name in os.listdir('./'):
                    print('Loading the {} model'.format(model_name))
                    p_model = load_model(model_name)
                    break
                else:
                    print('Wrong model name, please try again or type exit to quit')
        elif k == ord('r'):
            bgModel = cv2.createBackgroundSubtractorMOG2(0, self.bgSubThreshold)
            print('Background reset complete')
        elif k == ord('t'):
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
            train = Training(batch_size=32, epochs=5,
                             categories=len(self.gesture),
                             train_folder=self.train_path,
                             test_folder=self.predict_path,
                             model_name=p_model)
            train.train()
            backend.clear_session()
def processContinuousFeatures(algorithm, df, column_name, entropy, config):
    unique_values = sorted(df[column_name].unique())
    #print(column_name,"->",unique_values)

    subset_gainratios = []
    subset_gains = []
    subset_ginis = []
    subset_red_stdevs = []

    for i in range(0, len(unique_values) - 1):
        threshold = unique_values[i]

        subset1 = df[df[column_name] <= threshold]
        subset2 = df[df[column_name] > threshold]

        subset1_rows = subset1.shape[0]
        subset2_rows = subset2.shape[0]
        total_instances = df.shape[0]  #subset1_rows + subset2_rows

        subset1_probability = subset1_rows / total_instances
        subset2_probability = subset2_rows / total_instances

        if algorithm == 'ID3' or algorithm == 'C4.5':
            threshold_gain = entropy \
                - subset1_probability * Training.calculateEntropy(subset1, config) \
                - subset2_probability * Training.calculateEntropy(subset2, config)
            subset_gains.append(threshold_gain)

        if algorithm == 'C4.5':
            #C4.5 also needs the gain computed in the block above.
            #That's why we use a plain if here instead of elif.
            threshold_splitinfo = -subset1_probability * math.log(subset1_probability, 2) \
                - subset2_probability * math.log(subset2_probability, 2)
            gainratio = threshold_gain / threshold_splitinfo
            subset_gainratios.append(gainratio)

        elif algorithm == 'CART':
            decision_for_subset1 = subset1['Decision'].value_counts().tolist()
            decision_for_subset2 = subset2['Decision'].value_counts().tolist()

            gini_subset1 = 1
            gini_subset2 = 1

            for j in range(0, len(decision_for_subset1)):
                gini_subset1 = gini_subset1 - math.pow((decision_for_subset1[j] / subset1_rows), 2)

            for j in range(0, len(decision_for_subset2)):
                gini_subset2 = gini_subset2 - math.pow((decision_for_subset2[j] / subset2_rows), 2)

            gini = (subset1_rows / total_instances) * gini_subset1 \
                + (subset2_rows / total_instances) * gini_subset2

            subset_ginis.append(gini)

        #----------------------------------
        elif algorithm == 'Regression':
            superset_stdev = df['Decision'].std(ddof=0)
            subset1_stdev = subset1['Decision'].std(ddof=0)
            subset2_stdev = subset2['Decision'].std(ddof=0)

            threshold_weighted_stdev = (subset1_rows / total_instances) * subset1_stdev \
                + (subset2_rows / total_instances) * subset2_stdev
            threshold_reducted_stdev = superset_stdev - threshold_weighted_stdev
            subset_red_stdevs.append(threshold_reducted_stdev)

        #----------------------------------

    if algorithm == "C4.5":
        winner_one = subset_gainratios.index(max(subset_gainratios))
    elif algorithm == "ID3":
        #ID3 does not actually support continuous features, but we can still handle them this way
        winner_one = subset_gains.index(max(subset_gains))
    elif algorithm == "CART":
        winner_one = subset_ginis.index(min(subset_ginis))
    elif algorithm == "Regression":
        winner_one = subset_red_stdevs.index(max(subset_red_stdevs))

    winner_threshold = unique_values[winner_one]
    #print("threshold is ", winner_threshold, " for ", column_name)

    df[column_name] = np.where(df[column_name] <= winner_threshold,
                               "<=" + str(winner_threshold),
                               ">" + str(winner_threshold))

    return df
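# Worked illustration for the Regression branch above (an assumed example, not part
# of the original module): splitting 10 rows at some candidate threshold into
# subsets of 4 and 6 rows with stdevs 2.0 and 1.0, against a superset stdev of 1.8,
# gives
#   weighted stdev  = 0.4 * 2.0 + 0.6 * 1.0 = 1.4
#   stdev reduction = 1.8 - 1.4 = 0.4
# The candidate threshold with the largest reduction wins, and the continuous column
# is then rewritten into the two nominal labels "<=threshold" and ">threshold".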
def get_params():
    checkpoint_dir = '/Users/Nolsigan/PycharmProjects/rlntm-tensorflow/checkpoints'

    max_length = 6

    rnn_cell = rnn.BasicLSTMCell
    rnn_hidden = 128

    learning_rate = 0.003
    optimizer = tf.train.AdamOptimizer()
    gradient_clipping = 5

    batch_size = 100
    epochs = 30
    epoch_size = 100

    num_symbols = 10
    dup_factor = 2

    mem_dim = 128
    mem_move_table = [-1, 0, 1]
    in_move_table = [-1, 0, 1]
    out_move_table = [0, 1]

    return AttrDict(**locals())


mode = sys.argv[1]

if mode == '--train':
    Training(get_params())()
elif mode == '--test':
    Testing(get_params())()
else:
    print('no mode specified, please use --train or --test as the first argument')
config['epochs'] = 2
config['lr'] = 1e-5
config['accumulated'] = 2

fine_tuning = False

zalo = ZaloDatasetProcessor()
zalo.load_from_path(dataset_path='dataset', train_filename='combine.json',
                    test_filename='test.json', dev_filename='dev.json')

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

features_train = zalo.convert_examples_to_features(zalo.train_data, zalo.label_list, 256, tokenizer)
features_test = zalo.convert_examples_to_features(zalo.test_data, zalo.label_list, 256, tokenizer)
features_dev = zalo.convert_examples_to_features(zalo.dev_data, zalo.label_list, 256, tokenizer)

if __name__ == "__main__":
    NUM_OF_INTENT = 2
    config_model = BertConfig.from_pretrained('bert-base-multilingual-cased',
                                              output_hidden_states=True)
    model = QAModel(config_model, NUM_OF_INTENT)
    if fine_tuning:
        model.load_state_dict(torch.load('models/model-squad1.bin'))

    training = Training(features_train, features_dev, model, logger,
                        zalo.label_list, config)
    training.train()