def train_and_val(config, model, callbacks, mixture_num, sub_model_name):
    """Fit the model on the training split and monitor the validation split.

    Relies on module-level configuration: model_name, batch_size, epochs,
    the monitored/unmonitored site counts, time and data_generator.
    """
    print('training %s %s model' % (model_name, sub_model_name))

    def batches_needed(sample_count):
        # Batches required to cover sample_count samples (round up on remainder).
        whole, leftover = divmod(sample_count, batch_size)
        return whole + 1 if leftover else whole

    total_samples = num_mon_sites * num_mon_inst_train + num_unmon_sites_train
    train_size = int(total_samples * 0.95)
    val_size = int(total_samples * 0.05)

    started = time.time()
    model.fit_generator(
        data_generator.generate(config, 'training_data', mixture_num),
        steps_per_epoch=batches_needed(train_size),
        epochs=epochs,
        verbose=2,
        callbacks=callbacks,
        validation_data=data_generator.generate(config, 'validation_data',
                                                mixture_num),
        validation_steps=batches_needed(val_size),
        shuffle=False)
    elapsed = time.time() - started
    print('Total training time: %f' % elapsed)
def load_data(y_name='Color'):
    """Return generated data as (train_x, train_y), (test_x, test_y).

    Each split is a DataFrame of features together with the popped
    *y_name* label column.
    """
    def build_split(sample_count, banner):
        # Generate raw rows, echo them, then split features from labels.
        raw = data_generator.generate(sample_count)
        print(banner)
        print(raw)
        frame = pd.DataFrame(raw, columns=COLUMN_NAMES)
        labels = frame.pop(y_name)
        return frame, labels

    train_split = build_split(NUM_TRAIN_DATA, "TRAIN DATA:")
    test_split = build_split(NUM_TEST_DATA, "TEST DATA:")
    return train_split, test_split
def main():
    """Drive one simulation run: validate the schema, emit messages, track state."""
    welcome()
    schema = open_data(data_location)

    # Bail out early on an invalid schema.
    if not validate_data(schema):
        print("[!] exiting...")
        exit()
    logging.info("[*] schema is valid")

    # Record simulation state only when a state table is configured.
    track_state = simulation_table is not None
    if track_state:
        dynamo.create_simulation_record(dynamodb, simulation_table,
                                        simulation_id, simulation_duration,
                                        message_interval)

    # Emit one generated message per interval for the whole duration.
    for _ in range(simulation_duration):
        payload = data_generator.generate(schema)
        logging.info(payload)
        if not write_data(json.dumps(payload)):
            logging.warning("[!] message failed to write to iot core endpoint")
            exit()
        time.sleep(message_interval)

    print("[*] simulation completed")
    if track_state:
        dynamo.delete_simulation_record(dynamodb, simulation_table,
                                        simulation_id)
def quick_train(train_stock):
    """Generate CSV data for *train_stock* and launch a short double-DQN run."""
    generate(train_stock)
    csv_path = 'training_data/' + train_stock + '.csv'

    coloredlogs.install(level="DEBUG")
    switch_k_backend_device()
    print(csv_path)

    # Fixed hyper-parameters for the quick run.
    main(csv_path,
         10,   # window_size
         32,   # batch_size
         10,   # ep_count
         strategy='double-dqn',
         model_name='model_double-dqn_GOOG_50',
         pretrained=False,
         debug=False)
def main():
    """Generate synthetic text images.

    Parses CLI arguments, loads the language dictionary and fonts, builds
    random strings, and renders them onto each background image.

    Fixes over the original: the bare ``except:`` (which also swallowed
    KeyboardInterrupt/SystemExit) is narrowed to ``except Exception``, and
    two dead locals (the pre-seeded ``strings = []`` and the unused
    ``string_count``) are removed.
    """
    # Argument parsing
    args = parse_arguments()

    # Create the output directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Creating word list
    lang_dict = load_dict(args.language)

    # Create font (path) list
    fonts = load_fonts(args.language)

    # Creating synthetic sentences (or words)
    strings = create_strings_from_dict(args.length, args.random, args.count,
                                       lang_dict)

    img_list = load_img(args.background_dir)
    for i, img in enumerate(img_list):
        try:
            # NOTE(review): random.sample raises ValueError when the random
            # count exceeds len(strings); such images are skipped below.
            generate(i, img,
                     random.sample(strings, random.randint(1, 40)),
                     fonts[random.randrange(0, len(fonts))],
                     args.output_dir, args.extension, args.width,
                     args.text_color, args.orientation, args.space_width,
                     args.font_size)
        except Exception:
            # Skip images that fail to render, but no longer swallow
            # KeyboardInterrupt/SystemExit as the old bare `except:` did.
            continue
def predict(config, model, mixture_num, sub_model_name):
    """Run the model over the test generator and save raw predictions to disk."""
    print('generating predictions for %s %s model' % (model_name,
                                                      sub_model_name))
    if model_name == 'var-cnn':
        model.load_weights('model_weights.h5')

    test_size = num_mon_sites * num_mon_inst_test + num_unmon_sites_test
    # Round the step count up so a final partial batch is not dropped.
    whole_batches, leftover = divmod(test_size, batch_size)
    step_count = whole_batches + (1 if leftover else 0)

    started = time.time()
    predictions = model.predict_generator(
        data_generator.generate(config, 'test_data', mixture_num),
        steps=step_count,
        verbose=0)
    elapsed = time.time() - started

    if not os.path.exists(predictions_dir):
        os.makedirs(predictions_dir)
    np.save(file='%s%s_model' % (predictions_dir, sub_model_name),
            arr=predictions)
    print('Total test time: %f' % elapsed)
# default is 1.0. Accuracy becomes lower with larger sigma sigma = 1.0 print('number of classes: ',num_class,' sigma for data scatter:',sigma) if num_class == 4: n_train = 400 n_test = 100 feat_dim = 2 else: # then 3 n_train = 300 n_test = 60 feat_dim = 2 # generate train dataset print('generating training data') x_train, y_train = dg.generate(number=n_train, seed=None, plot=True, num_class=num_class, sigma=sigma) # generate test dataset print('generating test data') x_test, y_test = dg.generate(number=n_test, seed=None, plot=False, num_class=num_class, sigma=sigma) # set classifiers to 'svm' to test SVM classifier # set classifiers to 'softmax' to test softmax classifier # set classifiers to 'knn' to test kNN classifier classifiers = 'svm' if classifiers == 'svm': print('training SVM classifier...') w0 = np.random.normal(0, 1, (2 * num_class + num_class)) result = minimize(svm_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim)) print('testing SVM classifier...')
def main():
    """Run course/candidate matching once (writing CSVs) or as a Monte Carlo
    simulation that saves comparison histograms.

    Fixes over the original:
      * ``--if_figure`` used ``type=bool``, so ``--if_figure False`` parsed
        as True (any non-empty string is truthy); the text is now parsed
        explicitly.
      * the figures directory name was built from the simulation count
        *before* the interactive while-loop could raise it, so the folder
        name could disagree with the actual number of runs; it is now built
        afterwards (and no longer shadows the ``dir`` builtin).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_candidate', type=int)
    parser.add_argument('--num_course', type=int)
    # NOTE: argparse's type=bool treats any non-empty string as True;
    # parse the text explicitly so "--if_figure False" works as expected.
    parser.add_argument('--if_figure',
                        type=lambda s: str(s).lower() not in
                        ('false', '0', 'no', ''),
                        default=False)
    parser.add_argument('--save_to', type=str, default='result/trail')
    parser.add_argument('--num_simulations', type=int, default=100)
    args = parser.parse_args()

    if not args.if_figure:
        _run_once(args)
    else:
        _run_monte_carlo(args)


def _run_once(args):
    """Single matching run: evaluate all three algorithms and dump CSVs."""
    data = data_generator.generate(args.num_candidate, args.num_course)
    output_file_name = args.save_to.split('.')[0]
    for algorithm, suffix in ((run_stable_marriage, '_sm.csv'),
                              (hungarian, '_hg.csv'),
                              (maximum_matching, '_mm.csv')):
        matching = algorithm(data)
        score, course_satisfaction, candidate_satisfaction = \
            evaluate_matching(data, matching)
        write_to_file(data, matching, output_file_name + suffix, score,
                      course_satisfaction, candidate_satisfaction)


def _run_monte_carlo(args):
    """Repeat the matching n times and plot score/satisfaction histograms."""
    n = args.num_simulations
    # Insist on at least 100 runs for meaningful histograms.
    while n < 100:
        n = int(input('Try a number > 100: ') or '100')

    # Build the folder name only after n has been finalised.
    out_dir = str(args.num_candidate) + 'candidates_' + str(
        args.num_course) + 'courses_' + str(n) + 'simulations'
    if not Path('./figures/' + out_dir).exists():
        Path('./figures/' + out_dir).mkdir(parents=True)

    score = np.zeros([n, 3])
    prof_rate = np.zeros([n, 3])
    can_rate = np.zeros([n, 3])
    for i in progressbar.progressbar(range(n)):
        data = data_generator.generate(args.num_candidate, args.num_course)
        sm = run_stable_marriage(data)
        hg = hungarian(data)
        mm = maximum_matching(data)
        score[i, 0], prof_rate[i, 0], can_rate[i, 0] = \
            evaluate_matching(data, sm)
        score[i, 1], prof_rate[i, 1], can_rate[i, 1] = \
            evaluate_matching(data, hg)
        score[i, 2], prof_rate[i, 2], can_rate[i, 2] = \
            evaluate_matching(data, mm)

    _plot_histograms(1, score, n,
                     'Score, Monte Carlo n ={}'.format(n),
                     './figures/' + out_dir + '/scores.png')
    _plot_histograms(2, prof_rate, n,
                     'Professors satisfaction rate, Monte Carlo n ={}'.format(n),
                     './figures/' + out_dir + '/prof_rate.png')
    _plot_histograms(3, can_rate, n,
                     'Candidates satisfaction rate, Monte Carlo n ={}'.format(n),
                     './figures/' + out_dir + '/can_rate.png')


def _plot_histograms(figure_num, values, n, title, save_path):
    """Overlay histograms (and dashed mean lines) for the three algorithms."""
    # The 'Marraige' spelling in the mean label matches the original output.
    series = (
        (0, 'Stable Marriage', 'Mean of Stable Marraige={0:.{1}f}', 'c'),
        (1, 'Hungarian', 'Mean of Hungarian={0:.{1}f}', 'limegreen'),
        (2, 'Maximum Matching', 'Mean of Maximum Matching={0:.{1}f}', 'orange'),
    )
    plt.figure(figure_num)
    for column, label, mean_fmt, color in series:
        mean = sum(values[:, column]) / n
        plt.hist(values[:, column], bins=10, label=label, alpha=0.6,
                 color=color)
        plt.axvline(mean, linestyle='--', label=mean_fmt.format(mean, 2),
                    color=color)
    plt.title(title)
    plt.legend()
    plt.savefig(Path(save_path))
def execute_program():
    """End-to-end console pipeline: regenerate training data, clean it,
    train a kNN model, save it, then validate accuracy on fresh data.

    Every stage prints progress and funnels errors through the local
    ``status`` helper, which hard-exits the process on any non-'OK' word.
    Depends on module-level DB, generator, data_generator, beautiful_output,
    np, kNN and joblib. NOTE(review): the ``rd``/``md`` shell commands are
    Windows-only -- this script will not clean train_data on POSIX systems.
    """
    def status(word):
        # Print a green status for 'OK'; anything else is treated as a fatal
        # error: print it in red, wait for Enter, and kill the process.
        word = str(word)
        if word.upper() != 'OK':
            beautiful_output.red_normal('--> Status: ' + '[' +
                                        "Error because " + str(word) + ']')
            input('Enter to quit...')
            os._exit(0)
        beautiful_output.green_normal('--> Status: ' + '[' + str(word) + ']')

    # Generate the code
    print('Begin to reconstruct...', end=' ')
    try:
        DB.reconstruct()
        # Use shell rd/md instead of `del` so the CLI output stays clean
        # (Windows-only commands).
        os.system('rd /s/q train_data')
        os.system('md train_data')
        status('OK')
    except Exception as e:
        status(e)

    print('\nExecute generating progress...')
    try:
        num = int(input('The number of code you wanna generate: '))
        print('Generating...')
        generator.generate(num)
    except Exception as e:
        # On failure num stays 0 so the later summary math still works.
        num = 0
        status(e)

    # print sample database
    print('\n')
    beautiful_output.underline('DATABASE CHECK:')
    DB.disp_DB()
    print('\nInitialize database...', end=' ')
    status('OK')

    # Clean the DB
    print('Remake the database, it may take a while...')
    try:
        # data_generator.generate() is expected to return a dict mapping
        # labels to arrays (see data.values()/data.keys() usage below).
        data = data_generator.generate()
    except Exception as e:
        data = []
        status(e)

    # print out the clean data
    beautiful_output.underline('\nData check:')
    try:
        print(list(data.values())[0][0])
        print('\nData Check...', end=' ')
        status('OK')
    except Exception as e:
        status(e)

    # print log
    beautiful_output.underline('\nLog:')
    print('-------------------------------')
    print('train data number: ' + str(num * 4))
    print('train data Pairs : ' + str(len(list(data.keys()))))
    # 26 presumably corresponds to the alphabet of possible labels --
    # TODO confirm.
    print('covered data rate: ' +
          str(len(list(data.keys())) / 26 * 100)[:4] + '%')
    print('data shape : ' + str(list(data.values())[0][0].shape))
    print('-------------------------------')

    # data constructor for training
    def construct_data():
        # Flatten every sample of every label into a feature row, keeping a
        # parallel label list. Reads `data` from the enclosing scope.
        label_total = list(data.keys())
        feature = []
        label = []
        for i in label_total:
            for ii in data[i]:
                # decrease the dimension
                ii = ii.reshape(1, ii.shape[1] * ii.shape[0])[0]
                feature.append(ii)
                label.append(i)
        return [feature, label]

    # train the data
    print('\nReconstruct the feature and label array...')
    try:
        # construct the knn model
        temp = construct_data()
        feature = temp[0]
        label = temp[1]
        beautiful_output.underline('\nCheck the feature:')
        print(feature[0][:10])
        print('\nReconstruct data...', end=' ')
        status('OK')
    except Exception as e:
        label = []
        feature = []
        status(e)

    print('\nTraining...', end=' ')
    try:
        # define cluster: one neighbor per distinct label
        neighbor_num = len(np.unique(label))
        mode = kNN(n_neighbors=neighbor_num, algorithm='auto')
        mode.fit(feature, label)
        status('OK')
    except Exception as e:
        neighbor_num = 0
        mode = None
        status(e)

    # save model
    print('\nSave the model...', end=' ')
    try:
        joblib.dump(mode, './model.m')
        status('OK')
    except Exception as e:
        status(e)

    # validate accuracy
    print('\nValidate model accuracy')
    print('processing...')
    try:
        print('\nReconstruct...')
        DB.reconstruct()
        os.system('rd /s/q train_data')
        os.system('md train_data')
        print('\nGenerating test data...')
        # Quarter of the training count is generated as test data.
        generator.generate(int(num / 4))
        print('Clean the data')
        data = data_generator.generate()
        print('Reconstruct the data', end=' ')
        temp = construct_data()
        feature = temp[0]
        label = temp[1]
        predict_label = mode.predict(feature)
        # Count exact prediction/label matches to compute accuracy (%).
        compare = sum(list(map(lambda x, y: x == y, predict_label, label)))
        accuracy = str(compare / len(label) * 100)[:4]
        status('OK')
    except Exception as e:
        predict_label = []
        label = []
        accuracy = None
        status(e)

    beautiful_output.underline('\nModel accuracy: ')
    print('---------------------')
    print('Predict: ' + str(predict_label[:10]) + '...')
    print('Actual: ' + str(label[:10]) + '...')
    print('---------------------')
    print(accuracy + '%')

    # print final summary
    beautiful_output.underline('\nSummary:')
    print('---------------------')
    print('Train data: ' + str(len(predict_label)))
    print('Test data: ' + str(int(len(predict_label) * 0.2)))
    print('Neighbor: ' + str(neighbor_num) + '/26')
    print('Model Accuracy: ' + accuracy + '%')
    print('Model Address: ' + './model.m')
    print('Train method: ' + 'Knn')
    print('---------------------')
import os
import pickle

import torch

# Import utility scripts
from MeLU import MeLU
from config import config
from train import training
from data_generator import generate
from evidence_candidate import selection

if __name__ == "__main__":
    # Fix: the original snippet used os.path.exists/os.mkdir/os.listdir
    # without importing os (NameError at runtime); `import os` added above.
    master_path = "./ml"
    if not os.path.exists("{}/".format(master_path)):
        os.mkdir("{}/".format(master_path))
    # Preparing the dataset. It needs about 22GB of your hard disk space.
    generate(master_path)

    # Training the model
    melu = MeLU(config)
    model_filename = "{}/models.pkl".format(master_path)
    if not os.path.exists(model_filename):
        # Load the training dataset: each task is stored as 4 files, so the
        # task count is a quarter of the directory listing.
        training_set_size = int(
            len(os.listdir("{}/warm_state".format(master_path))) / 4)
        # The support set is for local update
        supp_xs_s = []
        supp_ys_s = []
        # The query set is for global update
        query_xs_s = []
        query_ys_s = []
def dataGen(self):
    """Generate CSV data from StockTwits for this instance's ticker."""
    symbol = self.ticker
    dataGenerator.generate(symbol)
import math
from PIL import Image, ImageDraw
import data_generator

# Plot 250 generated 2-D points on a 1200x1200 white canvas; the third
# component of each point selects the dot colour.
points = data_generator.generate(250)
img = Image.new("RGB", (12 * 100, 12 * 100), "white")
draw = ImageDraw.Draw(img)
dotSize = 5

for current in points:
    # Map coordinates from roughly [-6, 6] into pixel space.
    col = math.trunc((current[0] + 6) * 100)
    row = math.trunc((current[1] + 6) * 100)
    dot_color = "orange" if current[2] == 0 else "blue"
    draw.rectangle([col, row, col + dotSize - 1, row + dotSize - 1],
                   fill=dot_color)

img.show()  # View in default viewer
# NOTE(review): this chunk starts mid-definition -- `with_pyparams`,
# `with_numparams`, `columns`, `table`, `arguments`, `data` and `cursor`
# come from an enclosing function whose header is not visible here, so the
# indentation below is reconstructed.

# Build the placeholder list in the driver's parameter style.
if with_pyparams:
    # pyformat style: %s, %s, ...
    parameters = ", ".join(["%s"] * len(columns))
if with_numparams:
    # numeric style: :1, :2, ...
    parameters = ", ".join(":" + str(idx + 1)
                           for idx in range(len(columns)))
# NOTE(review): if neither flag is set, `parameters` is undefined here and
# this raises NameError -- confirm the flags are mutually exhaustive.
sql = "INSERT INTO %s (%s) VALUES (%s)" \
    % (table, arguments, parameters)
cursor.executemany(sql, data)

with app:
    connection = connect()
    cursor = connection.cursor()
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input and deprecated in PyYAML >= 5.1; the file handle from
    # open(REGRESS_DATA) is also never closed -- consider safe_load + `with`.
    content = yaml.load(open(REGRESS_DATA))
    assert isinstance(content, list)
    # Run any setup statements before inserting fixture rows.
    for sql in prelude:
        cursor.execute(sql)
    for line in content:
        insert_table_data(line, cursor)
    # Insert the generated rows derived from the fixture content.
    generated_content = data_generator.generate(content)
    for line in generated_content:
        insert_table_data(line, cursor)
    connection.commit()
    connection.release()