def train():
    # Download and split data.
    common.split_data(TIME_STEPS)

    # Build and compile the model.
    model = build_model()

    # model.save_weights(CKP_PATH.format(epoch=0))
    # Load last checkpoint if any.
    # model.load_weights(
    #     tf.train.latest_checkpoint(
    #         os.path.dirname(CKP_PATH)
    #     )
    # )

    train_idg = generators.TimeDistributedImageDataGenerator(
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        rescale=1. / 255,
        time_steps=TIME_STEPS,
    )
    validation_idg = generators.TimeDistributedImageDataGenerator(
        time_steps=TIME_STEPS,
    )

    history = model.fit(
        train_idg.flow_from_directory(
            common.TRAIN_DATA_PATH,
            target_size=(48, 48),
            batch_size=BATCH_SIZE,
            class_mode='sparse',
            shuffle=False,
            color_mode='rgb',
            # classes=['agree_pure', 'agree_considered'],
            # save_to_dir='./data/train'
        ),
        validation_data=validation_idg.flow_from_directory(
            common.VALIDATION_DATA_PATH,
            target_size=(48, 48),
            batch_size=BATCH_SIZE,
            class_mode='sparse',
            shuffle=False,
            color_mode='rgb',
            # classes=['agree_pure', 'agree_considered'],
            # save_to_dir='./data/test'
        ),
        callbacks=CALLBACKS,
        epochs=EPOCHS,
    )

    model.save(SVD_PATH)
    common.plot_acc_loss(history, PLT_PATH)
def train(model_config, model_name):
    token = '{}_{}_{}'.format('spv-cnn', model_config, model_name)
    checkpoint_dir = tempfile.mkdtemp(prefix=token + '_', dir=config.DIR_MODEL)
    path_loss_plot = os.path.join(checkpoint_dir, 'LOSS_{}.png'.format(token))
    checkpoint_path = os.path.join(
        checkpoint_dir, 'check_gen{epoch:02d}_loss{val_loss:.2f}.hdf5')
    model_path = os.path.join(config.DIR_MODEL, 'MODEL_{}.hdf5'.format(token))
    tee = Tee(os.path.join(config.DIR_LOG, 'LOG_{}.logg'.format(token)), 'w')  # noqa: F841

    # ## preproc
    X, Y = load_label(config.DIR_DATA)
    X = transform_channel(X, orig_mode='channels_first')

    # ## split data
    X_train, Y_train, X_valid, Y_valid = split_data(X, Y, ratio=0.9)
    X_train, Y_train = data_augmentation(X_train, Y_train)

    # ## small down dataset
    # X, Y, X_out, Y_out = split_data(X, Y, ratio=0.5)

    # standardize train data
    # orig_shape = X_train.shape
    # tmp_X_train = X_train.copy()
    # tmp_X_train.shape = (orig_shape[0], orig_shape[1]*orig_shape[2]*orig_shape[3])
    # scaler = StandardScaler().fit(tmp_X_train)
    # tmp_X_train = scaler.transform(tmp_X_train)
    # tmp_X_train.shape = orig_shape
    # X_train = tmp_X_train

    if model_config == 'vgg16':
        model = keras.applications.vgg16.VGG16(include_top=False,
                                               weights='imagenet',
                                               input_tensor=None,
                                               input_shape=(3, 32, 32))
        batch_size = 8
    else:
        func_get_custom_model = getattr(model_configs, model_config)
        model, batch_size = func_get_custom_model(10, inputs=(32, 32, 3))
    model.summary()

    model.fit(X_train, Y_train,
              batch_size=batch_size,
              epochs=40,
              validation_data=(X_valid, Y_valid),
              callbacks=[
                  ModelCheckpoint(checkpoint_path, monitor='val_loss'),
                  ModelCheckpoint(model_path, save_best_only=True,
                                  monitor='val_loss', mode='min'),
                  EarlyStopping(monitor='val_loss', patience=3, mode='min'),
                  PlotLosses(output_img=path_loss_plot)
              ])
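# The split_data and data_augmentation helpers used above come from the
# project's common module and are not shown here. Below is a minimal sketch of
# what a ratio-based split_data returning (X_train, Y_train, X_valid, Y_valid)
# might look like, assuming numpy arrays and a shuffle-then-slice strategy;
# the name, signature, and shuffling behaviour are assumptions, not the
# project's actual implementation.
import numpy as np


def split_data_sketch(X, Y, ratio=0.9, seed=0):
    """Shuffle X/Y with one shared permutation, then split by `ratio`."""
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(X))      # shared permutation keeps X and Y aligned
    n_train = int(len(X) * ratio)      # first `ratio` of the samples form the training set
    train_idx, valid_idx = idx[:n_train], idx[n_train:]
    return X[train_idx], Y[train_idx], X[valid_idx], Y[valid_idx]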
def ohl(capital, star_param, cdata, typ):
    max_dp = star_param['MAX']
    start = star_param['START']
    thr = star_param['THR']
    var = star_param['VAR']
    sl = star_param['SL']
    t1 = star_param['T1']
    t2 = star_param['T2']
    st_id = star_param['ID']
    scr = star_param['SC']
    sims = {}
    scrips = {}
    data = {}
    c.pr("I", "Initializing Strategy OHL Max DP -> " + str(max_dp) +
         " Starting Data Point -> " + str(start), 0)
    # Fetch scrips
    if scr == "ALL":
        scrips = c.load_scrips()
    else:
        scrips[scr] = 1
    # Fetch data
    for scrip in scrips:
        if scr != "ALL":
            if len(cdata):
                data = cdata
            else:
                data = c.fetch_scrip_data(scrip, start, 0)
        else:
            data = c.fetch_scrip_data(scrip, start, 0)
        spl_data = c.split_data(data, 36000)
        for ctr in spl_data:
            rddata = collections.OrderedDict(sorted(spl_data[ctr].items()))
            iddata = c.intrafy(rddata)
            sim_key, sim_data = ohl_process(iddata, thr, var, scrip, capital,
                                            max_dp, sl, t1, t2, st_id)
            if sim_key:
                sims[sim_key + "_" + scrip] = sim_data
    # Call simulations
    if len(sims):
        rans = randomize(spl_data, sims, start, "09:31:00", "15:10:00", "OHL",
                         capital, sl, t1, t2, st_id)
        c.pr("I", str(len(sims)) + " Actual Simulations Will Be Performed", 1)
        c.pr("I", str(len(rans)) + " Random Simulations Will Be Performed", 1)
        if typ == "B":
            for key in sims:
                sim.simulate(sims[key])
            for key in rans:
                sim.simulate(rans[key])
        else:
            sim.init_sim(sims, rans, st_id)
    return
def test_year(data, type_):
    copy_df = data.copy()
    copy_df['year'] = copy_df['year'].astype(type_)
    model = 'origin ~ ' + ' + '.join(features)
    x_train, x_test, y_train, y_test = split_data(copy_df, model,
                                                  test_size=0.3, random_state=1)
    log_reg = LogisticRegression(fit_intercept=True, C=1e9, solver='lbfgs',
                                 multi_class='ovr', max_iter=1e5)
    log_reg.fit(x_train, y_train)
    print("Training accuracy: ",
          accuracy_score(y_train, log_reg.predict(x_train)))
    print("Training log-loss",
          log_loss(y_train, log_reg.predict_proba(x_train)))
    print("Validation accuracy: ",
          accuracy_score(y_test, log_reg.predict(x_test)))
    cnf = confusion_matrix(y_test, log_reg.predict(x_test))
    plot_confusion_matrix(cnf, classes=[1, 2, 3])
    plot_confusion_matrix(cnf, classes=[1, 2, 3], normalize=False)
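# split_data above is a project helper (not shown) that takes a pandas
# DataFrame plus a formula string and returns train/test design matrices.
# Below is a minimal sketch of one plausible implementation, assuming patsy's
# dmatrices and scikit-learn's train_test_split; the helper's real internals
# may differ, so treat this as an illustration only.
from patsy import dmatrices
from sklearn.model_selection import train_test_split


def split_formula_data_sketch(df, formula, test_size=0.3, random_state=1):
    """Build y/X from `formula` and split into x_train, x_test, y_train, y_test."""
    y, X = dmatrices(formula, data=df, return_type='dataframe')
    y = y.values.ravel()  # flatten the response to 1-D for scikit-learn
    return train_test_split(X, y, test_size=test_size, random_state=random_state)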
def main():
    """Train and test a perceptron implementation.

    This takes data at the location defined by DATA_DIRECTORY, splits it into
    training, validation, and test sets, and creates a perceptron to predict
    values.
    """
    data = get_dataset(DATA_DIRECTORY)
    training, validation, test = split_data(data)

    perceptron = Perceptron()
    # TODO: Use validation set to tune alpha
    x, y = training['x'], training['y']
    perceptron.train(x, y, alpha=DEFAULT_STEP_SIZE)

    accuracy = determine_accuracy(perceptron, test)
    print(f'The perceptron is {int(accuracy * 100)}% accurate. Woo.')
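# split_data here is another project helper that is not shown. A minimal
# sketch of a three-way splitter returning dicts with 'x' and 'y' arrays, as
# the call above suggests; the 60/20/20 ratios, dict layout, and shuffling
# are assumptions rather than the actual implementation.
import numpy as np


def split_data_three_way_sketch(data, train_frac=0.6, valid_frac=0.2, seed=0):
    """Shuffle `data` ({'x': array, 'y': array}) into train/validation/test."""
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(data['y']))
    n_train = int(len(idx) * train_frac)
    n_valid = int(len(idx) * valid_frac)
    parts = np.split(idx, [n_train, n_train + n_valid])  # three index blocks
    return [{'x': data['x'][p], 'y': data['y'][p]} for p in parts]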
def train(model_config, model_name):
    token = '{}_{}_{}'.format('st-cnn', model_config, model_name)
    checkpoint_dir = tempfile.mkdtemp(prefix=token + '_', dir=config.DIR_MODEL)
    model_path = os.path.join(config.DIR_MODEL, 'MODEL_{}.hdf5'.format(token))
    tee = Tee(os.path.join(config.DIR_LOG, 'LOG_{}.logg'.format(token)), 'w')  # noqa: F841

    # ## preproc
    # label data preproc
    LX, LY = load_label(config.DIR_DATA)
    LX = transform_channel(LX, orig_mode='channels_first')
    LX, LY, LX_valid, LY_valid = split_data(LX, LY, ratio=0.8)
    LX_valid, LY_valid = data_augmentation(LX_valid, LY_valid)

    # unlabel data preproc
    UX = load_unlabel(config.DIR_DATA)
    UX = transform_channel(UX, orig_mode='channels_first')

    # pretrain_model
    spv_token = '{}_{}_{}'.format('spv-cnn', model_config, model_name)
    pretrain_model_path = os.path.join(config.DIR_MODEL,
                                       'MODEL_{}.hdf5'.format(spv_token))
    if os.path.exists(pretrain_model_path):
        model, batch_size = _create_model(model_config,
                                          path_restore=pretrain_model_path)
        model.summary()
    else:
        model, batch_size = _create_model(model_config)
        model.summary()
        model.fit(LX, LY,
                  batch_size=batch_size,
                  epochs=5,
                  validation_data=(LX_valid, LY_valid),
                  callbacks=[
                      EarlyStopping(monitor='val_loss', patience=3, mode='min'),
                  ])

    # ## self-training
    num_self_train = 10
    num_epochs = 10
    patience = 1
    relable_score = 0.92
    path_best_checkpoint = None
    for st_round in range(1, 1 + num_self_train):
        print('\n\n----- Round {} -----\n\n'.format(st_round))
        round_token = token + '_round{}'.format(st_round)
        path_loss_plot = os.path.join(checkpoint_dir,
                                      'LOSS_{}.png'.format(round_token))
        checkpoint_path = os.path.join(
            checkpoint_dir,
            'check_round{}'.format(st_round) +
            '_gen{epoch:02d}_loss{val_loss:.2f}.hdf5')

        # restore model
        if path_best_checkpoint is not None:
            model, batch_size = _create_model(
                model_config, path_restore=path_best_checkpoint)

        # add predicted unlabeled data scoring above relable_score
        X_train, Y_train = LX, LY
        ux, uy = _select_unlabeled_above_relable(UX, model, relable_score)
        if ux is not None:
            num_add_unlabeled = ux.shape[0]
            print('add unlabeled data: ', num_add_unlabeled)
            X_train = np.concatenate((X_train, ux), axis=0)
            Y_train = np.concatenate((Y_train, uy), axis=0)
        X_train, Y_train = data_augmentation(X_train, Y_train)

        model.fit(
            X_train, Y_train,
            batch_size=batch_size,
            epochs=num_epochs,
            validation_data=(LX_valid, LY_valid),
            callbacks=[
                ModelCheckpoint(checkpoint_path, monitor='val_loss'),
                EarlyStopping(monitor='val_loss', patience=patience, mode='min'),
                PlotLosses(output_img=path_loss_plot)
            ],
            verbose=1,
        )
        del model

        # select best model
        checkpoints = [x for x in os.listdir(checkpoint_dir) if '_loss' in x]
        best_checkpoint = sorted(
            checkpoints,
            key=lambda x: float((x.split('_loss')[1]).replace('.hdf5', '')))[0]
        print('best checkpoint right now: ', best_checkpoint)
        path_best_checkpoint = os.path.join(checkpoint_dir, best_checkpoint)
        copyfile(path_best_checkpoint, model_path)
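# _select_unlabeled_above_relable is a project helper that is not shown above.
# A minimal sketch of the usual self-training selection step it appears to
# perform: keep only unlabeled samples whose top predicted probability exceeds
# the threshold and use the argmax class as the pseudo-label. The name and the
# exact label format returned are assumptions, not the project's code.
import numpy as np


def select_unlabeled_above_threshold_sketch(UX, model, threshold):
    """Return (selected_samples, pseudo_labels), or (None, None) if none qualify."""
    probs = model.predict(UX)                    # shape: (n_samples, n_classes)
    confident = probs.max(axis=1) >= threshold   # mask of high-confidence predictions
    if not confident.any():
        return None, None
    pseudo_labels = probs[confident].argmax(axis=1)  # predicted class as pseudo-label
    return UX[confident], pseudo_labels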
                  verbose=1,
                  random_state=random_state,
                  n_jobs=-1)
    clf.fit(X_train, y_train)
    return clf


X_train, y_train, X_test, y_test, opts, args = parse_kmp()
X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test

clf_r = []
clf_b = []
clf_s = []

for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train,
                                         X_test, y_test,
                                         opts.n_folds,
                                         not opts.regression):
    clf_r.append(fit_kmp(X_tr, y_tr, X_te, y_te, "random", opts,
                         random_state=0))
    clf_b.append(fit_kmp(X_tr, y_tr, X_te, y_te, "balanced", opts,
                         random_state=0))
    clf_s.append(fit_kmp(X_tr, y_tr, X_te, y_te, "stratified", opts,
                         random_state=0))

rs = np.vstack([clf.validation_scores_ for clf in clf_r])
bs = np.vstack([clf.validation_scores_ for clf in clf_b])
ss = np.vstack([clf.validation_scores_ for clf in clf_s])

pl.figure()
import os
import time
from shutil import copyfile

from common import load_label, load_unlabel
from common import split_data
from common import transform_channel
from common import data_augmentation
from keras.models import load_model

# label data preproc
folder = os.path.join(PATH, 'data')
LX, LY = load_label(folder)
LX = transform_channel(LX, orig_mode="channels_first")
LX, LY, X_valid, Y_valid = split_data(LX, LY, ratio=0.9)

# unlabel data preproc
UX = load_unlabel(folder)
UX = transform_channel(UX, orig_mode="channels_first")

# load model
from models_supervised_cnn import YCNet3
model_input = os.path.join(PATH, 'model',
                           'model_cnn_gen15_loss1.07_acc67.6.hdf5')  # path or None
if os.path.isfile(model_input):
    model, batch_size = YCNet3(10, inputs=(32, 32, 3),
                               file_load_weights=model_input)
else:
    model, batch_size = YCNet3(10, inputs=(32, 32, 3))
model.summary()
                  X_val=X_test,
                  y_val=y_test,
                  verbose=1,
                  random_state=random_state,
                  n_jobs=-1)
    clf.fit(X_train, y_train)
    return clf


X_train, y_train, X_test, y_test, opts, args = parse_kmp(n_components=1.0,
                                                         check_duplicates=True)
X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test

if opts.n_nonzero_coefs < 1:
    raise ValueError("n_nonzero_coefs must be a positive integer")

cv = list(split_data(X_train, y_train, X_test, y_test,
                     opts.n_folds, not opts.regression))

amounts = np.linspace(0.1, 1.0, 10)
#amounts = (0.25, 0.5, 0.75, 1.0)
#amounts = np.linspace(0.1, 0.5, 5)

acc_sup = np.zeros((len(amounts), len(cv)), dtype=np.float64)
acc_semi = np.zeros((len(amounts), len(cv)), dtype=np.float64)

j = 0
for X_tr, y_tr, X_te, y_te in cv:
    for i, perc_label in enumerate(amounts):
        print("Percentage of labeled data:", perc_label)
        X_all, X_l, y_l = split_unlabeled_data(X_tr, y_tr,
def train(model_config, model_name):
    token = '{}_{}_{}'.format('ae-cnn', model_config, model_name)
    checkpoint_dir = tempfile.mkdtemp(prefix=token + '_', dir=config.DIR_MODEL)
    path_loss_plot = os.path.join(checkpoint_dir, 'LOSS_{}.png'.format(token))
    checkpoint_path = os.path.join(
        checkpoint_dir, 'check_gen{epoch:02d}_loss{val_loss:.2f}.hdf5')
    model_path = os.path.join(config.DIR_MODEL, 'MODEL_{}.hdf5'.format(token))
    tee = Tee(os.path.join(config.DIR_LOG, 'LOG_{}.logg'.format(token)), 'w')  # noqa: F841

    # ## preproc
    # label data preproc
    LX, LY = load_label(config.DIR_DATA)
    LX = transform_channel(LX, orig_mode='channels_first')
    LX, LY, X_valid, Y_valid = split_data(LX, LY, ratio=0.9)

    # unlabel data preproc
    UX = load_unlabel(config.DIR_DATA)
    UX = transform_channel(UX, orig_mode='channels_first')

    func_get_aec = getattr(ae_classifier_configs, model_config)
    autoencoder_classifier = func_get_aec(10, inputs=(32, 32, 3))

    # pretrain autoencoder
    train_ae_X = np.concatenate((LX, UX), axis=0)
    train_ae_X, _ = data_augmentation(train_ae_X,
                                      np.ones((train_ae_X.shape[0], 1)))
    normal_train_ae_X = np.asarray(train_ae_X, dtype='float32') / 255.0
    normal_X_valid = np.asarray(X_valid, dtype='float32') / 255.0
    ae, batch_ae = autoencoder_classifier.get_autoencoder()
    ae.summary()
    ae.fit(
        train_ae_X, normal_train_ae_X,
        batch_size=batch_ae,
        epochs=5,
        validation_data=(X_valid, normal_X_valid),
        verbose=1,
    )
    # autoencoder_classifier.freeze_ae_layers()

    # train
    train_X, train_Y = data_augmentation(LX, LY)
    ae_classifier, batch_ae_classifier = autoencoder_classifier.get_ae_classifier()
    ae_classifier.summary()
    ae_classifier.fit(train_X, train_Y,
                      batch_size=batch_ae_classifier,
                      epochs=60,
                      validation_data=(X_valid, Y_valid),
                      verbose=1,
                      callbacks=[
                          ModelCheckpoint(checkpoint_path, monitor='val_loss'),
                          ModelCheckpoint(model_path, save_best_only=True,
                                          monitor='val_loss', mode='min'),
                          EarlyStopping(monitor='val_loss', patience=3, mode='min'),
                          PlotLosses(output_img=path_loss_plot)
                      ])
    return clf


X_train, y_train, X_test, y_test, opts, args = parse_kmp(check_duplicates=True)
opt_dict = options_to_dict(opts)

class_distrib = "random" if opts.regression else "balanced"

clf_s = []
clf_kg = []
clf_kb = []
clf_ks = []

j = 0
for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train, X_test, y_test,
                                         opts.n_folds, opts.cvtype,
                                         opts.force_cv):
    print("Fold", j)

    # selected from datasets
    print("Selected components")
    components = select_components(X_tr, y_tr, opts.n_components,
                                   class_distrib=class_distrib,
                                   random_state=j)
    clf_s.append(fit_kmp(X_tr, y_tr, X_te, y_te, components, opt_dict,
                         opts.regression, random_state=j))

    # k-means global
    print("Global k-means")
    components = create_kmeans_comp(X_tr, y_tr,
                                    n_components=opts.n_components,
import common
import pandas as pd

DIR = "data"
DATA_SETS = ["train", "test"]

df = common.load_data()
selected_rows = common.select_rows(df)
train, test = common.split_data(selected_rows)

for rows, data_set_name in zip([train, test], DATA_SETS):
    pd.DataFrame(rows, columns=["row_number"])\
        .to_csv("{}/{}_rows.txt".format(DIR, data_set_name), index=False)
def randomize(data, sim_data, start, st, en, star, capital, sl, t1, t2, st_id):
    uniq_scrip = {}
    trans_order = {}
    ran_data = {}
    random.seed(100)
    # print(sim_data)
    for sim in sim_data:
        # Separate data as per scrips
        scrip = sim_data[sim]['SC']
        if scrip not in uniq_scrip:
            uniq_scrip[scrip] = {}
            trans_order[scrip] = []
        uniq_scrip[scrip][sim_data[sim]['TS']] = sim_data[sim]
        trans_order[scrip].append(uniq_scrip[scrip][sim_data[sim]['TS']]['TP'])
    for scrip in uniq_scrip:
        data = c.fetch_scrip_data(scrip, start, 0)
        spl_data = c.split_data(data, 36000)
        max_dp = len(uniq_scrip[scrip])
        dp_avl = (len(spl_data) - len(sim_data))
        dp_keys = spl_data.keys()
        if max_dp > (dp_avl - 2):
            max_dp = (dp_avl - 2)
            c.pr("I", "Generating Random Data For Scrip -> " + scrip +
                 " DP Needed -> " + str(max_dp) + " DP Available -> " +
                 str(dp_avl) + " Total Sims -> " + str(len(sim_data)), 0)
            ctr = 0
            for ran_dp in dp_keys:
                if ctr == max_dp:
                    break
                spl_tmp = spl_data[ran_dp]
                rddata = collections.OrderedDict(
                    sorted(spl_data[ran_dp].items()))
                iddata = c.intrafy(rddata)
                tkey = c.get_timestamp(
                    c.get_only_date(list(iddata.keys())[0]) + " " + st)
                dp_check = tkey + "_" + scrip
                # if dp_check not in sim_data:
                if dp_check not in ran_data:
                    ran_data[dp_check] = {}
                    ran_data[dp_check]['TS'] = tkey
                    ran_data[dp_check]['NM'] = star
                    ran_data[dp_check]['ST'] = "RAN"
                    ran_data[dp_check]['EN'] = en
                    ran_data[dp_check]['SC'] = scrip
                    ran_data[dp_check]['TP'] = trans_order[scrip][ctr]
                    ran_data[dp_check]['CP'] = capital
                    ran_data[dp_check]['SL'] = sl
                    ran_data[dp_check]['T1'] = t1
                    ran_data[dp_check]['T2'] = t2
                    ran_data[dp_check]['ID'] = st_id
                    ran_data[dp_check]['DATA'] = data
                    ctr += 1
        else:
            c.pr("I", "Generating Random Data For Scrip -> " + scrip +
                 " DP Needed -> " + str(max_dp) + " DP Available -> " +
                 str(dp_avl) + " Total Sims -> " + str(len(sim_data)), 0)
            ctr = 0
            for x in range(1, max_dp + 1):
                y = True
                while y:
                    # Generate a random data-point index
                    ran_dp = random.randrange(1, (dp_avl - 1))
                    spl_tmp = spl_data[ran_dp]
                    rddata = collections.OrderedDict(
                        sorted(spl_data[ran_dp].items()))
                    iddata = c.intrafy(rddata)
                    tkey = c.get_timestamp(
                        c.get_only_date(list(iddata.keys())[0]) + " " + st)
                    dp_check = tkey + "_" + scrip
                    if dp_check not in ran_data:
                        ran_data[dp_check] = {}
                        ran_data[dp_check]['TS'] = tkey
                        ran_data[dp_check]['NM'] = star
                        ran_data[dp_check]['ST'] = "RAN"
                        ran_data[dp_check]['EN'] = en
                        ran_data[dp_check]['SC'] = scrip
                        ran_data[dp_check]['TP'] = trans_order[scrip][ctr]
                        ran_data[dp_check]['CP'] = capital
                        ran_data[dp_check]['SL'] = sl
                        ran_data[dp_check]['T1'] = t1
                        ran_data[dp_check]['T2'] = t2
                        ran_data[dp_check]['ID'] = st_id
                        ran_data[dp_check]['DATA'] = data
                        ctr += 1
                        y = False
    return ran_data