def model_prediction(symbol):
    prediction = []
    scaler = None
    for i in range(0, c.NUM_TRAIN):
        price_data, covid_data = prepare_data(c.SAVED_CSV_PATH.format(symbol),
                                              shuffle=c.SAMPLE_SHUFFLE)
        scaler = covid_data["scaler"]
        print(f"Executing for the {i}th time")
        model = build_overall_model(price_data, covid_data, batch_size=10)
        fit_overall_model(model, price_data, covid_data)
        price_data, covid_data = prepare_data(c.SAVED_CSV_PATH.format(symbol),
                                              shuffle=False)
        print(f"Making {i}th Prediction")
        if i == 0:
            prediction = predict_overall_model(model, price_data, covid_data)
        else:
            prediction += predict_overall_model(model, price_data, covid_data)
    prediction = np.array(prediction / c.NUM_TRAIN)
    label = np.array(covid_data["y_test"][:, 2]).reshape(-1, 1)
    pyplot.plot(
        scaler.inverse_transform(prediction[:, 0].reshape(-1, 1)),
        label="prediction_first",
    )
    pyplot.plot(
        scaler.inverse_transform(prediction[:, c.PREDICTION_STEP - 1].reshape(-1, 1)),
        label="prediction_last",
    )
    pyplot.plot(scaler.inverse_transform(label), label="actual")
    pyplot.legend()
    pyplot.show()
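# Hedged sketch of the prediction-averaging idea used by model_prediction
# above: train several models independently and average their predictions.
# train_once is a hypothetical stand-in for one build/fit/predict cycle.
import numpy as np

def train_once(seed):
    rng = np.random.default_rng(seed)
    return rng.normal(loc=1.0, size=5)  # stand-in for one model's predictions

NUM_TRAIN = 3
prediction = sum(train_once(s) for s in range(NUM_TRAIN)) / NUM_TRAIN
print(prediction)  # the averaged ensemble prediction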
def mlformat(dir, segment_size):
    # read in audio files, take the fft, and return the normalized fft plus
    # the class
    mlsamples = []
    mlclasses = []
    flst = sorted(os.listdir(dir))  # list the available audio files
    for angle in ['%02d' % a for a in range(91)]:  # '00' .. '90'
        for filename in flst:
            if fnmatch.fnmatch(filename, '%sdeg*' % angle):
                samplerate, samples = read(dir + '%s' % filename)
                sample, label = preprocessing.prepare_data(
                    samples, samplerate, angle, segment_size)  # take the fft
                mlsamples.extend(sample[0:1])
                mlclasses.extend(label[0:1])  # the class of each fft
    return mlsamples, mlclasses
def train():
    x_train, x_test, y_train, y_test = prepare_data()
    lasso = linear_model.Lasso()
    lasso.fit(x_train, y_train)
    y_pred = lasso.predict(x_test)
    return mean_squared_error(y_test, y_pred) ** 0.5
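# Self-contained sketch of the same Lasso-plus-RMSE flow on synthetic data,
# since prepare_data above is defined elsewhere; make_regression is only a
# stand-in for the real dataset.
from sklearn import linear_model
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)
lasso = linear_model.Lasso()
lasso.fit(x_train, y_train)
rmse = mean_squared_error(y_test, lasso.predict(x_test)) ** 0.5  # root MSE
print(rmse)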
def input_up(sess):
    data = preprocessing.prepare_data(dataset='Test')
    print(len(data))
    sub_input_sequence = []
    sub_label_sequence = []
    padding = abs(config.image_size - config.label_size) // 2  # 6
    input_, label_ = preprocessing.preprocess(data[0], config.scale)
    if len(input_.shape) == 3:
        h, w, _ = input_.shape
    else:
        h, w = input_.shape
    nx = 0  # patch-grid counters; the sub-images need them later to be merged back
    ny = 0
    for x in range(0, h - config.image_size + 1, config.stride):  # x from 0 to h-33+1, step stride (21)
        nx += 1
        ny = 0
        for y in range(0, w - config.image_size + 1, config.stride):  # y from 0 to w-33+1, step stride (21)
            ny += 1
            # crop sub_input = input_[x:x+33, y:y+33] and the centred label patch
            sub_input = input_[x:x + config.image_size,
                               y:y + config.image_size]  # [33 x 33]
            sub_label = label_[x + padding:x + padding + config.label_size,
                               y + padding:y + padding + config.label_size]  # [21 x 21]
            sub_input = sub_input.reshape([config.image_size, config.image_size, 1])
            sub_label = sub_label.reshape([config.label_size, config.label_size, 1])
            sub_input_sequence.append(sub_input)
            sub_label_sequence.append(sub_label)
    # the steps above are the same as in training
    arrdata = np.asarray(sub_input_sequence)   # [?, 33, 33, 1]
    arrlabel = np.asarray(sub_label_sequence)  # [?, 21, 21, 1]
    make_data(arrdata, arrlabel)  # save in h5 format
    return nx, ny
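# Self-contained sketch of the sliding-window patch extraction performed by
# input_up above, on a toy array; 33/21/21 mirror the assumed config values
# image_size/label_size/stride.
import numpy as np

image_size, label_size, stride = 33, 21, 21
padding = (image_size - label_size) // 2  # 6
img = np.random.rand(100, 120)
patches = [
    img[x:x + image_size, y:y + image_size]
    for x in range(0, img.shape[0] - image_size + 1, stride)
    for y in range(0, img.shape[1] - image_size + 1, stride)
]
print(len(patches), patches[0].shape)  # 20 patches of shape (33, 33)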
def load_external_data(link: str) -> Tuple[pd.DataFrame, List[str], Exception]:
    """ Load data from a link and preprocess it

    Parameters:
    -----------
    link : str
        Link to the data (should be hosted online)

    Returns:
    --------
    df : pandas.core.frame.DataFrame | False
        The data loaded and preprocessed. If there is an issue
        loading/preprocessing then it returns False instead.
    player_list : list | False
        List of players that have been in any board game match. If there is
        an issue with loading/preprocessing the data then it returns False
        instead.
    exception : False | Exception
        If there is something wrong with preprocessing, return the Exception,
        otherwise return False
    """
    exception = False
    try:
        df, player_list = preprocessing.prepare_data(link)
        return df, player_list, exception
    except Exception as exception:
        return False, False, exception
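# A minimal, self-contained sketch of the "False sentinel plus exception"
# return convention used by load_external_data above; parse_number is a
# hypothetical stand-in for the loading step.
from typing import Tuple, Union

def parse_number(text: str) -> Tuple[Union[float, bool], Union[Exception, bool]]:
    try:
        return float(text), False
    except ValueError as exc:
        return False, exc

value, err = parse_number("3.14")  # -> (3.14, False)
value, err = parse_number("oops")  # -> (False, ValueError(...))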
def main():
    torch.manual_seed(777)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()
    parser.add_argument("--path", type=str)
    parser.add_argument("--embedding_dim", type=int, default=300)
    parser.add_argument("--iterator", type=int, default=10)
    parser.add_argument("--lr", type=float, default=1e-5)
    parser.add_argument("--decay", type=float, default=0.01)
    parser.add_argument("--batch_size", type=int, default=100)
    args = parser.parse_args()

    trg, src = load_pair(args.path)
    src_token = eng_tokenize(src)
    trg_token = es_tokenize(trg)
    trg2idx, idx2_trg = make_dictionary(trg_token)
    src2idx, idx2src = make_dictionary(src_token)
    src_ix = make_src_idx(src_token, src2idx)
    trg_ix = make_trg_idx(trg_token, trg2idx)

    # model definitions
    encoder = EncoderGRU(emb_dim=args.embedding_dim, bidirectional=True,
                         vocab_size=len(src2idx))
    attention = Attention(emb_dim=args.embedding_dim, padding_idx=0)
    decoder = DecoderGRU(emb_dim=args.embedding_dim, attention=attention,
                         n_class=len(trg2idx))
    model = Seq2Seq_a(encoder, decoder, device, trg2idx)
    num_parameter(model)

    # loss and optimizer setup
    loss_func = nn.CrossEntropyLoss(ignore_index=0)
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr,
                              weight_decay=args.decay)

    # train/test split
    train_loader, test_loader = prepare_data(src=src_ix, trg=trg_ix,
                                             test_size=0.2,
                                             batch_size=args.batch_size)
    train(model, iterator=args.iterator, optimizer=optimizer,
          criterion=loss_func, train_loader=train_loader,
          visual_path="ssibal", trg2idx=trg2idx,
          savepath="./seq2seq_model.pth")
def main():
    # items_to_predict = item_selection()
    # items_to_predict = select_sorted_items(items_to_predict)
    items_to_predict = ['Amulet_of_strength', "Green_d'hide_vamb", 'Staff_of_fire', 'Zamorak_monk_top', 'Staff_of_air',
                        'Adamantite_bar', 'Zamorak_monk_bottom', 'Adamant_platebody', 'Runite_ore', 'Rune_scimitar', 'Rune_pickaxe',
                        'Rune_full_helm', 'Rune_kiteshield', 'Rune_2h_sword', 'Rune_platelegs', 'Rune_platebody', 'Old_school_bond']
    num_features = 2

    for item_to_predict in items_to_predict:
        # =========== PREPROCESSING ===========
        # SELECT ITEMS
        items_selected = item_selection()
        # FEATURE EXTRACTION
        preprocessed_df = prepare_data(item_to_predict, items_selected)
        # FEATURE SELECTION & NORMALIZATION
        selected_df, pred_std, pred_mean = regression_f_test(
            preprocessed_df, item_to_predict, number_of_features=num_features)
        # print(selected_df.head())
        # print(selected_df.shape)
        # print("columns with nan: {}".format(selected_df.columns[selected_df.isna().any()].tolist()))

        # =========== UNIVARIATE ===========
        uni_config = {}
        # TRAINING AND SAVING MODEL
        univariate_rnn(selected_df, item_to_predict)
        # # LOADING AND APPLYING MODEL
        # loaded_model = tf.keras.models.load_model('models/{}_uni_model.h5'.format(item_to_predict))
        # apply_univariate_test(selected_df, item_to_predict, loaded_model, pred_std, pred_mean)

        # =========== MULTIVARIATE SINGLE STEP ===========
        multiS_config = {
            'lstm_units': 64,
            'EVALUATION_INTERVAL': 300,
            'EPOCHS': 10,
            'learning_rate': 0.0001,
            'num_dropout': 2
        }
        # TRAINING AND SAVING MODEL
        multivariate_rnn_single(selected_df, item_to_predict, **multiS_config)
        # # LOADING AND APPLYING MODEL
        # loaded_model = tf.keras.models.load_model('models/{}_multiS_model.h5'.format(item_to_predict))
        # apply_multivariate_single_step_test(selected_df, item_to_predict, loaded_model, pred_std, pred_mean)

        # =========== MULTIVARIATE MULTI STEP ===========
        multiM_config = {
            'lstm_units': 128,
            'EVALUATION_INTERVAL': 400,
            'EPOCHS': 15,
            'learning_rate': 0.0001,
            'num_dropout': 2
        }
        # TRAINING AND SAVING MODEL
        multivariate_rnn_multi(selected_df, item_to_predict, **multiM_config)
def run(texts):
    preprocessed = [prepare_data(text) for text in texts]
    tfidf_scores_transformed = calculate_tf_idf(preprocessed)
    tfidf_scores = calculate_tf_idf2(preprocessed)
    print('Are the two tf-idf scores the same?',
          check_tfidf_similarity(tfidf_scores_transformed[0], tfidf_scores[0]))
    print_results(tfidf_scores[0], tfidf_scores[1], len(preprocessed))
    print('-----------------------------------')
    print_results(tfidf_scores_transformed[0], tfidf_scores_transformed[1],
                  len(preprocessed))
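# Hedged sketch of computing tf-idf scores two ways and comparing them, in the
# spirit of run() above; it assumes scikit-learn, while calculate_tf_idf and
# calculate_tf_idf2 are project-specific helpers.
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

docs = ["the cat sat", "the dog sat", "the cat ran"]
direct = TfidfVectorizer().fit_transform(docs).toarray()
two_step = TfidfTransformer().fit_transform(
    CountVectorizer().fit_transform(docs)).toarray()
print("Are the two tf-idf scores the same?", np.allclose(direct, two_step))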
def recognize_and_display_result(self):
    '''
    Recognize the digit the user drew in the left-hand area and display the
    result in the right-hand area
    :return:
    '''
    self.reset_result_area()
    data = preprocessing.prepare_data(self.drawing_area)
    num = self.infer.inference_once(data)
    self.draw_num(num)
    # after the result is shown, reset the drawing area for the user's next sketch
    self.to_reset_drawing_area = True
def basic():
    train, test = preprocessing.prepare_data(True)
    with open('nn_resultsfeaturedrop_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # vary each parameter of random forest
        split_train, split_labels = preprocessing.split_labels(train)
        split_train, split_labels = preprocessing.apply_smote(
            split_train, split_labels)
        nn_predict(split_train, split_labels, test, writer, {})
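# Minimal sketch of the oversampling step that preprocessing.apply_smote above
# presumably wraps, using imbalanced-learn directly on toy data (an assumption
# about the helper, not its actual implementation).
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=300, weights=[0.9, 0.1], random_state=0)
X_res, y_res = SMOTE(random_state=0).fit_resample(X, y)
print(Counter(y), Counter(y_res))  # minority class is synthetically balanced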
def full_hyperparameter_tuning():
    # items_to_predict = ['Old_school_bond', 'Rune_platebody', 'Rune_2h_sword', 'Rune_axe',
    #                     'Rune_pickaxe', 'Adamant_platebody', 'Amulet_of_power']
    items_to_predict = item_selection()
    items_to_predict = select_sorted_items(items_to_predict)
    min_features = 2
    max_features = 4

    for item_to_predict in items_to_predict:
        for num_features in range(min_features, max_features):
            # SELECT ITEMS
            items_selected = item_selection()
            # FEATURE EXTRACTION
            preprocessed_df = prepare_data(item_to_predict, items_selected)
            # FEATURE SELECTION & NORMALIZATION
            selected_df, pred_std, pred_mean = regression_f_test(
                preprocessed_df, item_to_predict,
                number_of_features=num_features)
            # print(selected_df.head())

            # define the grid search parameters
            batch_size = [16, 32, 64, 128]
            buffer_size = [30, 50, 100]
            epochs = [20, 40]
            eval_interval = [100, 400]
            num_dropout_layers = [1, 2, 3]
            num_lstm_units = [32, 64, 128]
            learning = [0.0001]
            past_history = [30, 50]

            # multivariate_rnn_multi_hyperparameter_tuning(selected_df, item_to_predict, eval_interval=eval_interval,
            #     learning=learning, past_history=past_history, epochs=epochs, num_lstm_units=num_lstm_units,
            #     batch_size=batch_size, buffer_size=buffer_size, num_dropout_layers=num_dropout_layers)
            # multivariate_rnn_single_hyperparameter_tuning(selected_df, item_to_predict, eval_interval=eval_interval,
            #     learning=learning, past_history=past_history, epochs=epochs, num_lstm_units=num_lstm_units,
            #     batch_size=batch_size, buffer_size=buffer_size, num_dropout_layers=num_dropout_layers)
            # univariate_rnn_hyperparameter_tuning(selected_df, item_to_predict, batch_size=batch_size, epochs=epochs,
            #     past_history=past_history, num_lstm_units=num_lstm_units, eval_interval=eval_interval)
            multivariate_rnn_single_hyperparameter_tuning(
                selected_df, item_to_predict, num_lstm_units=[128],
                past_history=[30], eval_interval=[400],
                num_dropout_layers=[2], learning=[0.0001])
            # multivariate_rnn_multi_hyperparameter_tuning(selected_df, item_to_predict,
            #     num_lstm_units=num_lstm_units, past_history=past_history, eval_interval=eval_interval)
            # univariate_rnn_hyperparameter_tuning(selected_df, item_to_predict,
            #     past_history=range(30, 50, 5), num_lstm_units=[8], eval_interval=eval_interval)
            # univariate_rnn_hyperparameter_tuning(selected_df, item_to_predict)

            del selected_df
            del preprocessed_df
            gc.collect()
def basic():
    train, test = preprocessing.prepare_data(True)
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)
    with open('nn_results.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        split_train, split_labels = preprocessing.split_labels(train)
        nn_predict(split_train, split_labels, test, writer)
        split_train, split_labels = preprocessing.apply_smote(
            split_train, split_labels)
        nn_predict(split_train, split_labels, test, writer)
def basic():
    train, test = preprocessing.prepare_data()
    with open('final_results_basic.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        writer.writerow([
            'class_weight', 'min_samples_split', 'n_estimators', 'vroc_auc',
            'vprecision', 'vrecall', 'vf1', 'vfp', 'vfn', 'troc_auc',
            'tprecision', 'trecall', 'tf1', 'tfp', 'tfn'
        ])
        # vary each parameter of random forest
        rf_predict(train, test, 'basic', writer)
def smote_test():
    train, test = preprocessing.prepare_data()
    with open('final_results_smote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header (note the comma after 'max_features'; without it Python
        # silently concatenates the two adjacent string literals)
        writer.writerow([
            'max_depth', 'n_estimators', 'min_samples_split', 'class_weight',
            'max_features', 'vroc_auc', 'vprecision', 'vrecall', 'vf1', 'vfp',
            'vfn', 'troc_auc', 'tprecision', 'trecall', 'tf1', 'tfp', 'tfn'
        ])
        rf_predict(train, test, 'smote', writer)
def multiple_balanced_sets():
    train, test = preprocessing.prepare_data()
    train_list = preprocessing.multiple_balanced_samples(train, 5)
    # separate class label (last column); split the test set once, outside the
    # loop, so it is not re-split on every iteration
    test_feats, test_labels = preprocessing.split_labels(test)
    for i in range(5):
        train, labels = preprocessing.split_labels(train_list[i])
        classifier = linear_model.LogisticRegression()
        validation.cross_validate_set(classifier, train, labels)
        validation.cross_validate(classifier, train, labels)
        classifier.fit(train, labels)
        # test
        validation.test(classifier, test_feats, test_labels)
def create_data():
    questions, answers, vocab_size, tokenizer, start_tk, end_tk = preprocessing.prepare_data()
    dataset = tf.data.Dataset.from_tensor_slices(({
        'inputs': questions,
        'dec_inputs': answers[:, :-1]
    }, {
        'outputs': answers[:, 1:]
    }))
    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    print("\n\n\n", dataset)
    return dataset, (vocab_size, tokenizer, start_tk, end_tk)
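# Self-contained sketch of the cache -> shuffle -> batch -> prefetch pipeline
# built by create_data above, on toy tensors; the buffer and batch sizes here
# are arbitrary stand-ins for BUFFER_SIZE and BATCH_SIZE.
import tensorflow as tf

toy = tf.data.Dataset.from_tensor_slices((tf.range(10), tf.range(10, 20)))
toy = toy.cache()                                  # keep elements in memory after the first pass
toy = toy.shuffle(buffer_size=10)                  # shuffle within a fixed-size buffer
toy = toy.batch(4)                                 # group elements into batches
toy = toy.prefetch(tf.data.experimental.AUTOTUNE)  # overlap input prep with training
for x, y in toy:
    print(x.numpy(), y.numpy())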
def main():
    ######### args parameters
    trg, src = load_pair(args.path)
    src_token = eng_tokenize(src)
    trg_token = es_tokenize(trg)
    ###############################################
    trg2idx, idx2trg = make_dictionary(trg_token)
    src2idx, idx2src = make_dictionary(src_token)
    src_convert = convert(word2idx=src2idx, idx2word=idx2src)
    trg_convert = convert(word2idx=trg2idx, idx2word=idx2trg)
    src_ix = src_convert.from_seq2idx(src_token)
    trg_ix = trg_convert.from_seq2idx(trg_token)
    train_loader, test_loader = prepare_data(src=src_ix, trg=trg_ix,
                                             test_size=args.test_size,
                                             batch_size=args.batch_size,
                                             y_vocab=trg2idx)
    # loss and optimizer setup
    loss_func = nn.CrossEntropyLoss(ignore_index=0)
    model = ConvS2S(src_size=len(src2idx), tgt_size=len(trg2idx),
                    N=args.num_of_layer, d_vector=512, k_size=3, device=device)
    optimizer = optim.Adam(model.parameters(), weight_decay=args.weight_decay)
    train(model=model, iterator=args.iterator, optimizer=optimizer,
          criterion=loss_func, train_loader=train_loader,
          test_loader=test_loader)
def basic():
    train, test = preprocessing.prepare_data(True)
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)
    with open('nn_resultsepsilon_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # vary each parameter of random forest
        num_layers_1 = 20
        num_layers_2 = 0
        num_layers_3 = 0
        alpha = 1.e-3
        for epsilon in [1.e0, 1.e-1, 1.e-2, 1.e-3, 1.e-4, 1.e-5]:
            if num_layers_2 == 0 and num_layers_3 != 0:
                continue
            split_train, split_labels = preprocessing.split_labels(train)
            params = {'epsilon': epsilon}
            nn_predict(split_train, split_labels, test, writer, params)
def basic():
    train, test = preprocessing.prepare_data(True)
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)
    with open('nn_resultslayer2_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # vary each parameter of random forest
        num_layers_1 = 20
        num_layers_2 = 0
        num_layers_3 = 0
        alpha = 1.e-3
        for num_layers_2 in range(1, 30):
            if num_layers_2 == 0 and num_layers_3 != 0:
                continue
            split_train, split_labels = preprocessing.split_labels(train)
            params = {'layer_2': num_layers_2}
            nn_predict(split_train, split_labels, test, writer, params)
def run(dataset, data_path, model_type, generations, populations):
    (df, features, label, categorical_features,
     sensitive_features) = prepare_data(dataset, data_path)
    X, y = process_categorical(df, features, label, categorical_features)
    (X_train, X_test, y_train, y_test) = prepare_data_split(X, y)
    # split_func = split_on_sensitive_attr(X_train)
    model = Classifier(
        dataset,
        model_type,
        X_train,
        y_train,
        X_test,
        y_test,
        features,
        sensitive_features,
    )
    model.fit()
    nsga_cfg = NSGAConfig(
        generations=generations,
        populations=populations,
        model_type=model_type,
        X_sensitive_a1=model.X_m,
    )
    X_m = model.X_m
    X_f = model.X_f
    y_m = model.y_m
    y_f = model.y_f
    X_test_m = model.X_test_m
    X_test_f = model.X_test_f
    y_test_m = model.y_test_m
    y_test_f = model.y_test_f
    try:
        run_nsga(nsga_cfg)
    except Exception as e:
        pass
def encode_sentences(curr_model, pair, batch_size=128, test=False):
    """
    Encode sentences into the joint embedding space
    """
    en_feats = numpy.zeros((len(pair[0]), curr_model['options']['dim']),
                           dtype='float32')
    cn_feats = numpy.zeros((len(pair[0]), curr_model['options']['dim']),
                           dtype='float32')
    data_index = prepare_data(pair, curr_model['worddict'], test=test)
    cur = 0
    for en, cn, en_lengths, cn_lengths, en_index, cn_index in data_generator(
            data_index, batch_size):
        en, cn = curr_model['en_cn_model'].forward(en, en_lengths, en_index,
                                                   cn, cn_lengths, cn_index)
        en = en.data.cpu().numpy()
        cn = cn.data.cpu().numpy()
        for i in xrange(batch_size):
            if i + cur >= len(pair[0]):
                break
            for j in xrange(curr_model['options']['dim']):
                en_feats[i + cur][j] = en[i][j]
                cn_feats[i + cur][j] = cn[i][j]
        cur += batch_size
    en_feats = Variable(torch.from_numpy(en_feats).cuda())
    cn_feats = Variable(torch.from_numpy(cn_feats).cuda())
    return en_feats, cn_feats
w = option == "-write"
r = option == "-read"
args = sys.argv[2:]

# Load embeddings
(word_ids, embeddings) = pickle.load(open(args[2], "rb"))

# Load or pre-process data
if r:
    train_data = pickle.load(open(args[0], "rb"))
    valid_data = pickle.load(open(args[1], "rb"))
else:
    char_map = Numberer()
    label_map = Numberer()
    emolex = read_emolex(args[3])
    train_data = prepare_data(args[0], word_ids, emolex, char_map, label_map)
    valid_data = prepare_data(args[1], word_ids, emolex, char_map, label_map)

if w:
    with open("traindata", "wb") as train_file:
        pickle.dump(train_data, train_file)
    with open("testdata", "wb") as test_file:
        pickle.dump(valid_data, test_file)

# Get batches
config = DefaultConfig()
train_batches = generate_batches(*train_data, batch_size=config.batch_size)
validation_batches = generate_batches(*valid_data, batch_size=config.batch_size)

# Execute the model
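# Minimal sketch of the -write/-read caching pattern above: pickle the
# preprocessed data once, then reload it on later runs. cache.pkl and the
# build step are hypothetical.
import os
import pickle

def load_or_build(path="cache.pkl"):
    if os.path.exists(path):          # like -read: reuse the cached result
        with open(path, "rb") as f:
            return pickle.load(f)
    data = {"built": True}            # stand-in for expensive preprocessing
    with open(path, "wb") as f:       # like -write: cache for next time
        pickle.dump(data, f)
    return data

print(load_or_build())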
                layers=[[H, H] for i in range(Lmax)],
                n_start=n_start,
                save_path="models/bifinn/N_{0}_H_{1}".format(N, H),
                status=status,
                logfile="BiFiNN_N_{0}_H_{1}.log".format(N, H))
mbifinn = Modified_BiFiNN(z_shape=z_shape,
                          Lmax=Lmax,
                          layers=[[H, H] for i in range(Lmax)],
                          n_start=n_start,
                          save_path="models/mbifinn/N_{0}_H_{1}".format(N, H),
                          status=status,
                          logfile="MBiFiNN_N_{0}_H_{1}.log".format(N, H))
train_data, test_data = prepare_data("dataset.mat",
                                     L=Lmax,
                                     train_index=train_index,
                                     test_index=test_index,
                                     basis_index=basis_index)
if status == "train":
    podnn.train(train_data, batch_size=batch_size, epochs=epochs,
                verbose=verbose)
    mpodnn.train(train_data, batch_size=batch_size, epochs=epochs,
                 verbose=verbose)
    bifinn.train(train_data, batch_size=batch_size, epochs=epochs,
                 verbose=verbose)
BATCH_SIZE = 1
MINI_BATCHES = 9
EPOCHS = 5000
LEARNING_RATE = 1e-3
N_CELLS = 250
N_CLASSES = len(CLASSES)

print('Dataset Size: {}\n'.format(len(SOUND_FILE_PATHS)))

# If the data hasn't been preprocessed, then do it now.
if not os.path.exists(TF_RECORDS_META) and \
   not os.path.exists(TF_RECORDS_TRAIN) and \
   not os.path.exists(TF_RECORDS_TEST):
    FEATURES_MIN, FEATURES_MAX, FEATURES_MEAN = prepare_data(
        SOUND_FILE_PATHS, TF_RECORDS_DESTINATION, MAX_SAMPLES)
    with open(TF_RECORDS_META, 'w') as OUTPUT:
        OUTPUT.write('{},{},{}'.format(FEATURES_MIN, FEATURES_MAX,
                                       FEATURES_MEAN))
else:
    with open(TF_RECORDS_META, 'r') as INPUT:
        META_DATA = INPUT.readline()
        FEATURES_MIN, FEATURES_MAX, FEATURES_MEAN = [
            float(DATA_POINT) for DATA_POINT in META_DATA.split(',')
        ]

print('Training Set Size: {}'.format(int(len(SOUND_FILE_PATHS) * .9)))
print('Test Set Size: {}\n'.format(int(len(SOUND_FILE_PATHS) * .1)))


def variable_on_cpu(name, shape, initializer, dtype=tf.float32):
        for i in range(self.Lmax):
            coeff_errors.append(
                compute_error(c_high[:, :(i + 1)], c_pred[i], scale=u_high))
            approx_errors.append(
                compute_error(u_high, u_pred[i], scale=u_high))
        return {
            "c": c_pred,
            "u": u_pred,
            "coeff_errors": coeff_errors,
            "approx_errors": approx_errors
        }

    def extend_data(self, data):
        new_data = copy.deepcopy(data)
        new_data["z"] = np.concatenate([data["z"], data["c_low"]], axis=1)
        return new_data


if __name__ == "__main__":
    from preprocessing import prepare_data

    bifinn = Modified_BiFiNN(z_shape=10,
                             Lmax=2,
                             layers=[[16, 16] for i in range(2)],
                             n_start=1)
    train_data, test_data = prepare_data("examples/example4/dataset.mat",
                                         L=2,
                                         train_index=range(500),
                                         test_index=range(500, 600),
                                         basis_index=range(600, 880))
    bifinn.train(train_data, batch_size=100, epochs=10, verbose=0)
    print(bifinn.load_and_test(test_data))
                    default=config.tokenized_path,
                    help='path to the training data')
args = parser.parse_args()
path = args.path
token_path = args.token_path

articles, summaries, dic = read_files(path, token_path)
# for i in dic.word2idx.keys():
#     print(i, dic.word2idx[i])
# exit()
word_count = len(dic)
print('Number of unique words:', word_count)

art_idx = prepare_data(articles, dic)
sum_idx = prepare_summary(summaries, dic)
# hello = prepare_data(['my name is pasquale'], dic)
# unked_hello = get_unked(hello, dic)
# print(hello)
# print(unked_hello)
# exit()

# prepare TRAIN
train_path = 'train_all.txt'
valid_path = 'val_all.txt'
test_path = 'test_all.txt'
dic_path = 'dictionary'
out_path = 'data_finish/'
def main():
    # Get the seconds since epoch
    current_timestamp = int(time.time())
    print("{} - predicting items".format(current_timestamp))

    model_types = ['uni', 'multiS', 'multiM']
    # SELECT ITEMS
    items_selected = item_selection(drop_percentage=0.5)
    items_to_predict = ['Amulet_of_strength', "Green_d'hide_vamb", 'Staff_of_fire', 'Zamorak_monk_top', 'Staff_of_air',
                        'Adamantite_bar', 'Zamorak_monk_bottom', 'Adamant_platebody', 'Runite_ore', 'Rune_scimitar', 'Rune_pickaxe',
                        'Rune_full_helm', 'Rune_kiteshield', 'Rune_2h_sword', 'Rune_platelegs', 'Rune_platebody', 'Old_school_bond']

    preprocessed_df = None
    for item_to_predict in items_to_predict:
        # GET LIST OF FEATURES
        if not os.path.isfile('models/features/{}_{}_features.txt'.format(
                item_to_predict, model_types[0])):
            print("Model for {} hasn't been created, please run models.py first."
                  .format(item_to_predict))
            return
        specific_feature_list = []
        with open('models/features/{}_{}_features.txt'.format(
                item_to_predict, model_types[0]), 'r') as filehandle:
            specific_feature_list = json.load(filehandle)

        t0 = time.time()
        # FEATURE EXTRACTION
        preprocessed_df = prepare_data(item_to_predict, items_selected,
                                       DATA_FOLDER="data/rsbuddy/",
                                       reused_df=preprocessed_df,
                                       specific_features=specific_feature_list)
        t1 = time.time()
        # FEATURE SELECTION & NORMALIZATION
        selected_df, pred_std, pred_mean = regression_f_test(
            preprocessed_df, item_to_predict,
            specific_features=specific_feature_list,
            number_of_features=len(specific_feature_list) - 1)
        t2 = time.time()

        predictions = []
        for model_type in model_types:
            # LOADING AND APPLYING MODEL
            loaded_model = tf.keras.models.load_model(
                'models/{}_{}_model.h5'.format(item_to_predict, model_type))
            if model_type == 'uni':
                result = apply_univariate(selected_df, item_to_predict,
                                          loaded_model, pred_std, pred_mean)
            elif model_type == 'multiS':
                result = apply_multivariate_single_step(
                    selected_df, item_to_predict, loaded_model, pred_std,
                    pred_mean)
            elif model_type == 'multiM':
                result = apply_multivariate_multi_step(
                    selected_df, item_to_predict, loaded_model, pred_std,
                    pred_mean)
            else:
                print("Unrecognized model type.")
            predictions.extend(result)
            tf.keras.backend.clear_session()
        t3 = time.time()

        print('TIME LOG - preprocessing: {}, feature selection: {}, prediction: {}'
              .format(t1 - t0, t2 - t1, t3 - t2))

        new_predictions = [int(i) for i in predictions]
        print('item: {}, pred: {}'.format(item_to_predict, new_predictions))
        if os.path.isfile('data/predictions/{}.csv'.format(item_to_predict)):
            appendToCSV(item_to_predict, new_predictions, current_timestamp)
        else:
            writeToCSV(item_to_predict, new_predictions, current_timestamp)
def smote_test():
    train, test = preprocessing.prepare_data()
    lr_predict(train, test, 'smote')
def basic():
    train, test = preprocessing.prepare_data()
    lr_predict(train, test, 'basic')
        train_losses[i] = train_loss
        test_losses[i] = test_loss
        dt = datetime.now() - t0
        print(
            f'epoch {i+1}/{epochs} | train_loss: {train_loss:.4f} | test_loss: {test_loss:.4f} | duration: {dt}'
        )
    return train_losses, test_losses


# main
if __name__ == '__main__':
    # initialise
    train_images, train_labels, test_images, test_labels, train_positives, train_negatives, test_positives, test_negatives = prepare_data(
        datapath, H, W)
    model = SiameseModel(feature_dim)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())
    train_steps = int(np.ceil(len(train_positives) / batch_size))
    test_steps = int(np.ceil(len(test_positives) / batch_size))

    # training loop
    train_losses, test_losses = train(model, contrastive_loss, optimizer,
                                      run_generator(train_positives,
                                                    train_negatives,
                                                    train_images),
def smote_test():
    train, test = preprocessing.prepare_data()
    train = preprocessing.undersample_negative_class(train, 5000)
    svm_predict(train, test, 'smote')
def basic():
    train, test = preprocessing.prepare_data()
    train = preprocessing.undersample_negative_class(train, 1000)
    svm_predict(train, test, 'basic')