def train_tasks(parameters):
    """Train exactly one task from parameters['tasks'].

    Loads the pre-split .npy matrices for the task, restricts the feature
    columns to the active feature set, builds dataloaders, and runs train().
    Refuses to run if more than one task id is supplied.
    """
    # GPU setting
    if parameters['use_cuda']:
        os.environ['CUDA_VISIBLE_DEVICES'] = parameters['gpu']

    if len(parameters['tasks']) > 1:
        print('please train 1 task at once')
        return
    task_id = parameters['tasks'][0]

    # Log-file name: optional note, then a minute-resolution timestamp.
    stamp = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
    if parameters['note']:
        stamp = parameters['note'] + '_' + stamp
    log_file = log_dir + 'split_train_task_{}_{}.txt'.format(task_id, stamp)
    train_logger = logger(log_file, parameters)
    train_logger.log('\n')
    parameters['logger'] = train_logger

    # Task bookkeeping
    task = tasks[task_id]
    data_prefix = 'task_' + str(task_id)
    train_logger.log_dict(task)
    train_logger.log('dataset prefix: ' + data_prefix)
    parameters['model_prefix'] = 'split_train_task_{}'.format(task_id)

    # Load the six pre-split matrices from disk.
    arrays = {}
    for part in ('X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test'):
        arrays[part] = np.load(
            saves_directory + data_prefix + '_' + part + '.npy',
            allow_pickle=True)

    # Column offsets differ between dataset-level (+1) and field-level (+2)
    # features.  NOTE(review): feature_set is a module-level free variable.
    dataset_cols = [i + 1 for i in paper_tasks.dataset_indices[feature_set]]
    field_cols = [i + 2 for i in paper_tasks.field_indices[feature_set]]
    if task['dataset'] == 'dataset':
        cols = dataset_cols
    else:
        assert task['dataset'] == 'field'
        cols = field_cols
    for part in ('X_train', 'X_val', 'X_test'):
        arrays[part] = arrays[part][:, cols]

    X_train, y_train = arrays['X_train'], arrays['y_train']
    X_val, y_val = arrays['X_val'], arrays['y_val']
    X_test, y_test = arrays['X_test'], arrays['y_test']
    train_logger.log('loaded train X size: ' + str(X_train.shape))
    train_logger.log('loaded train y size: ' + str(y_train.shape))
    train_logger.log('loaded val X size: ' + str(X_val.shape))
    train_logger.log('loaded val y size: ' + str(y_val.shape))
    train_logger.log('loaded test X size: ' + str(X_test.shape))
    train_logger.log('loaded test y size: ' + str(y_test.shape))

    # Build dataloaders and run training.
    train_dataloader, val_dataloader, test_dataloader = train.load_datasets(
        X_train, y_train, X_val, y_val, parameters, X_test, y_test)
    train.train(
        train_dataloader, val_dataloader, test_dataloader, parameters,
        models_directory=models_directory)
def main():
    """Parse the command code from argv and launch training.

    Command codes (letter 1 = feature level, letter 2 = field selection):
      dm/ds = Dataset-level features, Mixed (1..max_fields) / Single (current_field)
      fm/fs = Field-level features, Mixed / Single
      cm/cs = Combined dataset+field features, Mixed / Single
    All validation/test accuracies are computed for current_field.
    """
    parameters = {
        'batch_size': 200,
        'num_epochs': 100,
        'hidden_sizes': [1000, 1000, 1000],
        'learning_rate': 5e-4,
        'output_dim': 3,
        'weight_decay': 0,
        'dropout': 0.00,
        'patience': 15,
        'threshold': 1e-3,
        'current_field': 5,
        'max_fields': 5,
        'output_period': 0
    }

    assert len(sys.argv) >= 2, 'You must specify a command dm, ds, fm, fs, cm, cs'
    cmd = sys.argv[1]
    print('command is ' + cmd)

    include_field = cmd[0]
    # 'Single' trains on current_field only; 'Mixed' on all fields up to max.
    if 's' in cmd:
        num_fields_list = [parameters['current_field']]
    else:
        num_fields_list = list(range(1, parameters['max_fields'] + 1))

    train_dataloader = get_training_dataloader(parameters, include_field, num_fields_list)
    val_dataloader = get_val_or_test_dataloader(parameters, include_field)
    train.train(train_dataloader, val_dataloader, None, parameters)
def main():
    """Build feature rows from fetched news and train a model on them.

    Fix: the original body used Python 2-only syntax (`dict.iteritems()` and
    the `print` statement), which raises under Python 3 while the rest of the
    file is Python 3.  `.items()` and `print(...)` behave identically on both.

    Targets are random ints in [1, 100] — presumably a placeholder / smoke
    test rather than real labels (TODO confirm with the author).
    """
    rows = []
    outputs = []
    # `code` (the per-item key) is unused; only the payload feeds the row.
    for (code, ndata) in get_news().items():
        row = normalize.row({"news": ndata})
        rows.append(row)
        outputs.append(random.randint(1, 100))
    model = modeler.train(rows, outputs)
    print(model)
def main():
    """Drive LOAD / TRAIN / EVAL for the aggregate ('agg') model.

    Tunables (all in `parameters` below):
      num_epochs    max number of epochs to train the NN for
      hidden_sizes  neurons per hidden layer, as a list
      output_dim    output dimension; 3 since outputs are 'line', 'scatter', 'bar'
      weight_decay  LR decay in the NN; can be 0 since LR is already decreased
                    via the ReduceLROnPlateau() function
      dropout       dropout in each layer
      patience      epochs at a near-constant learning rate (tolerance set by
                    `threshold`) before dropping the learning rate by 10x
      model_prefix  prefix used when loading/saving every model file
      save_model    save each epoch's model into the models/ folder
      print_test    print test accuracies into test.txt
      test_best     test the best model found (best chosen by val accuracy)
    Training stops automatically when LR < 0.01 * starting LR.

    Commands:
      LOAD  convert unfiltered .csv features into filtered .npy files in ~/saves
      TRAIN train using the parameters and the .npy files
      EVAL  evaluate prefix_.model_number (prints test accuracy)
    """
    parameters = {
        'batch_size': 200,
        'num_epochs': 100,
        'hidden_sizes': [800, 800, 800],
        'learning_rate': 5e-4,
        'output_dim': 3,
        'weight_decay': 0,
        'dropout': 0.00,
        'patience': 20,
        'threshold': 1e-3,
        'model_prefix': 'agg',
        'save_model': False,
        'print_test': True,
        'test_best': False
    }

    assert len(sys.argv) >= 2, 'You must specify a command LOAD, TRAIN, or EVAL'
    assert parameters['model_prefix'], 'You must specify a prefix for the model name'
    if parameters['test_best']:
        assert parameters['save_model'], 'You must save a model to test the best version!'

    command = sys.argv[1].lower()

    if command == 'load':
        X, y = load_features()
        # 10% of examples go to val and another 10% to test.
        util.save_matrices_to_disk(
            X, y, [0.1, 0.1], saves_directory,
            parameters['model_prefix'], num_datapoints)
        return

    X_train, y_train, X_val, y_val, X_test, y_test = util.load_matrices_from_disk(
        saves_directory, parameters['model_prefix'], num_datapoints)

    if command == 'train':
        loaders = train.load_datasets(
            X_train, y_train, X_val, y_val, parameters,
            X_test=X_test, y_test=y_test)
        train_dataloader, val_dataloader, test_dataloader = loaders
        train.train(train_dataloader, val_dataloader, test_dataloader, parameters)
    elif command == 'eval':
        assert len(sys.argv) >= 3
        model_suffix = sys.argv[2]
        evaluate.evaluate(model_suffix, X_test, y_test, parameters)
    else:
        assert False, 'The command must either be LOAD, TRAIN, or EVAL'
        data['change'], data['pct_chg'], data['vol'], data['amount'],
        data['turnover_rate'], data['volume_ratio'], data['total_share'],
        data['float_share'], data['free_share'], data['total_mv'],
        data['circ_mv']
    ]
    x.append(tmp)
    # Shape the collected rows into a single (1, seq_len, n_features) batch.
    # NOTE(review): this is the tail of a function whose `def` lies above this
    # chunk; indentation here is reconstructed — confirm against the full file.
    x = np.reshape(np.array(x, dtype='float_'), (1, seq_len, len(tmp)))
    return x


if __name__ == '__main__':
    # Predict next prices for every stock in the DB and log the relative change.
    stocks_list = []
    file_path = 'C:\\Users\\Admin\\Documents\\ZCIT-Projects\\PythonProj\\Quantification\\ml\\stocks_price.txt'
    logger = log.init_logging(file_path, 'info')
    # Connect to the database (original comment: 链接数据库)
    conn = dbpool.MyPymysqlPool(None, 'MysqlDatabaseInfo')
    # Collect every stock's ts code (original comment: 获取股票ts代码)
    for d in conn.getAll('select ts_code from t_stocks'):
        stocks_list.append(d['ts_code'])
    conn.dispose()
    logger.info('tscode\tpred_price\tchange')
    for stock in stocks_list:
        # Latest TIMESTEP rows of features; None means no data for this stock.
        x = get_newer_five_data(stock, tp.TIMESTEP)
        if x is None:
            continue
        # save=False, pred=True: run prediction with the stock's saved model
        # rather than training/saving a new one.
        _, price = tp.train(stock, save=False, pred=True, x=x)
        # Relative change vs. x[0][-1][3], the 4th feature of the last row of
        # the input window — presumably the close price; TODO confirm.
        logger.info('%s\t%f\t%f' % (stock, float(price), (float(price) - x[0][-1][3]) / float(x[0][-1][3])))
from random import shuffle
from ml.mlp import MLP
from ml.train import train

if __name__ == "__main__":
    # Approximate f(x) = 1 - x with a tiny MLP at several dataset sizes and
    # report, for each size, the training error averaged over 10 runs.
    for N in (10, 100, 500, 1000):
        samples = [x / N for x in range(N)]
        total_err = 0
        # train 10 networks per dataset size
        for _ in range(10):
            shuffle(samples)
            labels = [1 - s for s in samples]
            # one hidden layer of 3 neurons, scalar input and output
            net = MLP(1, (3, ), 1)
            total_err += train(net, samples, labels)
        # average error across the 10 runs
        print("N={}:\t{:.6f}".format(N, total_err / 10))
def main():
    """Run LOAD / TRAIN / EVAL for the paper's prediction tasks."""
    # tasks[0] is a placeholder so tasks[i] lines up with pref_id i.
    # 'dataset' selects dataset-level vs field-level features;
    # 'sampling_mode' is 'over' (oversampling) or an integer sample count.
    tasks = [None,
             {'outcome_variable_name': 'all_one_trace_type', 'prediction_task': 'two', 'sampling_mode': 'over', 'pref_id': 1, 'dataset': 'dataset'},
             {'outcome_variable_name': 'all_one_trace_type', 'prediction_task': 'three', 'sampling_mode': 'over', 'pref_id': 2, 'dataset': 'dataset'},
             {'outcome_variable_name': 'all_one_trace_type', 'prediction_task': 'six', 'sampling_mode': 'over', 'pref_id': 3, 'dataset': 'dataset'},
             {'outcome_variable_name': 'has_single_src', 'prediction_task': 'two', 'sampling_mode': 'over', 'pref_id': 4, 'dataset': 'dataset'},
             {'outcome_variable_name': 'num_x_axes', 'prediction_task': 'numeric', 'sampling_mode': 100, 'pref_id': 5, 'dataset': 'dataset'},  #10000
             {'outcome_variable_name': 'num_y_axes', 'prediction_task': 'numeric', 'sampling_mode': 100, 'pref_id': 6, 'dataset': 'dataset'},  #10000
             {'outcome_variable_name': 'trace_type', 'prediction_task': 'two', 'sampling_mode': 'over', 'pref_id': 7, 'dataset': 'field'},
             {'outcome_variable_name': 'trace_type', 'prediction_task': 'three', 'sampling_mode': 'over', 'pref_id': 8, 'dataset': 'field'},
             {'outcome_variable_name': 'trace_type', 'prediction_task': 'six', 'sampling_mode': 'over', 'pref_id': 9, 'dataset': 'field'},
             {'outcome_variable_name': 'is_single_src', 'prediction_task': 'two', 'sampling_mode': 'over', 'pref_id': 10, 'dataset': 'field'},
             {'outcome_variable_name': 'is_x_or_y', 'prediction_task': 'two', 'sampling_mode': 'over', 'pref_id': 11, 'dataset': 'field'},
             ]
    for i in [1]:  # range(2, 12): # range(7, len(tasks)):
        task = tasks[i]
        model_prefix = 'paper_' + task['dataset'] + '_' + str(task['pref_id'])
        parameters = {
            'batch_size': 200,
            'num_epochs': 100,
            'hidden_sizes': [1000, 1000, 1000],
            'learning_rate': 5e-4,
            'weight_decay': 0,
            'dropout': 0.00,
            'patience': 10,
            'threshold': 1e-3,
            'model_prefix': model_prefix,
            # uncomment this if you want to print test accuracies/save model
            'only_train': False,
            'save_model': True,
            'print_test': True,
            # for constructing learning curves
            'dataset_ratios': [0.01, 0.1, 0.5, 1.0],
            'test_best': True,
            'use_cuda': False
        }
        if parameters['use_cuda'] == True:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        for feature_set in [3]:  # [0, 1, 2, 3]: # range(0, 4): # dimensions, types, values, names
            assert len(sys.argv) >= 2, 'You must specify a command LOAD, TRAIN, or EVAL'
            assert(parameters['model_prefix']), 'You must specify a prefix for the model name'
            if 'test_best' in parameters and parameters['test_best']:
                assert parameters['save_model'], 'You must save a model to test the best version!'
            command = sys.argv[1].lower()
            if command == 'load':
                # 20% of examples to val, 20% to test.
                X, y = load_features(task)
                util.save_matrices_to_disk(
                    X, y, [0.2, 0.2], saves_directory, parameters['model_prefix'], num_datapoints)
            else:
                X_train, y_train, X_val, y_val, X_test, y_test = util.load_matrices_from_disk(
                    saves_directory, parameters['model_prefix'], num_datapoints)
                # NOTE(review): dataset_indices / field_indices are free variables,
                # presumably module-level per-feature-set column lists — confirm.
                if task['dataset'] == 'dataset':
                    X_train = X_train[:, dataset_indices[feature_set]]
                    X_val = X_val[:, dataset_indices[feature_set]]
                    X_test = X_test[:, dataset_indices[feature_set]]
                else:
                    assert task['dataset'] == 'field'
                    X_train = X_train[:, field_indices[feature_set]]
                    X_val = X_val[:, field_indices[feature_set]]
                    X_test = X_test[:, field_indices[feature_set]]
                print('loaded dimensions are', X_train.shape)
                print('task_num and feature_set:(' + str(task['pref_id']) + ',' + str(feature_set) + ')')
                if command == 'train':
                    log_file = log_dir + 'training_task_' + str(task['pref_id']) + '.txt'
                    train_logger = logger(log_file, task)
                    train_dataloader, val_dataloader, test_dataloader = train.load_datasets(
                        X_train, y_train, X_val, y_val, parameters, X_test, y_test, train_logger)
                    # NOTE(review): `suffix` is a free variable — confirm it is
                    # defined at module level before this runs.
                    train.train(
                        train_dataloader, val_dataloader, test_dataloader, parameters,
                        models_directory=models_directory, suffix=suffix, logger=train_logger)
                elif command == 'eval':
                    assert len(sys.argv) >= 3
                    model_suffix = sys.argv[2]
                    log_file = log_dir + 'testing_task_' + str(task['pref_id']) + '.txt'
                    test_logger = logger(log_file, task)
                    train_dataloader, val_dataloader, test_dataloader = train.load_datasets(
                        X_train, y_train, X_val, y_val, parameters, X_test, y_test, test_logger)
                    evaluate.evaluate(
                        model_suffix, test_dataloader, parameters, models_directory)
                else:
                    assert False, 'The command must either be LOAD, TRAIN, or EVAL'