def change_brand(brandcode, brandcodes, label_type):
    """Build a fresh Nikkei dataset for one brand and wire up its labels.

    Loads raw data for `brandcode`, unifies stock prices across all
    `brandcodes`, regularizes, then copies the selected brand's column into
    the 'y' slot of each phase2 split so it becomes the supervised target.

    NOTE(review): callers elsewhere in this file invoke
    `change_brand(dataset, brandcode)` with two arguments, which does not
    match this three-argument signature -- this file defines `change_brand`
    twice; confirm which definition is meant to be live.
    """
    dataset = Nikkei(brandcode=brandcode)
    dataset.unify_stockprices(dataset=dataset.raw_data[brandcode],
                              brandcodes=brandcodes,
                              label_type=label_type)
    reguralize_data(dataset, brandcodes)
    # The chosen brand's series becomes 'y' for every split.
    for split in ('train', 'valid', 'test'):
        dataset.phase2[split]['y'] = dataset.phase2[split][brandcode]
    return dataset
def change_brand(brandcode, brandcodes, label_type):
    """Return a new Nikkei dataset whose phase2 'y' targets are the series
    of `brandcode`.

    The dataset is constructed from scratch: raw prices for `brandcode` are
    unified against every code in `brandcodes` with the requested
    `label_type`, then regularized before the target columns are assigned.

    NOTE(review): duplicate of the definition above; in Python the later
    definition wins at import time -- consider removing one.
    """
    ds = Nikkei(brandcode=brandcode)
    ds.unify_stockprices(
        dataset=ds.raw_data[brandcode],
        brandcodes=brandcodes,
        label_type=label_type,
    )
    reguralize_data(ds, brandcodes)
    for datatype in ['train', 'valid', 'test']:
        # Point the supervised target at this brand's column.
        ds.phase2[datatype]['y'] = ds.phase2[datatype][brandcode]
    return ds
def predict(dataset, model, brandcodes=['0101'], label_type=1, y_type=1,model_type=2): print 'STEP 3 start...' if dataset == None: if params['experiment_type'] == 'baseline': print 'start to load baseline dataset...' dataset = cPickle.load(open(default_model_dir + '/STEP2/baseline_original')) elif params['experiment_type'] == 'chi2_selected': print 'start to load chi2_selected...' dataset = Nikkei() else: print 'start to load proposed dataset...' dataset = cPickle.load(open(model_dirs['STEP2'])) print 'start to unify stockprice...' # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type) if params['experiment_type'] != 'chi2_selected': dataset.unify_stockprices(dataset=dataset.baseline_original, brandcodes=brandcodes, dataset_type=params['experiment_type'],label_type=label_type) else: dataset.unify_stockprices(dataset = dataset.raw_data[brandcodes[0]],brandcodes=brandcodes,label_type = label_type) reguralize_data(dataset, brandcodes) change_brand(dataset, brandcodes[0]) if model_type == 0: def transformY(data_y): y = [] if label_type < 3: for data in data_y: y.append(data[0]) return numpy.array(y) else : for data in data_y: y.append(data) return numpy.array(y) train_x = dataset.phase2['train']['x'] train_x = numpy.append(train_x, dataset.phase2['valid']['x'], 0) test_x = dataset.phase2['test']['x'] while(1): if params['experiment_type'] == 'baseline': train_x_original = train_x test_x_original = test_x pca = PCA(n_components=1000) pca.fit(train_x_original) train_x = pca.transform(train_x_original) test_x = pca.transform(test_x_original) train_y = transformY(dataset.phase2['train']['y']) train_y = numpy.append(train_y, transformY(dataset.phase2['valid']['y']), 0) test_y = transformY(dataset.phase2['test']['y']) if label_type < 3: tuned_parameters = [{'kernel': ['rbf'], 'gamma': [10**i for i in range(-4,0)], 'C': [10**i for i in range(0,4)]}] gscv = GridSearchCV(SVR(), tuned_parameters, 
cv=5, scoring="mean_squared_error", n_jobs=10) else: print 'classification' tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma': [10**i for i in range(-4,0)],'C': [10**i for i in range(0,4)]}] gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10) gscv.fit(train_x, train_y) best_model = gscv.best_estimator_ predict_y = best_model.predict(test_x) result_train = (best_model.predict(train_x) == train_y).sum() result_test = (best_model.predict(test_x) == test_y).sum() print 'training accuracy : %.2f , %d / %d' % (float(result_train) / len(train_y), result_train, len(train_y)) print 'testing accuracy : %.2f , %d / %d' % (float(result_test) / len(test_y), result_test, len(test_y)) pdb.set_trace() pretrain_params = { 'dataset' : dataset, 'hidden_layers_sizes' : params['STEP4']['hidden_layers_sizes'], 'pretrain_lr' : params['STEP4']['pretrain']['learning_rate'], 'pretrain_batch_size' : params['STEP4']['pretrain']['batch_size'], 'pretrain_epochs' : params['STEP4']['pretrain']['epochs'], 'corruption_levels' : params['STEP4']['corruption_levels'], 'k' : params['STEP4']['k'], 'hidden_recurrent': params['STEP4']['hidden_recurrent'], 'n_outs' : (1 + y_type) } pretrain_model = model.pretrain(pretrain_params, y_type) pretrain_params = get_model_params(pretrain_model) while(1): finetune_params = { 'dataset' : dataset, 'model' : pretrain_model, 'finetune_lr' : params['STEP4']['finetune']['learning_rate'], 'finetune_batch_size' : params['STEP4']['finetune']['batch_size'], 'finetune_epochs' : params['STEP4']['finetune']['epochs'] } finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type) pdb.set_trace() set_model_params(pretrain_model, pretrain_params)
# NOTE(review): this __main__ block is TRUNCATED in the visible source -- the
# trailing `train_rnnrbm(` call is cut off mid-argument-list, so the line is
# left byte-identical rather than reformatted.
# Purpose (as far as visible): for every brand code in params['STEP3'],
# build a Nikkei dataset and unify its stock prices, then sweep the full grid
# of STEP4 pretrain/finetune hyperparameters, calling train_rnnrbm for each
# combination. `all_size` is the total combination count (for progress
# reporting); note it does not include the hidden_recurrent loop -- TODO
# confirm whether that is intentional.
# (Japanese inline comment translates to: "experiment over several brand codes".)
if __name__ == '__main__': ### 銘柄数種について実験 all_size = len(params['STEP3']['brandcode']) * len( params['STEP4']['hidden_layers_sizes'] ) * len(params['STEP4']['pretrain']['batch_size']) * len( params['STEP4']['pretrain']['learning_rate']) * len( params['STEP4']['pretrain']['epochs']) * len( params['STEP4']['finetune']['batch_size']) * len( params['STEP4']['finetune']['learning_rate']) * len( params['STEP4']['finetune']['epochs']) i = 0 for brandcode in params['STEP3']['brandcode']: model_dirs = reload_model_dirs(brandcode) dataset = Nikkei(dataset_type=params['dataset_type'], brandcode=brandcode) dataset.unify_stockprices(dataset.raw_data[brandcode]) for hidden_layers_sizes in params['STEP4']['hidden_layers_sizes']: for hidden_recurrent in params['STEP4']['hidden_recurrent']: for batch_size_pretrain in params['STEP4']['pretrain'][ 'batch_size']: for learning_rate_pretrain in params['STEP4']['pretrain'][ 'learning_rate']: for epochs_pretrain in params['STEP4']['pretrain'][ 'epochs']: for batch_size_finetune in params['STEP4'][ 'finetune']['batch_size']: for learning_rate_finetune in params['STEP4'][ 'finetune']['learning_rate']: for epochs_finetune in params['STEP4'][ 'finetune']['epochs']: result = train_rnnrbm(
def predict(dataset, model, brandcodes=["0101"], label_type=1, y_type=1, model_type=2): print "STEP 3 start..." if dataset == None: if params["experiment_type"] == "baseline": print "start to load baseline dataset..." dataset = cPickle.load(open(default_model_dir + "/STEP2/baseline_original")) elif params["experiment_type"] == "chi2_selected": print "start to load chi2_selected..." dataset = Nikkei() else: print "start to load proposed dataset..." dataset = cPickle.load(open(model_dirs["STEP2"])) print "start to unify stockprice..." # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type) if params["experiment_type"] != "chi2_selected": dataset.unify_stockprices( dataset=dataset.baseline_original, brandcodes=brandcodes, dataset_type=params["experiment_type"], label_type=label_type, ) else: dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type) reguralize_data(dataset, brandcodes) change_brand(dataset, brandcodes[0]) if model_type == 0: def transformY(data_y): y = [] if label_type < 3: for data in data_y: y.append(data[0]) return numpy.array(y) else: for data in data_y: y.append(data) return numpy.array(y) train_x = dataset.phase2["train"]["x"] train_x = numpy.append(train_x, dataset.phase2["valid"]["x"], 0) test_x = dataset.phase2["test"]["x"] while 1: if params["experiment_type"] == "baseline": train_x_original = train_x test_x_original = test_x pca = PCA(n_components=1000) pca.fit(train_x_original) train_x = pca.transform(train_x_original) test_x = pca.transform(test_x_original) train_y = transformY(dataset.phase2["train"]["y"]) train_y = numpy.append(train_y, transformY(dataset.phase2["valid"]["y"]), 0) test_y = transformY(dataset.phase2["test"]["y"]) if label_type < 3: tuned_parameters = [ {"kernel": ["rbf"], "gamma": [10 ** i for i in range(-4, 0)], "C": [10 ** i for i in range(0, 4)]} ] gscv = GridSearchCV(SVR(), 
tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10) else: print "classification" tuned_parameters = [ { "kernel": ["rbf", "linear"], "gamma": [10 ** i for i in range(-4, 0)], "C": [10 ** i for i in range(0, 4)], } ] gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10) gscv.fit(train_x, train_y) best_model = gscv.best_estimator_ predict_y = best_model.predict(test_x) result_train = (best_model.predict(train_x) == train_y).sum() result_test = (best_model.predict(test_x) == test_y).sum() print "training accuracy : %.2f , %d / %d" % ( float(result_train) / len(train_y), result_train, len(train_y), ) print "testing accuracy : %.2f , %d / %d" % (float(result_test) / len(test_y), result_test, len(test_y)) pdb.set_trace() pretrain_params = { "dataset": dataset, "hidden_layers_sizes": params["STEP4"]["hidden_layers_sizes"], "pretrain_lr": params["STEP4"]["pretrain"]["learning_rate"], "pretrain_batch_size": params["STEP4"]["pretrain"]["batch_size"], "pretrain_epochs": params["STEP4"]["pretrain"]["epochs"], "corruption_levels": params["STEP4"]["corruption_levels"], "k": params["STEP4"]["k"], "hidden_recurrent": params["STEP4"]["hidden_recurrent"], "n_outs": (1 + y_type), } pretrain_model = model.pretrain(pretrain_params, y_type) pretrain_params = get_model_params(pretrain_model) while 1: finetune_params = { "dataset": dataset, "model": pretrain_model, "finetune_lr": params["STEP4"]["finetune"]["learning_rate"], "finetune_batch_size": params["STEP4"]["finetune"]["batch_size"], "finetune_epochs": params["STEP4"]["finetune"]["epochs"], } finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type) pdb.set_trace() set_model_params(pretrain_model, pretrain_params)
def predict(dataset, model, brandcodes=['0101'], label_type=1, y_type=1, m=1): print 'STEP 3 start...' if dataset == None: if params['experiment_type'] == 'baseline': print 'start to load baseline dataset...' dataset = cPickle.load(open(default_model_dir + '/STEP2/baseline_original')) elif params['experiment_type'] == 'chi2_selected': print 'start to load chi2_selected dataset...' dataset = Nikkei() else: print 'start to load proposed dataset...' dataset = cPickle.load(open(model_dirs['STEP2'])) print 'start to unify stockprice...' # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type) if params['experiment_type'] == 'chi2_selected': dataset.unify_stockprices(dataset = dataset.raw_data[brandcodes[0]],brandcodes=brandcodes,label_type = label_type) else: dataset.unify_stockprices(dataset=dataset.baseline_original, brandcodes=brandcodes, dataset_type=params['experiment_type'],label_type=label_type) reguralize_data(dataset, brandcodes) change_brand(dataset, brandcodes[0]) #change_brand(dataset, '0101') if m==0: def transformY(data_y): y = [] if label_type < 3: for data in data_y: y.append(data[0]) return numpy.array(y) else : for data in data_y: y.append(data) return numpy.array(y) train_x = dataset.phase2['train']['x'] train_x = numpy.append(train_x, dataset.phase2['valid']['x'], 0) test_x = dataset.phase2['test']['x'] while(1): if params['experiment_type'] == 'baseline': train_x_original = train_x test_x_original = test_x pca = PCA(n_components=1000) pca.fit(train_x_original) train_x = pca.transform(train_x_original) test_x = pca.transform(test_x_original) train_y = transformY(dataset.phase2['train']['y']) train_y = numpy.append(train_y, transformY(dataset.phase2['valid']['y']), 0) test_y = transformY(dataset.phase2['test']['y']) if label_type < 3: tuned_parameters = [{'kernel': ['rbf'], 'gamma': [10**i for i in range(-4,0)], 'C': [10**i for i in range(0,4)]}] gscv = 
GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10) else: print 'classification' tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma': [10**i for i in range(-4,0)],'C': [10**i for i in range(0,4)]}] gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10) gscv.fit(train_x, train_y) best_model = gscv.best_estimator_ predict_y = best_model.predict(test_x) result_train = (best_model.predict(train_x) == train_y).sum() result_test = (best_model.predict(test_x) == test_y).sum() print 'training accuracy : %.2f , %d / %d' % (float(result_train) / len(train_y), result_train, len(train_y)) print 'testing accuracy : %.2f , %d / %d' % (float(result_test) / len(test_y), result_test, len(test_y)) pdb.set_trace() ################## #ファイル名の決定# ################## if params['experiment_type'] == 'chi2_selected': yoshihara_dir = 'chi2' else: yoshihara_dir = 'experiment' if y_type ==0: type = "regression" else: type = "classification" if m == 1: model_type = "sda" elif m==2: model_type = "dbn_only" elif m==4: model_type = "rnnrbm_mlp" elif m==5: model_type = "rnnrbm_dbn" elif m==6: model_type = "dbn_rnnrbm" #model_dirs['STEP4_logs'] = '%s/%s/%s_%d_%s.csv' % (default_model_dir, 'result',type, label_type, model_type) model_dirs['STEP4_logs'] = '%s/%s/%s_%d_%s.csv' % (default_model_dir,yoshihara_dir,type, label_type, model_type) all_size = len(params['STEP4']['hidden_recurrent']) * len(params['STEP4']['hidden_layers_sizes']) * len(params['STEP4']['pretrain']['batch_size']) * len(params['STEP4']['pretrain']['learning_rate']) * len(params['STEP4']['pretrain']['epochs']) * len(params['STEP4']['finetune']['batch_size']) * len(params['STEP4']['finetune']['learning_rate']) * len(params['STEP4']['finetune']['epochs']) i = 0 for hidden_layers_sizes in params['STEP4']['hidden_layers_sizes']: for batch_size_pretrain in params['STEP4']['pretrain']['batch_size']: for learning_rate_pretrain in params['STEP4']['pretrain']['learning_rate']: for epochs_pretrain 
in params['STEP4']['pretrain']['epochs']: for hidden_recurrent in params['STEP4']['hidden_recurrent']: pretrain_params = "" pretrain_params = { 'dataset' : dataset, 'hidden_layers_sizes' : hidden_layers_sizes, 'pretrain_lr' : learning_rate_pretrain, 'pretrain_batch_size' : batch_size_pretrain, 'pretrain_epochs' : epochs_pretrain, 'corruption_levels' : params['STEP4']['corruption_levels'], 'k' : params['STEP4']['k'], 'hidden_recurrent': hidden_recurrent, 'n_outs' : (1 + y_type) } pretrain_model = "" pretrain_model = model.pretrain(pretrain_params, y_type) pretrain_params = "" pretrain_params = get_model_params(pretrain_model) for brandcode in brandcodes: change_brand(dataset,brandcode) for batch_size_finetune in params['STEP4']['finetune']['batch_size']: for learning_rate_finetune in params['STEP4']['finetune']['learning_rate']: for epochs_finetune in params['STEP4']['finetune']['epochs']: set_model_params(pretrain_model,pretrain_params) finetune_params = "" finetune_params = { 'dataset' : dataset, 'model' : pretrain_model, 'finetune_lr' : learning_rate_finetune, 'finetune_batch_size' : batch_size_finetune, 'finetune_epochs' : epochs_finetune } finetune_model = "" best_validation_loss = "" test_score = "" best_epoch = "" finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type) i += 1 print '%d / %d is done...' % (i , all_size) out = open(model_dirs['STEP4_logs'], 'a') out.write('%f,%f,%s,%s,%d,%f,%d,%d,%d,%f,%d,%d,%s\n' % (best_validation_loss, test_score, brandcode, str(hidden_layers_sizes).replace(',', ' '), batch_size_pretrain, learning_rate_pretrain, hidden_recurrent, epochs_pretrain, batch_size_finetune, learning_rate_finetune, epochs_finetune, best_epoch, str(datetime.datetime.now()))) out.close() gc.collect()