def play(inp, game, custom=False):
    if custom:
        value = read_data.get_data(game, custom=True)
    else:
        value = read_data.get_data(game)
    game_data = value['game_data']
    content_data = value['content_data']
    x = game_data[inp]
    opt_val = [content_data[i] for i in x['options']]
    if 'none' in opt_val:
        # Bail out when any option resolves to 'none' content.
        print(inp, opt_val, x)
        return {'error': content_data[inp]}
    else:
        x['question'] = inp
        return x
from math import pi

import pandas as pd
from bokeh.plotting import figure, output_file, show

import read_data as rd  # assumed local data-access module, as in the other snippets


def plot(stock_name, first_date, last_date):
    tf = 'day'
    df = pd.DataFrame(rd.get_data(stock_name, tf, first_date, last_date))
    df["date"] = pd.to_datetime(df["date"])

    mids = (df.open + df.close) / 2
    spans = abs(df.close - df.open)
    inc = df.close > df.open
    dec = df.open > df.close
    nor = df.close == df.open  # the original used `=` (assignment) instead of `==`
    w = 12 * 60 * 60 * 1000  # half day in ms

    output_file("candlestick.html", title="candlestick.py example")
    TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
    p = figure(x_axis_type="datetime", tools=TOOLS,
               plot_height=480, plot_width=720, toolbar_location="right")
    p.segment(df.date, df.high, df.date, df.low, color="black")
    p.rect(df.date[inc], mids[inc], w, spans[inc],
           fill_color="#2ECC71", line_color="black")
    p.rect(df.date[dec], mids[dec], w, spans[dec],
           fill_color="#F2583E", line_color="black")
    p.rect(df.date[nor], mids[nor], w, spans[nor],
           fill_color="black", line_color="black")
    p.title = "Candlestick of " + str(stock_name)
    p.xaxis.major_label_orientation = pi / 4
    p.grid.grid_line_alpha = 0.3
    show(p)  # open a browser
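# A minimal usage sketch for the candlestick plot above. The symbol 'AOT' and
# the mm/dd/yyyy date format are assumptions borrowed from the other snippets
# in this collection (see get_test_feature below), not a confirmed contract of
# rd.get_data.
if __name__ == "__main__":
    plot('AOT', '01/01/2018', '06/30/2018')  # writes and opens candlestick.html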
def generate_gaussian_dataset(file, spectral):
    """Extend a data set with Gaussian noise.

    Args:
        file: Directory of the file holding the data set to extend and the
            corresponding label set; one spectral band per class gets
            Gaussian noise added.
        spectral: A list whose index is the class and whose value is the
            spectral band to which noise is added.

    Returns:
        data: Extended data set.
        label: Extended label set.
    """
    data_set, label_set = rd.get_data(file)
    classes = np.max(label_set) + 1
    # `np.zeros(classes, 1)` raises a TypeError (the second positional
    # argument is the dtype); an integer count vector is intended here.
    data_num = np.zeros(classes, dtype=int)
    for i in label_set:
        data_num[i] += 1
    data_mean, data_std = da.get_statistic_by_class(data_set, label_set)
    noise_by_class = []
    for eachclass in range(classes):
        spectral_idx = spectral[eachclass]
        # The third argument of np.random.normal is the sample count; the
        # original passed `data_num[classes]`, which indexes out of bounds --
        # the per-class count is `data_num[eachclass]`.
        noise = np.random.normal(data_mean[eachclass][spectral_idx],
                                 data_std[eachclass][spectral_idx],
                                 data_num[eachclass])
        noise_by_class.append(noise)
    data = data_set
    label = label_set
    per_class_used = np.zeros(classes, dtype=int)
    for eachdata, eachlabel in zip(data_set, label_set):
        # The original loop body was cut off; this is a plausible completion,
        # appending a copy of each sample perturbed on its class's spectral
        # band, consistent with the docstring.
        noisy = np.array(eachdata, dtype=float).copy()
        noisy[spectral[eachlabel]] += noise_by_class[eachlabel][per_class_used[eachlabel]]
        per_class_used[eachlabel] += 1
        data = np.vstack([data, noisy])
        label = np.append(label, eachlabel)
    return data, label
def cust():
    global F
    tp = read_data.get_data(CURR, custom=True)
    gd = tp['game_data']
    cd = tp['content_data']
    number = int(request.form['number'])
    data = play_game.play(number, CURR, custom=True)
    print(data)
    if number == 6:
        F = O
    if "options" not in data:
        F = O
        return data
    ops = data['options']
    tdct = {}
    counter = ['y', 'n']
    for i in ops:
        # Map the first option to 'y' and the second to 'n'.
        tdct[counter.pop(0)] = {
            'val': cd[i],
            'next': ops[i]['next'],
            'more': cd[ops[i]['more']],
        }
    res = {
        'question': cd[data['question']],
        'fact': F,
        'chap': cd[data['chapter']],
        'options': tdct,
    }
    F = cd[data['fact']]
    print(res)
    return res
def my_view_func(name):
    global CURR
    CURR = name
    tp = read_data.get_data(name, custom=True)
    gd = tp['game_data']
    cd = tp['content_data']
    res = play_game.play(1, name, custom=True)
    ops = res['options']
    tdct = {}
    counter = ['y', 'n']
    for i in ops:
        tdct[counter.pop(0)] = {
            'val': cd[i],
            'next': ops[i]['next'],
            'more': cd[ops[i]['more']],
        }
    global F, O
    F = O = cd[res['fact']]
    res = {
        'question': cd[res['question']],
        'fact': None,
        'chap': cd[res['chapter']],
        'options': tdct,
    }
    print(res)
    res['name'] = name
    return render_template('play_custom.html', val=res)
def get_stock_name_of_template(first_date, last_date, tf, profit, highest_loss):
    stocks = get_stock_active_name_list()
    ret_dict = {
        'symbol': [],
        'profit_return': [],
        'most_loss': [],
        'first_date': [],
        'last_date': [],
    }
    for symbol in stocks:
        data = get_data(symbol, tf, first_date, last_date)
        # Reset inside the loop: the original set these to None only once,
        # before the loop, so a TypeError on one symbol would leak the
        # previous symbol's values into this iteration.
        profit_return = None
        most_loss = None
        try:
            profit_return = utils.calculate_profit(data['close'])
            most_loss = utils.most_loss(data['close'])
        except TypeError:
            pass
        if profit_return is not None and most_loss is not None:
            if profit_return > profit and most_loss < highest_loss:
                print(symbol, profit_return, most_loss)
                ret_dict['symbol'].append(symbol)
                ret_dict['profit_return'].append(profit_return)
                ret_dict['most_loss'].append(most_loss)
                ret_dict['first_date'].append(first_date)
                ret_dict['last_date'].append(last_date)
    return ret_dict
def get_data(self):
    with tf.name_scope('data'):
        train_data, test_data = read_data.get_data(self.batch_size)
        # One reinitializable iterator serves both datasets; switch between
        # them by running the matching initializer op.
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)
        self.img, self.label = iterator.get_next()
        self.train_init = iterator.make_initializer(train_data)
        self.test_init = iterator.make_initializer(test_data)
def get_results_protein():
    train, test = read_data.get_data("Grupa5_data/protein.RData")
    x_train = pd.DataFrame(train.iloc[:, 0:2000])
    x_test = pd.DataFrame(test)
    x_train = read_data.normalize_data(x_train)
    x_test = read_data.normalize_data(x_test)
    y_train = train.iloc[:, 2000]  # the last column holds the target
    ridge = Ridge(alpha=0.01)
    cross_validation.cross_validate(x_train, y_train, ridge)
def get_data(self):
    with tf.name_scope('data'):
        train_data, test_data = read_data.get_data(self.batch_size,
                                                   self.n_train, self.n_test)
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)
        self.img, self.labels = iterator.get_next()  # this img has 24 channels!
        self.train_init = iterator.make_initializer(train_data)
        self.test_init = iterator.make_initializer(test_data)
def get_skyline_count(acc_metric, size_metric, include_stanford=True):
    languages, taggers, acc = read_data.get_data(
        acc_metric, include_stanford=include_stanford)
    size = read_data.get_data(size_metric, include_stanford=include_stanford)[2]
    taggers_by_language = np.repeat(np.array(taggers), len(languages)).reshape(
        (len(taggers), len(languages))).T
    acc_by_language = np.array(acc).T
    size_by_language = np.array(size).T
    skyline_count = {x: 0 for x in taggers}
    # Use a distinct loop variable: the original iterated with `taggers`,
    # shadowing the full tagger list that is returned below.
    for tagger_row, accs, sizes in zip(taggers_by_language, acc_by_language,
                                       size_by_language):
        zipped = list(zip(tagger_row, accs, sizes))
        zipped.sort(key=lambda x: x[2])
        models_on_skyline = get_models_on_skyline(zipped)
        for model in models_on_skyline:
            skyline_count[model] += 1
    return taggers, list(skyline_count.values())
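# `get_models_on_skyline` is referenced above but not shown here. A minimal
# sketch of its assumed semantics: given (tagger, accuracy, size) triples
# already sorted by ascending size, keep each model that beats the best
# accuracy seen among all smaller models, i.e. the Pareto front of accuracy
# vs. size. The name and tuple layout are taken from the caller; the body is
# an assumption, not the original implementation.
def get_models_on_skyline(zipped):
    skyline = []
    best_acc = float('-inf')
    for tagger, acc, size in zipped:  # iterated in ascending size order
        if acc > best_acc:
            skyline.append(tagger)
            best_acc = acc
    return skyline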
def calculate_lyapunov(unperturbed_file, perturbed_file):
    # TODO: remove references to N_calc
    """Calculate the Lyapunov exponent at each integration step between two
    solutions.

    PARAMETERS:
    unperturbed_file: (string) Name of the file in which the unperturbed
        solution is stored.
    perturbed_file: (string) Name of the file in which the perturbed
        solution is stored.

    RETURNS:
    lyaps: (float[N_steps]) N_steps-dimensional array containing the values
        of the Lyapunov exponent calculated between the two solutions at
        each integration step."""
    data_u, mu_u, k_u = read_data.get_data(unperturbed_file, form='lyapunov')
    data_p, mu_p, k_p = read_data.get_data(perturbed_file, form='lyapunov')
    length = len(data_u)
    try:
        assert length == len(data_p)
        assert mu_u == mu_p
        assert k_u == k_p
    except AssertionError:
        print(colors.red | "The integration results are not written as expected",
              file=sys.stderr)
        sys.exit(6)  # integration results are not written as expected
    # Difference between the two simulations
    difference = data_u - data_p
    # Norms of the difference vectors
    norms = np.linalg.norm(difference, axis=1)
    # Norm of the initial difference
    norm_0 = norms[0]
    # ln of the ratio between the evolved and the initial separation
    log_diff_ratio = np.log(norms / norm_0)
    lyaps = np.empty(length)
    cumulative_sum = 0
    print("Lyapunov exponents calculation:")
    for i in Progress.range(1, length):
        # Running mean of the log ratios from the initial state to state i
        cumulative_sum += log_diff_ratio[i]
        lyaps[i] = cumulative_sum / i
    return lyaps
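# Standalone sanity sketch (not part of the original project) of the quantity
# calculate_lyapunov builds on: if two trajectories separate as
# |delta_i| = |delta_0| * exp(lambda * i), then log(|delta_i| / |delta_0|) / i
# recovers lambda at every step. Note the routine above instead returns the
# running mean of the cumulative log ratios, a smoothed variant of this.
import numpy as np

lam = 0.3                               # known divergence rate per step
i = np.arange(1, 1000)
norms = 1e-9 * np.exp(lam * i)          # synthetic separation norms, |delta_0| = 1e-9
log_diff_ratio = np.log(norms / 1e-9)
assert np.allclose(log_diff_ratio / i, lam)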
def main():
    warnings.filterwarnings("ignore", category=FutureWarning)

    # read data
    tweets, labels, tests, test_labels = read_data.get_data(0.8)
    all_data = tweets + tests
    all_labels = labels + test_labels
    # k_fold_Cross_validation.validate(5, all_data, all_labels)

    # remove noise
    # A pandas DataFrame is a 2D table with named columns that supports
    # vectorized operations on rows and columns.
    trainDF = pandas.DataFrame()
    testDF = pandas.DataFrame()
    trainDF['text'] = Data_cleaner.remove_noise(tweets)
    trainDF['labels'] = labels
    testDF['tests'] = Data_cleaner.remove_noise(tests)
    testDF['test_labels'] = test_labels

    # extract features from the train and test text
    train_features, test_features = extract_features.get_features_TF_IDF(
        trainDF['text'], testDF['tests'])
    # train_features, test_features = extract_features.word2vec(trainDF['text'], testDF['tests'])

    # multi layer perceptron (alternative classifier, left disabled)
    # clf = MLPClassifier()
    # parameter_space = {
    #     'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],
    #     'activation': ['tanh', 'relu', 'logistic'],
    #     'solver': ['sgd', 'adam', 'lbfgs'],
    #     'alpha': [float(x) for x in np.linspace(0.0001, 5, num=100)],
    #     'learning_rate': ['constant', 'adaptive']}
    # grid = GridSearchCV(clf, parameter_space, n_jobs=-1, cv=3, verbose=1)
    # grid.fit(train_features, trainDF['labels'])
    # print(grid.best_params_)
    # clf.fit(train_features, trainDF['labels'])
    # p = clf.predict(test_features)
    # print(np.mean(p == testDF['test_labels']))

    # naive bayes (alternative classifier, left disabled)
    # naive_bayes.run_naive_bayes(train_features, test_features, trainDF['labels'], testDF['test_labels'])

    # logistic regression
    # logistic_regression.tune(train_features, trainDF['labels'])
    logistic_regression.run(train_features, test_features, trainDF['labels'],
                            testDF['test_labels'])
def get_train_feature():
    stocks_all = get_stock_active_name_list()
    stocks_good = get_stock_name_of_growth_more_than_percent_with_period(
        Decimal(15.0), 90)
    print(stocks_good)
    # End of the feature window: 90 days ago, reformatted as mm/dd/yyyy.
    prev_day = datetime.datetime.strptime(
        str(datetime.date.today() - datetime.timedelta(90)),
        '%Y-%m-%d').strftime('%m/%d/%Y')
    y = []
    feature_rsi_7 = []
    feature_rsi_14 = []
    feature_ema_10 = []
    feature_ema_25 = []
    feature_ema_50 = []
    feature_ema_75 = []
    feature_macd_vs_signal = []
    for symbol in stocks_all:
        data = get_data(symbol, 'day', 0, prev_day)
        if data is None:
            continue
        feature_rsi_7.append([get_rsi_7(data['close'])])
        feature_ema_10.append([data['close'][-1] / float(get_ema(data['close'], 10))])
        feature_ema_25.append([data['close'][-1] / float(get_ema(data['close'], 25))])
        feature_ema_50.append([data['close'][-1] / float(get_ema(data['close'], 50))])
        # feature_ema_75.append([data['close'][-1] / float(get_ema(data['close'], 75))])
        # feature_rsi_14.append([get_rsi_14(data['close'])])
        feature_macd_vs_signal.append([macd_vs_signal(data['close'])])
        # label: 1 if the stock is in the high-growth list, else 0
        if symbol in stocks_good:
            y.append([1])
        else:
            y.append([0])
    features = numpy.hstack([
        numpy.array(feature_rsi_7),
        numpy.array(feature_ema_25),
        numpy.array(feature_ema_10),
        numpy.array(feature_ema_50),
        # numpy.array(feature_ema_75),
        # numpy.array(feature_rsi_14),
        numpy.array(feature_macd_vs_signal),
    ])
    output = numpy.hstack([numpy.array(y)])
    return features, output
def train_model(model, train_path, test_path, num_epochs=5000):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train(True)
        exp_lr_scheduler.step()
        lines = list(open(train_path, 'r'))
        steps = len(lines) / batch_size
        random.shuffle(lines)
        for i in range(int(steps)):
            # get the inputs
            inputs, labels = read_data.get_data(lines, batch_size, i)
            # wrap them in Variable
            inputs, labels = Variable(torch.Tensor(inputs)).cuda(), Variable(
                torch.Tensor(labels)).cuda()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            start_time = time.time()
            outputs = model(inputs)
            end_time = time.time()
            _, predicted = torch.max(outputs, 1)
            labels = labels.to(device=torch.device("cuda:0"), dtype=torch.int64)
            correct = (predicted == labels).sum()
            acc_train = float(correct) / batch_size
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            print('[%d, %5d] loss: %.3f time: %.3f' %
                  (epoch + 1, i + 1, loss, end_time - start_time))
            running_loss += loss
            if i % 10 == 0:
                acc = test(model, test_path)
                print('test [%d, %5d] loss: %.3f acc: %.3f' %
                      (epoch + 1, i + 1, running_loss / 10, acc))
                running_loss = 0.0
                torch.save(model.state_dict(),
                           'model/model_' + str(epoch) + '_' + str(i) + '_' +
                           str(acc) + '.pkl')
    print('Finished Training')
def get_results_cancer():
    train, test = read_data.get_data("Grupa5_data/cancer.RData")
    # Samples are stored column-wise: rows 0..17736 are features and row
    # 17737 holds the target, so everything is transposed below.
    x_train = pd.DataFrame(train.iloc[0:17737, :])
    y_train = train.iloc[17737, :]
    x_test = pd.DataFrame(test)
    x_train = x_train.T
    y_train = y_train.T
    x_test = x_test.T
    print(x_train)
    print(x_test)
    x_train = read_data.normalize_data(x_train)
    x_test = read_data.normalize_data(x_test)
    ridge = Ridge()
    cross_validation.cross_validate(x_train, y_train, ridge)
def test_steady_state(mu, k, sign):
    """This function tests that the steady states are maintained"""
    if sign:
        sign = +1
    else:
        sign = -1
    x1_0 = sign * k
    x2_0 = sign / k
    y1_0 = mu * k**2
    y2_0 = mu * k**-2
    # Note: only three initial components are passed here even though four
    # (including y2_0) are read back and asserted below; this is kept as in
    # the original, where integrator.dynamo may derive the fourth itself.
    dynamo_test = integrator.dynamo(mu, k, 30, [x1_0, x2_0, y1_0],
                                    "test_output.csv", 2**-8)
    dynamo_test.evolve(0)
    dynamo_test.write_results()
    t, x1, x2, y1, y2, mu, k = read_data.get_data("test_output.csv", '1')
    assert abs(x1[-1] - x1_0) < 2e-6
    assert abs(x2[-1] - x2_0) < 2e-6
    assert abs(y1[-1] - y1_0) < 2e-6
    assert abs(y2[-1] - y2_0) < 2e-6
    os.remove("test_output.csv")
def sim_time(dataname):
    # One get_data call replaces the six repeated calls in the original; it
    # returns (time, energy, obs_ID, t, E, dict) in that order.
    time, energy, obs_ID, t, E, data_dict = data.get_data(dataname)[:6]
    t = np.array(t)
    sim_t = t - t  # zero array with the same (possibly ragged) shape as t
    # Draw simulated arrival times uniformly within each observation's
    # original time span [t[i][0], t[i][-1]].
    for i in range(len(t)):
        sim_t[i] = np.random.random(len(t[i])) * (t[i][-1] - t[i][0]) + t[i][0]
    return sim_t
def get_time_interval_data(meal, interval=120):
    user = meal[0]
    records, cgm_data = get_data(user)
    start = records['Start'].iloc[meal[1]]

    def get_minutes(tdelta):
        # Convert a timedelta to minutes relative to the meal start.
        days = tdelta.days
        seconds = tdelta.seconds
        return days * 1440 + seconds / 60.

    records['Start'] = (records['Start'] - start).apply(get_minutes)
    records['Finish'] = (records['Finish'] - start).apply(get_minutes)
    cgm_data['Time'] = (cgm_data['Time'] - start).apply(get_minutes)
    records = records[(records['Start'] >= 0) & (records['Start'] <= interval)]
    cgm_data = cgm_data[(cgm_data['Time'] >= 0) & (cgm_data['Time'] <= interval)]
    return records, cgm_data
def grid_search():
    train, test = read_data.get_data("Grupa5_data/protein.RData")
    x_train = pd.DataFrame(train.iloc[:, 0:2000])
    x_test = pd.DataFrame(test)
    x_train = read_data.normalize_data(x_train)
    x_test = read_data.normalize_data(x_test)
    y_train = train.iloc[:, 2000]
    ridge = Ridge()
    param = {'alpha': [1e-4, 1e-2, 1, 5, 10]}
    ridge_regr = GridSearchCV(ridge, param,
                              scoring="neg_mean_squared_error", cv=10)
    ridge_regr.fit(x_train, y_train)
    print(ridge_regr.best_params_)
    print(ridge_regr.best_estimator_)
    print(ridge_regr.cv_results_)
    print(ridge_regr.best_index_)
def get_test_feature():
    feature_rsi_7 = []
    feature_rsi_14 = []
    feature_ema_10 = []
    feature_ema_25 = []
    feature_ema_50 = []
    feature_ema_75 = []
    feature_macd_vs_signal = []
    stocks_all = get_stock_active_name_list()
    for symbol in stocks_all:
        data = get_data(symbol, 'day', 0, '12/19/2016')
        if data is None:
            continue
        feature_rsi_7.append([get_rsi_7(data['close'])])
        # feature_rsi_14.append([get_rsi_14(data['close'])])
        feature_ema_10.append([data['close'][-1] / float(get_ema(data['close'], 10))])
        feature_ema_25.append([data['close'][-1] / float(get_ema(data['close'], 25))])
        feature_ema_50.append([data['close'][-1] / float(get_ema(data['close'], 50))])
        # feature_ema_75.append([data['close'][-1] / float(get_ema(data['close'], 75))])
        feature_macd_vs_signal.append([macd_vs_signal(data['close'])])
    features = numpy.hstack([
        numpy.array(feature_rsi_7),
        numpy.array(feature_ema_25),
        numpy.array(feature_ema_10),
        numpy.array(feature_ema_50),
        # numpy.array(feature_ema_75),
        # numpy.array(feature_rsi_14),
        numpy.array(feature_macd_vs_signal),
    ])
    return numpy.asarray(features)
def act():
    global CURR
    CURR = None
    global FACT
    tp = read_data.get_data('ww1_f')
    gd = tp['game_data']
    cd = tp['content_data']
    number = int(request.form['number'])
    data = play_game.play(number, 'ww1_f')
    if number == 6:
        FACT = OG
    if "options" not in data:
        FACT = OG
        print(data)
        return data
    ops = data['options']
    tdct = {}
    counter = ['y', 'n']
    for i in ops:
        tdct[counter.pop(0)] = {
            'val': cd[i],
            'next': ops[i]['next'],
            'more': cd[ops[i]['more']],
        }
    res = {
        'question': cd[data['question']],
        'fact': FACT,
        'chap': cd[data['chapter']],
        'options': tdct,
    }
    FACT = cd[data['fact']]
    return res
def run_sig_processing(data_src, labels_src, band_type):
    # parameter initialization
    start_time = 3
    time_slides = 0.2
    window_length = 2
    segments_num = 11
    data, labels, sfreq = get_data(data_src, labels_src)

    # preprocess every trial of every channel, session and subject
    preprocessed_data = dict()
    for subject in data:
        if subject not in preprocessed_data:
            preprocessed_data[subject] = dict()
        for session in data[subject]:
            df_trials_data = pd.DataFrame()
            for channel in data[subject][session]:
                session_data = data[subject][session][channel]
                trials_processed_data = list()
                for trial_data in session_data:
                    processed_data = preprocess_signal(
                        trial_data, start_time, time_slides, window_length,
                        segments_num, sfreq)
                    trials_processed_data.append(processed_data)
                df_trials_data[channel] = trials_processed_data
            preprocessed_data[subject][session] = df_trials_data

    if band_type == 0 or band_type == 1:
        # data-driven selection of subject-specific mu and beta bands
        combined_data, combined_labels = combine_processed_data(
            preprocessed_data, labels)
        mu_band = feature_band_selection(combined_data, combined_labels,
                                         sfreq, step=1, band_range=(4, 14),
                                         band_size=(4, 5, 6),
                                         features_type=band_type)
        beta_band = feature_band_selection(combined_data, combined_labels,
                                           sfreq, step=1, band_range=(16, 40),
                                           band_size=(4, 5, 6),
                                           features_type=band_type)
    else:
        # fall back to fixed canonical bands
        mu_band = dict()
        beta_band = dict()
        for subject in preprocessed_data:
            mu_band[subject] = (4, 14)
            beta_band[subject] = (16, 40)

    # get the CNN input data and add it as a column of each session's
    # DataFrame in preprocessed_data[subject][session]
    for subject in preprocessed_data:
        for session in preprocessed_data[subject]:
            preprocessed_data[subject][session]['input data'] = \
                preprocessed_data[subject][session].apply(
                    get_input_data, axis=1,
                    mu_band=mu_band[subject], beta_band=beta_band[subject])
    return preprocessed_data, labels
from math import sqrt

from read_data import get_data
from metric_functions import get_kernel, get_distance
from f_score import get_f_score
from draughtsman import draw

filename = 'dataset_191_wine.csv'
X, Y = get_data(filename)
h_max = int(sqrt(len(X)))

DISTANCE_NAMES = ['manhattan', 'euclidean', 'chebyshev']
KERNEL_NAMES = ['uniform', 'triangular', 'epanechnikov', 'quartic']
WINDOW_TYPES = ['fixed', 'variable']

max_f_score = -1
win_kernel = ""
win_distance = ""
win_window = ""

for window_type in WINDOW_TYPES:
    print('Window type: ' + window_type)
    is_fixed = window_type == 'fixed'
    for distance_name in DISTANCE_NAMES:
        print(' ' * 1 + 'Distance function: ' + distance_name)
        distance = get_distance(distance_name)
        for kernel_name in KERNEL_NAMES:
            print(' ' * 2 + 'Kernel function: ' + kernel_name)
parser.add_argument('--train', default=True, type=bool,
                    help='train the model')
opt = parser.parse_args()
if opt.use_cuda:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_dir = os.path.join('..', 'input', 'skin-cancer-mnist-ham10000')
all_image_path = glob(os.path.join(base_dir, '*', '*.jpg'))
imageid_path_dict = {
    os.path.splitext(os.path.basename(x))[0]: x
    for x in all_image_path
}
df_train, df_val = get_data(base_dir, imageid_path_dict)
normMean, normStd = compute_img_mean_std(all_image_path)

model = models.resnext101_32x8d(pretrained=True)
model.fc = nn.Linear(in_features=2048, out_features=7)  # 7 HAM10000 classes
model.to(device)

input_size = 224
train_transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
import numpy as np
import sys
sys.path.append("../network/")
sys.path.append("../")
from read_data import get_data
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate, train_test_split, KFold
from metrics import gain_chart, prob_acc
from sklearn.tree import DecisionTreeClassifier
from resampling import Resample

X, Y = get_data()
Y = Y.flatten()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5)
# r = Resample(X_train, Y_train)
# X_train, Y_train = r.Over()

clf_rf = RandomForestClassifier(n_estimators=100, max_depth=8,
                                min_samples_split=100)
clf_rf.fit(X_train, Y_train)
ypred_test = clf_rf.predict_proba(X_test)
gain_chart(Y_test, ypred_test)
prob_acc(Y_test, ypred_test)

clf_dt = DecisionTreeClassifier(max_depth=6, min_samples_split=200)
import codecs

import read_data as rd
import process_data as pd  # note: `pd` here is process_data, not pandas

DIR = r"/home/chixiao/projects/ECEI/"

if __name__ == "__main__":
    data = rd.get_data(DIR, [0, 0.0001], 0.0001)
    print(data[:, :, -1])
    print(data[:, :, -2])
    pd.show_data(data)
import numpy as np
from read_data import get_data
import sys
sys.path.append("network/")
from NN import NeuralNet

X, Y = get_data(normalized=False, standardized=True, file='droppedX6-X11.csv')

nn = NeuralNet(X, Y.flatten(),
               nodes=[18, 50, 50, 2],
               activations=['tanh', 'tanh', None],
               cost_func='log',
               regularization='l2',
               lamb=0.001)
nn.split_data(frac=0.5, shuffle=True, resample=True)
nn.TrainNN(epochs=1000, batchSize=200, eta0=0.01, n_print=10)
import os
import random

import numpy as np              # missing in the original imports
import tensorflow as tf         # missing in the original imports (TF1-style API below)
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

from read_data import get_data, balance_data

LOG_DIR = './bitcorn_model2'
TIME_STEP = 6          # sliding-window size
BATCH_SIZE = 4096      # number of samples fed in per training step
CHANNEL = 5            # number of input dimensions per step
LEARNING_RATE = 0.003  # learning rate
EPOCH = 20             # number of training epochs
file_name = 'USDT_BTC 5min(2015-2018).csv'  # training file name

data_x, data_y = get_data(file_name, TIME_STEP)
data_y = np.array(data_y).astype(int)  # np.int is removed in recent NumPy
data_x = np.array(data_x)

# split the training and test sets
len_train = int(len(data_y) * 0.7)
len_train_test = int(len(data_y) * 0.85)
train_x = data_x[:len_train]
train_y = data_y[:len_train]
test_x = data_x[len_train:]
test_y = data_y[len_train:]
val_x = data_x[len_train_test:]
val_y = data_y[len_train_test:]
print(train_x.shape)

graph = tf.Graph()
with graph.as_default():
    input_x = tf.placeholder(tf.float32, [None, TIME_STEP, CHANNEL])
#!/usr/bin/python3
import sys
sys.path.append("..")

from sklearn.tree import DecisionTreeClassifier
from custom_classes.ribes_RFFSampler import ribes_RFFSampler
from sklearn.kernel_approximation import RBFSampler
from read_data import get_data

train_data, train_predictions, test_data, test_predictions = get_data()
train_predictions, test_predictions = (train_predictions.ravel(),
                                       test_predictions.ravel())

# Map the data through random Fourier features, then fit a decision tree on
# the mapped representation.
desired_components = 16
sampler = RBFSampler(n_components=desired_components)
sampler.fit(train_data)
mapped_train_data = sampler.transform(train_data)
mapped_test_data = sampler.transform(test_data)

arbol = DecisionTreeClassifier()
arbol.fit(mapped_train_data, train_predictions)
test_score = arbol.score(mapped_test_data, test_predictions)
print(test_score)
import matplotlib.pyplot as plt

from normalize import normalize
from read_data import get_data
from train import calc_real_wei

try:
    with open('model/weights') as f:
        a, b = list(map(float, f.read().split('\n')))
except (OSError, ValueError):  # the original bare except also swallowed KeyboardInterrupt
    print("Train first")
    exit()

x, y = normalize(*get_data())
real_a, real_b = calc_real_wei(x, y)
pred = [a * el + b for el in x]
real_pred = [real_a * el + real_b for el in x]

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, pred, 'r', label='Prediction')
ax.plot(x, real_pred, 'b', label='Real line')
ax.scatter(x, y, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Mileage')
ax.set_ylabel('Price')
ax.set_title('Predicted price vs. mileage')
plt.show()
def main():
    cross_path = 'data/timit/timit/cross/'
    test_path = 'data/timit/timit/test/'
    train_path = 'data/timit/timit/train/'

    # prepare word data
    data = get_data(cross_path)
    feature = data['features']
    phoneme = data['phonemes']
    word = data['words']
    # The original read the test fields from `data` instead of `testdata`
    # (and also reused cross_path rather than test_path, which is kept as-is
    # here); the fields now come from `testdata`.
    testdata = get_data(cross_path)
    testfeature = testdata['features']
    testphone = testdata['phonemes']
    testword = testdata['words']
    pdict, wdict = getDictionary(phoneme, word)
    phoneme = proRawData(phoneme, pdict)
    word = proRawData(word, wdict)
    testphone = proRawData(testphone, pdict)
    testword = proRawData(testword, wdict)

    # feature -> phoneme
    np.random.seed(0)
    memcell = 100
    dimx = 12
    dimy = 61
    lstmweight = LstmWeight(memcell, dimx)
    softmaxweight = Weight(memcell, dimy)
    lstmnetwork = LSTMLayer(lstmweight)
    softmaxnetwork = SoftMaxLayer(softmaxweight)

    # ################### Training #######################
    epoch = 0
    maxloss = 0
    for count in range(200):
        loss = 0
        for item in range(len(feature)):
            for content in feature[item]:
                lstmnetwork.xlistAdd(content)
            output = lstmnetwork.getHmatrix()
            softmaxnetwork.outputAdd(output.T)
            ymatrix = softmaxnetwork.getYmatrix()
            ctclayer = CTC(ymatrix.T, phoneme[item])
            do, tmp = ctclayer.returndY()
            loss += tmp
            softmaxnetwork.ylist(do)
            hmatrix = softmaxnetwork.getdHmatrix()
            lstmnetwork.ylist(hmatrix.T)
            lstmnetwork.xlistRefresh()
            softmaxnetwork.outputRefresh()
            softmaxweight.changeWeight(0.001)
            lstmweight.changeWeight(0.001)
        print(loss)
        if loss > maxloss:
            maxloss = loss

    # ################### Testing #######################
    for item in range(len(testfeature)):
        for content in testfeature[item]:
            lstmnetwork.xlistAdd(content)
        print('label', testphone[item])
        output = lstmnetwork.getHmatrix()
        lstmnetwork.xlistRefresh()
        print('predict', softmaxnetwork.predict(output.T))