def test_real(n_epochs=20, validation_frequency=1000):
    """ Test RNN with real-valued outputs. """
    train, valid, test = process_data.load_data()
    tseq, ttargets = train
    vseq, vtargets = valid
    test_seq, test_targets = test
    length = len(tseq)
    n_hidden = 10
    n_in = 48
    n_out = 12
    n_steps = 1
    n_seq = length

    seq = [[i] for i in tseq]
    targets = [[i] for i in ttargets]

    model = MetaRNN(n_in=n_in, n_hidden=n_hidden, n_out=n_out,
                    learning_rate=0.01, learning_rate_decay=0.99,
                    n_epochs=n_epochs, activation='relu')

    model.fit(seq, targets, validation_frequency=validation_frequency)

    test_seq = [[i] for i in test_seq]
    test_targets = [[i] for i in test_targets]

    plt.close("all")
    for idx in xrange(len(test_seq)):
        guess = model.predict(test_seq[idx])
        plot_predictions(test_seq[idx][0], test_targets[idx][0], guess[0])
def create_model(train=True):
    if train:
        (train_x, train_y), (test_x, test_y), (vocab, chunk_tags) = process_data.load_data()
    else:
        with open('model/config.pkl', 'rb') as inp:
            (vocab, chunk_tags) = pickle.load(inp)
    model = Sequential()
    model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True))  # Random embedding
    model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
    crf = CRF(len(chunk_tags), sparse_target=True)
    model.add(crf)
    model.summary()
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    if train:
        return model, (train_x, train_y), (test_x, test_y)
    else:
        return model, (vocab, chunk_tags)
def run_ecg(model_name, smote=False, batch_size=256, learning_rate=0.001,
            num_epochs=25, saved_loader='', save_path=None):
    if smote and saved_loader:
        train_loader = torch.load("train_loader" + saved_loader)
        val_loader = torch.load("val_loader" + saved_loader)
    else:
        train_loader, val_loader = process_data.load_data(
            batch_size=batch_size, smote=smote)
    model = models.get_model(model_name)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    train(model, train_loader, val_loader, num_epochs, criterion, optimizer)
    if save_path:
        torch.save(model.state_dict(), save_path)
    return model
def main():
    """ Run the main code for display """
    filename = 'Sampleflyerdataset.csv'
    data = process_data.load_data(filename)
    process_data.update_hashtable(data)
    users, _ = process_data.get_user_ids(data)
    number_of_users = len(users)
    total_number_of_flyers = 0
    user_l = []
    avg_list = []
    total_list = []
    for u_id in users:
        value = ht.get(u_id)
        if value:
            total = process_data.algorithm(sorted(value))
            al = len(value)
            avg = round(total / al, 1)
            print()
            print(
                f"|\tUser_ID: {u_id}\t|\tAverage Time on Flyer: {avg} Seconds\t|\tTotal Flyers: {al}"
            )
            sleep(0.3)
            user_l.append(u_id)
            avg_list.append(avg)
            total_list.append(al)
    data_frame = {
        'User ID': user_l,
        'Average Time On Flyer': avg_list,
        'Total Flyer': total_list
    }
    df = DataFrame(data_frame)
    df.to_excel('Report.xlsx', sheet_name='Sheet1', index=True)
def test_real(n_updates=100):
    """ Test RNN with real-valued outputs. """
    train, valid, test = process_data.load_data()
    tseq, ttargets = train
    vseq, vtargets = valid
    test_seq, test_targets = test
    length = len(tseq)
    n_hidden = 6
    n_in = 48
    n_out = 12
    n_steps = 1
    n_seq = length

    seq = [[i] for i in tseq]
    targets = [[i] for i in ttargets]

    gradient_dataset = SequenceDataset([seq, targets], batch_size=None,
                                       number_batches=100)
    cg_dataset = SequenceDataset([seq, targets], batch_size=None,
                                 number_batches=20)

    model = MetaRNN(n_in=n_in, n_hidden=n_hidden, n_out=n_out,
                    learning_rate=0.001, learning_rate_decay=0.999,
                    n_epochs=500, activation='relu')

    opt = hf_optimizer(p=model.rnn.params, inputs=[model.x, model.y],
                       s=model.rnn.y_pred,
                       costs=[model.rnn.loss(model.y)], h=model.rnn.h)

    opt.train(gradient_dataset, cg_dataset, num_updates=n_updates)

    test_seq = [[i] for i in test_seq]
    test_targets = [[i] for i in test_targets]

    plt.close("all")
    for idx in xrange(len(test_seq)):
        guess = model.predict(test_seq[idx])
        plot_predictions(test_seq[idx][0], test_targets[idx][0], guess[0])
    t_string = "{}\t" * (len(fs) + 2)
    tupl = (index, "G-" + str(index)) + tuple(map(lambda x: row[x], fs))
    line = t_string.format(*tupl)
    out.write(line + '\n')
out.close()

in_file = sys.argv[1]
out_file = sys.argv[2]
target_var = sys.argv[3]
n = int(sys.argv[4])

data_in = pdata.load_data(in_file)
sps = pdata.computeNSnapshots(data_in, n, target_var)

y = list()
values = list()
cols = list()
for p in sps.keys():
    tp = data_in[p]
    for snaps in sps[p]:
        l = list()
        l.append(p)
        for e in snaps:
            i = e[0]
            l += list(tp[i].values())[:-1]
        values.append(l)
        y.append(e[1])
"delugepeckish", great deluge algorithm with peckish initialisation "delugerandom", great deluge algorithm with random initialisation "delugeall", great deluge algorithm with all initialisation "greedygoal=" main.py -g=default (--greedygoal=default) Use selected greedy criteria: "maxdif","mindif","maxmax","minmax","minmin","maxmin","minavg" "dataset=" main.py -d=number (--dataset=0) to use selected test set in a file main.py -d=* (--dataset=*) to use all test sets in a file "time" main.py -t (--time) to view start and end time "beautyoff" main.py -b (--beautyoff) strip some text blocks "visualise" main.py -v (--visualise) to run 3D visualization Modules matplotlib, numpy and scipy are required! "solutions" main.py -s (--solutions) to view assigment table """ loaded_data = load_data( input_file_name ) if set_demand == "*": set_demand = range( get_number_of_problems(loaded_data) ) # for each set independently for set_id in set_demand: w_price, w_space, w_capacity = select_problem( loaded_data, set_id) best_sol = 32000 best_type = None if decoration: print "\n Processing...\n Filename: %s\n Set: %s" % (input_file_name, set_id) print " -------------------------" if trivial_conditions( w_price, w_space, w_capacity ): # if trivial conditions succed, continue # test all constructvie heuristics
# Some links.
# a deep dream of a NN - https://www.youtube.com/watch?v=sh-MQboWJug
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle

from process_data import load_data

(train_images, train_labels), (test_images, test_labels) = load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

class_names = ['HCM', 'NOR', 'DCM']

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=train_images.shape[1:]))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))
# model.summary()
def test_load_data():
    assert (load_data('disaster_messages.csv',
                      'disaster_categories.csv').shape == (26386, 5))
import logging

import helper
import process_data
from parameters import model_params

__author__ = 'Ehsan Khodabandeh'
__version__ = '1.0'
# ====================================

LOG_FORMAT = '%(asctime)s %(name)-12s %(levelname)s : %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
# Since the level here is INFO, none of the logger.debug calls below are displayed
logger = logging.getLogger(__name__ + ': ')

# ================== Set up data ==================
input_df_dict, input_param_dict = process_data.load_data()
logger.info('Data is loaded!')

# ================== Set up the optimization model ==================
"""
Parameters:
    h: unit holding cost
    p: production capacity per month
    I_0: initial_inventory
    c_t: unit production cost in month t
    d_t: demand of month t
Variables:
    X_t: Amount produced in month t
    I_t: Inventory at the end of period t
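# --- Aside (not from the source): a minimal PuLP sketch of the production-planning
# model the docstring above describes. All data values and names below are
# illustrative assumptions, not the project's own formulation.
from pulp import LpMinimize, LpProblem, LpVariable, lpSum

months = [1, 2, 3]
demand = {1: 100, 2: 150, 3: 120}       # d_t
unit_cost = {1: 5.0, 2: 5.5, 3: 6.0}    # c_t
holding_cost = 1.0                      # h
capacity = 200                          # p
initial_inventory = 20                  # I_0

sketch = LpProblem('production_planning_sketch', LpMinimize)
X = LpVariable.dicts('produced', months, lowBound=0, upBound=capacity)  # X_t
I = LpVariable.dicts('inventory', months, lowBound=0)                   # I_t

# Objective: production cost plus holding cost over the horizon
sketch += lpSum(unit_cost[t] * X[t] + holding_cost * I[t] for t in months)

# Inventory balance: I_t = I_{t-1} + X_t - d_t
for t in months:
    previous = initial_inventory if t == months[0] else I[t - 1]
    sketch += I[t] == previous + X[t] - demand[t]

sketch.solve()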
import bilsm_crf_model
import process_data
import numpy as np

model, (vocab, chunk_tags) = bilsm_crf_model.create_model(train=False)
predict_text = '中华人民共和国国务院总理周恩来在外交部长陈毅的陪同下,连续访问了埃塞俄比亚等非洲10国以及阿尔巴尼亚'
# predict_text = '樊大志同志1987年8月参加工作。先后在东北财经大学、北京国际信托投资公司、北京市境外融投资管理中心、北京市国有资产经营有限责任公司、北京证券有限责任公司、北京首都创业集团有限公司、华夏银行股份有限公司工作。'
(train_x, train_y), (test_x, test_y), (vocab1, chunk_tags1) = process_data.load_data()
model.load_weights('model/crf.h5')
# str, length = process_data.process_data(predict_text, vocab)
# raw = model.predict(str)[0][-length:]
# print(raw)
pre_l = model.predict(test_x)
raw = [[np.argmax(row) for row in l] for l in pre_l]

tpre_nump, pre_nump = 0, 0
tpre_numl, pre_numl = 0, 0
tpre_numo, pre_numo = 0, 0
for l, r in zip(test_y, raw):
    for s, i in zip(l, r):
        if s == 1 and i == 1:
            tpre_nump += 1
        elif s == 3 and i == 3:
            tpre_numl += 1
        elif s == 5 and i == 5:
            tpre_numo += 1
for l in raw:
if __name__ == "__main__":
    if sys.argv[1] == "":
        print("argv[1] is the path of file made from process_data.py")
        exit()
    # config
    seed = np.random.randint(0, 1000)
    print(seed)
    # seed = 1337
    batch_size = 32
    nb_epoch = 25
    all_num = 10
    valid_rate = 0.9
    path = '../log/%s-dnn_log_test' % (time.strftime("%m-%d_%H-%M"))
    print(path)
    input_shape, w2v_dim, label_class, data, label = load_data(path=sys.argv[1], filter_h=5, model_type="RNN")
    # labels need to be one-hot encoded
    label = np_utils.to_categorical(label, label_class)
    acces = np.zeros(all_num)
    for i in range(0, all_num):
        # shuffle data and labels with the same seed
        np.random.seed(seed)
        np.random.shuffle(data)
        np.random.seed(seed)
        np.random.shuffle(label)
        # split data into train and test sets
        datasize = len(label)
        split_idx = int(datasize * valid_rate)  # slice indices must be integers
        train_data = data[:split_idx]
        train_label = label[:split_idx]
# Loading the data.
all_loads = []
all_element_keys = []
all_gps = []
all_park_data = []

starts = [(6, 2016), (9, 2016), (12, 2016), (3, 2017), (6, 2017)]
ends = [(8, 2016), (11, 2016), (2, 2017), (5, 2017), (8, 2017)]

for pair in zip(starts, ends):
    month_year_start = pair[0]
    month_year_end = pair[1]

    params = process_data.load_data(data_path=data_path, load_paths=[path],
                                    month_year_start=month_year_start,
                                    month_year_end=month_year_end,
                                    verbose=False)
    element_keys, loads, gps_loc, park_data, idx_to_day_hour, day_hour_to_idx = params

    all_element_keys.append(element_keys)
    all_loads.append(loads)
    all_gps.append(gps_loc)
    all_park_data.append(park_data)

all_keys_seasonal = all_element_keys
all_gps_seasonal = all_gps
all_loads_seasonal = all_loads
all_park_data_seasonal = all_park_data

all_loads = []
all_gps = []
import pickle

import keras
import numpy as np
from sklearn_crfsuite.metrics import flat_classification_report

import bilsm_crf_model
import process_data

EPOCHS = 10
model, (train_x, train_y, _), (test_x, test_y, length), (vocab, chunk_tags) = bilsm_crf_model.create_model()
dev_x, dev_y, dev_length = process_data.load_data(use_dev=True)

# train model
# split = 7000
# define the grid search parameters
# batch_size = [10, 20, 40, 60, 80, 100]
# epochs = [16, 32, 64, 100]
# param_grid = dict(batch_size=batch_size, nb_epoch=epochs)
# grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
# grid_result = grid.fit(train_x[:split], train_y[:split], validation_data=[train_x[split:], train_y[split:]])
#
# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
# for params, mean_score, scores in grid_result.grid_scores_:
#     print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

history = model.fit(
    train_x, train_y,
from flask import Flask
from flask import render_template, request, jsonify
from plotly.graph_objs import Bar
from sqlalchemy import create_engine
import joblib
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# import the load and clean data functions
import sys
sys.path.insert(1, './data')
from process_data import load_data, clean_data

app = Flask(__name__)

# load and clean the data
df = load_data('data/disaster_messages.csv', 'data/disaster_categories.csv')
df = clean_data(df)


def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", default="pems", help="data to use")
    args = parser.parse_args()

    if args.data == "pems":
        lstm = tf.keras.models.load_model('model_pems/lstm.h5')
        gru = tf.keras.models.load_model('model_pems/gru.h5')
        saes = tf.keras.models.load_model('model_pems/saes.h5')
        cnn_lstm = tf.keras.models.load_model('model_pems/cnn_lstm.h5')
        with open('model_pems/rf.h5', 'rb') as f:
            rf = cPickle.load(f)
        en_1 = tf.keras.models.load_model('model_pems/en_1.h5')
        en_2 = tf.keras.models.load_model('model_pems/en_2.h5')
        en_3 = tf.keras.models.load_model('model_pems/en_3.h5')
    elif args.data == "nyc":
        lstm = tf.keras.models.load_model('model_nyc/lstm.h5')
        gru = tf.keras.models.load_model('model_nyc/gru.h5')
        saes = tf.keras.models.load_model('model_nyc/saes.h5')
        cnn_lstm = tf.keras.models.load_model('model_nyc/cnn_lstm.h5')
        with open('model_nyc/rf.h5', 'rb') as f:
            rf = cPickle.load(f)
        en_1 = tf.keras.models.load_model('model_nyc/en_1.h5')
        en_2 = tf.keras.models.load_model('model_nyc/en_2.h5')
        en_3 = tf.keras.models.load_model('model_nyc/en_3.h5')

    models = [lstm, gru, saes, cnn_lstm, rf, en_1, en_2, en_3]
    names = ['LSTM', 'GRU', 'SAEs', 'CNN_LSTM', 'rf', 'EN_1', 'EN_2', 'EN_3']

    if args.data == "pems":
        X_train, X_test, y_train, y_test, scaler = load_data(
            data="PEMS traffic prediction", force_download=False)
    elif args.data == "nyc":
        X_train, X_test, y_train, y_test, scaler = load_data(
            data="nyc_bike_dataset", force_download=False)

    rf_bk = X_test
    y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(1, -1)[0]

    y_preds = []
    for name, model in zip(names, models):
        if name == 'SAEs':
            X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1]))
        elif name in ('LSTM', 'GRU', 'CNN_LSTM', 'EN_1', 'EN_2', 'EN_3'):
            X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        else:
            X_test = rf_bk
        file = 'images/' + name + '.png'
        predicted = model.predict(X_test)
        predicted = scaler.inverse_transform(predicted.reshape(-1, 1)).reshape(1, -1)[0]
        y_preds.append(predicted[:288])
        print(name)
        eva_regress(y_test, predicted)

    plot_results(y_test[:288], y_preds, names)
Load and parse VERITAS data and use to train a deep learning model for
classification as gamma ray signal or hadronic background.
"""
import numpy as np
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Merge, Flatten
from keras.optimizers import Adam

from process_data import load_data

# Load the VERITAS data and parse into usable format
print "Loading data..."
data, labels = load_data(1000, '59521_data.txt', '59521_gammas.txt')
print data.shape
print labels.shape

# Set hyperparameters
lr = 0.0001

# Set up the model
model = Sequential()
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same',
                        input_shape=(4, 64, 64)))
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same'))
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same'))
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(128, 3, 3, activation='relu', border_mode='same'))
model.add(Convolution2D(128, 3, 3, activation='relu', border_mode='same'))
from process_data import preprocess, load_data
from render_charts import vis_data


def print_info(df):
    print(df.dtypes)
    print(df.describe(datetime_is_numeric=True))
    # print(df.Date.dt.day.explode().value_counts())
    print(df.drop(columns=["State"]).isnull().sum())
    print(df.drop(columns=["State"])[df["AvgTemperature"].isna()])
    cites_num = len(df["City"].unique())
    print(
        f"This data contains a list of daily average temperatures from {cites_num} cities and {len(df['Country'].unique())} countries."
    )


if __name__ == "__main__":
    df = preprocess(load_data())
    print_info(df)
import numpy as np
from keras import backend as K

from process_data import load_data, build_dict, vectorize, load_glove_weights
from net import Net

N = 300000
N_d = int(N * 0.1)
train_d, train_q, train_a = load_data('./dataset/cnn/train.txt', N, True)
dev_d, dev_q, dev_a = load_data('./dataset/cnn/dev.txt', N_d, True)

num_train = len(train_d)
num_dev = len(dev_d)
print('n_train', num_train, ', num_dev', num_dev)

print('Build dictionary..')
word_dict = build_dict(train_d + train_q)
entity_markers = list(
    set([w for w in word_dict.keys() if w.startswith('@entity')] + train_a))
entity_markers = ['<unk_entity>'] + entity_markers
entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
print('Entity markers: %d' % len(entity_dict))
num_labels = len(entity_dict)

doc_maxlen = max(map(len, (d for d in train_d)))
query_maxlen = max(map(len, (q for q in train_q)))
print('doc_maxlen:', doc_maxlen, ', q_maxlen:', query_maxlen)

v_train_d, v_train_q, v_train_y, _ = vectorize(train_d, train_q, train_a,
                                               word_dict, entity_dict,
# import modules
import process_data
import pandas as pd

# load data from csv files using process_data.py methods
data = process_data.load_data('disaster_messages.csv', 'disaster_categories.csv')

# create a separate clean dataset to train the model using the process_data.py method
data_clean = process_data.clean_data(data)

# save a sqlite db for models using the processed data and process_data.py methods
process_data.save_data(data_clean, 'emergency')


def custom_clean_data(df):
    """Clean categories and merge to messages

    Args:
        df => DataFrame of merged categories and messages csv files
    Returns:
        df => DataFrame of cleaned categories with duplicates dropped
    """
    categories = pd.Series(df.categories).str.split(';', expand=True)
    row = categories.loc[0]
    category_colnames = row.apply(lambda x: x[:-2]).values
    categories.columns = category_colnames
    for column in categories:
        # set each value to be the last character of the string
        categories[column] = categories[column].apply(lambda x: x[-1:]).values
import process_data

(train_x, train_y), (test_x, test_y), (vocab, chunk_tags) = process_data.load_data()
if model_params['module'] == 'gurobi':
    from optimization_model_gurobi import OptimizationModel
elif model_params['module'] == 'cplex':
    from optimization_model_docplex import OptimizationModel
elif model_params['module'] == 'xpress':
    from optimization_model_xpress import OptimizationModel
else:
    from optimization_model_pulp import OptimizationModel

__author__ = 'Ehsan Khodabandeh'
__version__ = '1.1'
# ====================================

LOG_FORMAT = '%(asctime)s %(name)-12s %(levelname)s : %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__ + ': ')

# ================== Set up data ==================
input_df_dict, input_param_dict = load_data()
logger.info('Data is loaded!')

# ================== Optimization ==================
start = time()
optimizer = OptimizationModel(input_df_dict['input_data'], input_param_dict)
logger.info(f'Model creation time in sec: {time() - start:.4f}')
optimizer.optimize()

# ================== Output ==================
optimizer.create_output()
logger.info('Outputs are written to csv!')
    ax = fig.add_axes([.14, .14, .8, .74])
    # plot our data
    ax.loglog(x_norm, y_norm, 'b-')
    ax.set_autoscale_on(False)  # Otherwise, infinite loop
    y_theory = function(x_norm, popt[0], popt[1])
    ax.loglog(x_norm, y_theory, 'r-')
    ax.set_title(words + " for " + filename)
    fig.savefig('./figures/spectral_fits/v/' + filename + '_fit_' + words + '.png')


for filename in FILENAMES:
    dat_bin = load_data(filename + '_binned')

    VINDS = [
        abs(dat_bin.v) < 0.05,
        (0.05 < abs(dat_bin.v)) & (abs(dat_bin.v) < 0.1),
        (0.1 < abs(dat_bin.v)) & (abs(dat_bin.v) < 0.2),
        (0.2 < abs(dat_bin.v)) & (abs(dat_bin.v) < 0.3),
        abs(dat_bin.v) > 0.3
    ]
    VINDS_words = [
        ' v less than 0.5 ', ' v between 0.5 and 1.0 ', ' v between 1.0 and 1.5 ',
        ' v between 1.5 and 2.0 ', ' v greater than 2.0 '
    ]

    fname = './csv_files/' + filename + '_results.csv'
    if isfile(fname):
def test_clean_data():
    df1 = load_data('disaster_messages.csv', 'disaster_categories.csv')
    df2 = clean_data(df1)
    assert (df2.shape == (26216, 40))
# parser.add_argument('--pluck-damping-variation', type=float, default=0.25,
#                     help='Pluck damping variation (default: 0.25)')
# parser.add_argument('--string-tension', type=float, default=0.1,
#                     help='String tension (default: 0.0)')
# parser.add_argument('--stereo-spread', type=float, default=0.2,
#                     help='Stereo spread (default: 0.2)')
# parser.add_argument('--string-damping-calculation', type=str, default='magic',
#                     help='String damping calculation (default: magic)')
# parser.add_argument('--body', type=str, default='simple',
#                     help='Body type (default: simple)')
# parser.add_argument('--mode', type=str, default='karplus-strong', choices=['karplus-strong', 'sine'],
#                     help='Which type of audio to generate.')
# args = parser.parse_args()

if __name__ == '__main__':
    # guitar = Guitar(options=args)
    # audio_buffer = sequencer.play_guitar(guitar)
    # cqt = librosa.cqt(audio_buffer, sr=40000, n_bins=84*4, bins_per_octave=12*4, hop_length=256, filter_scale=0.8)
    # inverse_cqt = librosa.icqt(cqt, sr=40000, bins_per_octave=12*4, hop_length=256, filter_scale=0.8)
    # print('CQT size: {}'.format(cqt.shape))
    # # plt.imshow(cqt)
    # # plt.show()
    # librosa.output.write_wav('guitar_output.wav', audio_buffer, 40000)  # The 40000 is the sampling frequency
    # librosa.output.write_wav('guitar_output_reconstructed.wav', inverse_cqt, 40000)  # The 40000 is the sampling frequency

    net = process_data.Net().to(device)
    train_data, test_data, val_data, eval_data = process_data.load_data()
    # process_data.train_model(net, train_data, val_data, eval_data)
    process_data.test(net, test_data)
def reynolds_stress(dat_bin, filename):
    """Plots the Reynolds stresses"""
    fig = plt.figure(1, figsize=[8, 4])
    fig.clf()
    ax = fig.add_axes([.14, .14, .8, .74])
    # first, convert the num_time to date_time, and plot this versus dat_raw.u
    date_time = dt.num2date(dat_bin.mpltime)
    # plot the data
    ax.plot(date_time, dat_bin.upvp_, 'r-', rasterized=True)
    ax.plot(date_time, dat_bin.upwp_, 'g-', rasterized=True)
    ax.plot(date_time, dat_bin.vpwp_, 'b-', rasterized=True)
    # label axes
    ax.set_xlabel('Time')
    ax.set_ylabel('Reynolds Stresses $\mathrm{[m^2/s^2]}$', size='large')
    fig.savefig('./figures/' + filename + '_reynolds_plot.png')


for filename in FILENAMES:
    dat_bin = load_data(filename + '_binned')
    dat_screen = load_data(filename + '_processed')
    dat_raw = load_data(filename + '_raw')

    processed_plot(dat_raw, dat_screen, filename)
    spectrum_plot(dat_bin, filename)
    tke_plot(dat_bin, filename)
    reynolds_stress(dat_bin, filename)
from ADNet_6 import Model
import process_data

x_train, x_dev, y_train, y_dev = process_data.load_data('train')
x_test, id = process_data.load_data('test')

model = Model()
model.train(x_train, y_train, x_dev, y_dev, batch_size=64, epoch=20)
predict = model.predict(x_test)
print('result ==> result.csv')
process_data.output_data(predict, id)
            return (p + 1) / ((y == y_i).sum() + 1)

        self._r = sparse.csr_matrix(np.log(pr(x, 1, y) / pr(x, 0, y)))
        x_nb = x.multiply(self._r)
        if self.dual == 'auto':
            self.dual = x_nb.shape[0] <= x_nb.shape[1]
        self._clf = LogisticRegression(C=self.C, dual=self.dual, n_jobs=1,
                                       verbose=self.verbose)
        self._clf.fit(x_nb, y)
        return self


(train_ids, train_texts, train_labels), (test_ids, test_texts) = process_data.load_data("../input")

# My version of cleaning the text
# clean_text = partial(process_data.clean_text, remove_stopwords=True, replace_numbers=True,
#                      remove_special_chars=True, stem_words=True)
# train_texts = process_data.clean(train_texts, clean_text)
# test_texts = process_data.clean(test_texts, clean_text)
# tokenize = nltk.tokenize.word_tokenize

# The kernel writer's version
import re, string
re_tok = re.compile('([{}“”¨«»®´·º½¾¿¡§£₤‘’])'.format(string.punctuation))


def tokenize(s):
import numpy as np
import sys
from pathlib import Path
import os

if __name__ == "__main__":
    datafile = sys.argv[1]
    file_trics = sys.argv[2]
    path_matr = sys.argv[3]
    target_var = sys.argv[4]
    n = int(sys.argv[5])
    cat_feats = list(map(str, sys.argv[6].strip('[]').split(',')))
    cont_feats = list(map(str, sys.argv[7].strip('[]').split(',')))

    data_in = pdata.load_data(datafile)
    filename = datafile.split("/")[-1]
    sps = pdata.computeNSnapshots(data_in, 3, target_var)

    y = list()
    values = list()
    cols = list()
    for p in sps.keys():
        tp = data_in[p]
        for snaps in sps[p]:
            l = list()
            l.append(p)
            for e in snaps:
                i = e[0]
    adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-6)
    model.compile(loss='binary_crossentropy', class_mode='multi_label',
                  optimizer=adadelta)
    return model


if __name__ == "__main__":
    if sys.argv[1] == "":
        print("argv[1] is the path of file made from process_data.py")
        exit()
    # config
    batch_size = 50
    nb_epoch = 25
    input_shape, w2v_dim, label_class, data, label = load_data(path=sys.argv[1], filter_h=5)
    label = np_utils.to_categorical(label, label_class)
    # print("Pad sequences (sample x time)")
    # data = sequence.pad_sequences(data)
    model = deep_CNN(input_shape, label_class)
    print("begin train model..")
    model.fit(data, label,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              shuffle=True,
              show_accuracy=True,
              verbose=1)
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import numpy as np

from process_data import load_data

d = 'p1'
matdir = 'data/' + d
datafile = matdir + '/delays.pkl'
keys, stats, ufiles = load_data(matdir, datafile)

##################
# Do some plots
##################
# def plot_data(keys, stats):
# stats['nrej'][cls][delay_cc][delay_ih] += 1
plt.ioff()
for c in sorted(keys['clses']):
    print c
    X = np.array(np.sort(list(keys['cc'])))
    Y = np.array(np.sort(list(keys['ih'])))
    Z = np.zeros((X.shape[0], Y.shape[0]))
    for i in np.arange(X.shape[0]):
        for j in np.arange(Y.shape[0]):
            Z[i][j] = np.mean(stats['l_p'][c][X[i]][Y[j]])
    X, Y = np.meshgrid(X, Y)

    fig = plt.figure()