Example #1
def test_real(n_epochs=20, validation_frequency=1000):
    """ Test RNN with real-valued outputs. """
    train, valid, test = process_data.load_data()
    tseq, ttargets = train
    vseq, vtargets = valid
    test_seq, test_targets = test
    length = len(tseq)

    n_hidden = 10
    n_in = 48
    n_out = 12
    n_steps = 1
    n_seq = length

    seq = [[i] for i in tseq]
    targets = [[i] for i in ttargets]

    model = MetaRNN(n_in=n_in, n_hidden=n_hidden, n_out=n_out,
                    learning_rate=0.01, learning_rate_decay=0.99,
                    n_epochs=n_epochs, activation='relu')

    model.fit(seq, targets, validation_frequency=validation_frequency)

    test_seq = [[i] for i in test_seq]
    test_targets = [[i] for i in test_targets]
    plt.close("all")
    for idx in xrange(len(test_seq)):
        guess = model.predict(test_seq[idx])
        plot_predictions(test_seq[idx][0], test_targets[idx][0], guess[0])
Example #2
def create_model(train=True):
    if train:
        (train_x, train_y), (test_x,
                             test_y), (vocab,
                                       chunk_tags) = process_data.load_data()
    else:
        with open('model/config.pkl', 'rb') as inp:
            (vocab, chunk_tags) = pickle.load(inp)
    model = Sequential()
    model.add(Embedding(len(vocab), EMBED_DIM,
                        mask_zero=True))  # Random embedding
    model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
    crf = CRF(len(chunk_tags), sparse_target=True)
    model.add(crf)
    model.summary()
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    if train:
        return model, (train_x, train_y), (test_x, test_y)
    else:
        return model, (vocab, chunk_tags)
Example #3
def run_ecg(model_name,
            smote=False,
            batch_size=256,
            learning_rate=0.001,
            num_epochs=25,
            saved_loader='',
            save_path=None):

    if smote and saved_loader:
        train_loader = torch.load("train_loader" + saved_loader)
        val_loader = torch.load("val_loader" + saved_loader)
    else:
        train_loader, val_loader = process_data.load_data(
            batch_size=batch_size, smote=smote)
    model = models.get_model(model_name)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    train(model, train_loader, val_loader, num_epochs, criterion, optimizer)
    if save_path:
        torch.save(model.state_dict(), save_path)
    return model
Example #4
File: main.py Project: asheux/shopflyer
def main():
    """ Run the main code for display """
    filename = 'Sampleflyerdataset.csv'

    data = process_data.load_data(filename)
    process_data.update_hashtable(data)
    users, _ = process_data.get_user_ids(data)

    number_of_users = len(users)
    total_number_of_flyers = 0

    user_l = []
    avg_list = []
    total_list = []

    for u_id in users:
        value = ht.get(u_id)
        if value:
            total = process_data.algorithm(sorted(value))
            al = len(value)
            avg = round(total / al, 1)
            print()

            print(
                f"|\tUser_ID: {u_id}\t|\tAverage Time on Flyer: {avg}Seconds\t|\tTotal Flyers: {al}"
            )
            sleep(0.3)

            user_l.append(u_id)
            avg_list.append(avg)
            total_list.append(al)

    data_frame = {
        'User ID': user_l,
        'Average Time On Flyer': avg_list,
        'Total Flyer': total_list
    }

    df = DataFrame(data_frame)
    df.to_excel('Report.xlsx', sheet_name='Sheet1', index=True)
Example #5
def test_real(n_updates=100):
    """ Test RNN with real-valued outputs. """
    train, valid, test = process_data.load_data()
    tseq, ttargets = train
    vseq, vtargets = valid
    test_seq, test_targets = test
    length = len(tseq)

    n_hidden = 6
    n_in = 48
    n_out = 12
    n_steps = 1
    n_seq = length

    seq = [[i] for i in tseq]
    targets = [[i] for i in ttargets]

    gradient_dataset = SequenceDataset([seq, targets], batch_size=None, number_batches=100)
    cg_dataset = SequenceDataset([seq, targets], batch_size=None,
                                 number_batches=20)

    model = MetaRNN(n_in=n_in, n_hidden=n_hidden, n_out=n_out,
                    learning_rate=0.001, learning_rate_decay=0.999,
                    n_epochs=500, activation='relu')

    opt = hf_optimizer(p=model.rnn.params, inputs=[model.x, model.y],
                       s=model.rnn.y_pred,
                       costs=[model.rnn.loss(model.y)], h=model.rnn.h)

    opt.train(gradient_dataset, cg_dataset, num_updates=n_updates)

    test_seq = [[i] for i in test_seq]
    test_targets = [[i] for i in test_targets]
    plt.close("all")
    for idx in xrange(len(test_seq)):
        guess = model.predict(test_seq[idx])
        plot_predictions(test_seq[idx][0], test_targets[idx][0], guess[0])
Example #6
                t_string = "{}\t" * (len(fs) + 2)
                tupl = (index, "G-" + str(index)) + tuple(
                    map(lambda x: row[x], fs))
                line = t_string.format(*tupl)
                out.write(line + '\n')

    out.close()


in_file = sys.argv[1]
out_file = sys.argv[2]
target_var = sys.argv[3]
n = int(sys.argv[4])

data_in = pdata.load_data(in_file)
sps = pdata.computeNSnapshots(data_in, n, target_var)

y = list()
values = list()
cols = list()
for p in sps.keys():
    tp = data_in[p]
    for snaps in sps[p]:
        l = list()
        l.append(p)
        for e in snaps:
            i = e[0]
            l += list(tp[i].values())[:-1]
        values.append(l)
        y.append(e[1])
Example #7
File: main.py Project: juzna/PA184
        "delugepeckish", great deluge algorithm with peckish initialisation
        "delugerandom", great deluge algorithm with random initialisation
        "delugeall", great deluge algorithm with all initialisation
    "greedygoal=" main.py -g=default (--greedygoal=default)
        Use selected greedy criteria:
        "maxdif","mindif","maxmax","minmax","minmin","maxmin","minavg"
    "dataset="    main.py -d=number (--dataset=0) to use selected test set in a file
                  main.py -d=* (--dataset=*) to use all test sets in a file
    "time"        main.py -t (--time) to view start and end time
    "beautyoff"   main.py -b (--beautyoff) strip some text blocks
    "visualise"   main.py -v (--visualise) to run 3D visualization
        Modules matplotlib, numpy and scipy are required!
    "solutions"   main.py -s (--solutions) to view assigment table
                """

    loaded_data = load_data( input_file_name )
    if set_demand == "*":
        set_demand = range( get_number_of_problems(loaded_data) )

    # for each set independently
    for set_id in set_demand:
        w_price, w_space, w_capacity = select_problem( loaded_data, set_id)
        best_sol = 32000
        best_type = None
        if decoration:
            print "\n Processing...\n Filename: %s\n Set: %s" % (input_file_name, set_id)
            print " -------------------------"
        if trivial_conditions( w_price, w_space, w_capacity ):
            # if trivial conditions succeed, continue
            
            # test all constructive heuristics
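The usage text in Example #7 describes the command-line switches, but the snippet is cut off before the option handling itself. Below is a minimal Python 3 argparse sketch of that interface; the flag names come from the help text above, while the parser wiring and defaults are assumptions rather than code from the juzna/PA184 project.

import argparse

# Hypothetical re-creation of the option set described in the help text above.
parser = argparse.ArgumentParser(description="heuristic solver (sketch of the CLI described above)")
parser.add_argument("-g", "--greedygoal", default="default",
                    choices=["default", "maxdif", "mindif", "maxmax",
                             "minmax", "minmin", "maxmin", "minavg"],
                    help="greedy selection criterion")
parser.add_argument("-d", "--dataset", default="0",
                    help="index of the test set in the file, or '*' for all sets")
parser.add_argument("-t", "--time", action="store_true",
                    help="print start and end time")
parser.add_argument("-b", "--beautyoff", action="store_true",
                    help="strip some text blocks from the output")
parser.add_argument("-v", "--visualise", action="store_true",
                    help="run the 3D visualisation (matplotlib, numpy and scipy required)")
parser.add_argument("-s", "--solutions", action="store_true",
                    help="print the assignment table")

args = parser.parse_args()
set_demand = args.dataset   # later expanded to all problem sets when it equals '*'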
Example #8
# Some links.
# a deep dream of a NN - https://www.youtube.com/watch?v=sh-MQboWJug

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle
from process_data import load_data

(train_images, train_labels), (test_images, test_labels) = load_data()

train_images, test_images = train_images / 255.0, test_images / 255.0

class_names = ['HCM', 'NOR', 'DCM']

model = Sequential()

model.add(
    Conv2D(32, (3, 3), activation='relu', input_shape=train_images.shape[1:]))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())

model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

# model.summary()
Example #9
def test_load_data():
    assert (load_data('disaster_messages.csv',
                      'disaster_categories.csv').shape == (26386, 5))
Example #10
import logging

import helper
import process_data
from parameters import model_params

__author__ = 'Ehsan Khodabandeh'
__version__ = '1.0'
# ====================================

LOG_FORMAT = '%(asctime)s  %(name)-12s %(levelname)s : %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
# Since the level here is INFO, the logger.debug calls below are not displayed
logger = logging.getLogger(__name__ + ': ')

# ================== Set up data ==================
input_df_dict, input_param_dict = process_data.load_data()
logger.info('Data is loaded!')

# ================== Set up the optimization model ==================
"""
Parameters:
h: unit holding cost
p: production capacity per month
I_0: initial_inventory
c_t: unit production cost in month t
d_t: demand of month t

Variables:
X_t: Amount produced in month t
I_t: Inventory at the end of period t
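The docstring in Example #10 lays out a single-product production-planning model: holding cost h, monthly capacity p, initial inventory I_0, unit cost c_t and demand d_t, with production X_t and end-of-period inventory I_t as variables. The project's OptimizationModel classes are not shown here, so the following is only a minimal PuLP sketch of that formulation with made-up data, not the repository's implementation.

import pulp

# Toy stand-ins for input_param_dict / input_df_dict (assumed values).
h, p, I0 = 2.0, 100, 10                  # holding cost, capacity, initial inventory
c = {1: 5.0, 2: 6.0, 3: 5.5}             # c_t: unit production cost in month t
d = {1: 60, 2: 90, 3: 80}                # d_t: demand of month t
months = sorted(d)

prob = pulp.LpProblem("production_planning", pulp.LpMinimize)
X = pulp.LpVariable.dicts("X", months, lowBound=0)   # amount produced in month t
I = pulp.LpVariable.dicts("I", months, lowBound=0)   # inventory at end of month t

# Objective: production cost plus inventory holding cost.
prob += pulp.lpSum(c[t] * X[t] + h * I[t] for t in months)

# Inventory balance and production capacity for every month.
for t in months:
    prev = I0 if t == months[0] else I[t - 1]
    prob += prev + X[t] - d[t] == I[t]
    prob += X[t] <= p

prob.solve(pulp.PULP_CBC_CMD(msg=False))
print({t: X[t].value() for t in months})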
Example #11
import bilsm_crf_model
import process_data
import numpy as np

model, (vocab, chunk_tags) = bilsm_crf_model.create_model(train=False)
predict_text = '中华人民共和国国务院总理周恩来在外交部长陈毅的陪同下,连续访问了埃塞俄比亚等非洲10国以及阿尔巴尼亚'
# predict_text = '樊大志同志1987年8月参加工作。先后在东北财经大学、北京国际信托投资公司、北京市境外融投资管理中心、北京市国有资产经营有限责任公司、北京证券有限责任公司、北京首都创业集团有限公司、华夏银行股份有限公司工作。'
(train_x, train_y), (test_x, test_y), (vocab1,
                                       chunk_tags1) = process_data.load_data()

model.load_weights('model/crf.h5')

# str, length = process_data.process_data(predict_text, vocab)
# raw = model.predict(str)[0][-length:]
# print(raw)
pre_l = model.predict(test_x)
raw = [[np.argmax(row) for row in l] for l in pre_l]

tpre_nump, pre_nump = 0, 0
tpre_numl, pre_numl = 0, 0
tpre_numo, pre_numo = 0, 0
for l, r in zip(test_y, raw):
    for s, i in zip(l, r):
        if s == 1 and i == 1:
            tpre_nump += 1
        elif s == 3 and i == 3:
            tpre_numl += 1
        elif s == 5 and i == 5:
            tpre_numo += 1

for l in raw:
Example #12
if __name__ == "__main__":
    if(sys.argv[1] == ""):
        print ("argv[1] is the path of file made from process_data.py")
        exit()
    #config
    seed = np.random.randint(0,1000)
    print (seed)
    #seed = 1337
    batch_size = 32
    nb_epoch = 25
    all_num = 10
    valid_rate = 0.9
    path = '../log/%s-dnn_log_test'%(time.strftime("%m-%d_%H-%M"))
    print (path)

    input_shape, w2v_dim, label_class, data, label = load_data(path
            =sys.argv[1],filter_h =5, model_type = "RNN");
    #label need to change
    label = np_utils.to_categorical(label, label_class)

    acces = np.zeros(all_num)
    for i in range(0, all_num):
        #random data
        np.random.seed(seed) 
        np.random.shuffle(data)
        np.random.seed(seed) 
        np.random.shuffle(label)

        #data split to train and test
        datasize = len(label)
        train_data = data[ : datasize*valid_rate]
        train_label = label[ : datasize*valid_rate]
Example #13
# Loading the data.
all_loads = []
all_element_keys = []
all_gps = []
all_park_data = []
starts = [(6, 2016), (9, 2016), (12, 2016), (3, 2017), (6, 2017)]
ends = [(8, 2016), (11, 2016), (2, 2017), (5, 2017), (8, 2017)]

for pair in zip(starts, ends):
    
    month_year_start = pair[0]
    month_year_end = pair[1]
        
    params = process_data.load_data(data_path=data_path, load_paths=[path], 
                                    month_year_start=month_year_start, month_year_end=month_year_end, 
                                    verbose=False)
    element_keys, loads, gps_loc, park_data, idx_to_day_hour, day_hour_to_idx = params
    
    all_element_keys.append(element_keys)
    all_loads.append(loads)
    all_gps.append(gps_loc)
    all_park_data.append(park_data)    
    
all_keys_seasonal = all_element_keys 
all_gps_seasonal = all_gps
all_loads_seasonal = all_loads
all_park_data_seasonal = all_park_data

all_loads = []
all_gps = []
Example #14
import pickle

import keras
import numpy as np
from sklearn_crfsuite.metrics import flat_classification_report

import bilsm_crf_model
import process_data

EPOCHS = 10
model, (train_x, train_y,
        _), (test_x, test_y,
             length), (vocab, chunk_tags) = bilsm_crf_model.create_model()
dev_x, dev_y, dev_length = process_data.load_data(use_dev=True)
# train model
# split = 7000

# define the grid search parameters
# batch_size = [10, 20, 40, 60, 80, 100]
# epochs = [16, 32, 64, 100]
# param_grid = dict(batch_size=batch_size, nb_epoch=epochs)
# grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
# grid_result = grid.fit(train_x[:split], train_y[:split], validation_data=[train_x[split:], train_y[split:]])
#
# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
# for params, mean_score, scores in grid_result.grid_scores_:
#     print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

history = model.fit(
    train_x,
    train_y,
Example #15
from flask import Flask
from flask import render_template, request, jsonify
from plotly.graph_objs import Bar
from sqlalchemy import create_engine
import joblib

# import the load and clean data functions
import sys
sys.path.insert(1, './data')
from process_data import load_data, clean_data
from nltk.tokenize import word_tokenize  # needed by tokenize() below
from nltk.stem import WordNetLemmatizer  # needed by tokenize() below

app = Flask(__name__)

# load and clean the data
df = load_data('data/disaster_messages.csv', 'data/disaster_categories.csv')
df = clean_data(df)


def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens

Example #16
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import numpy as np
from process_data import load_data

d='p1'
matdir   = 'data/'+d
datafile = matdir+'/delays.pkl'
keys,stats,ufiles = load_data(matdir, datafile)


##################
# Do some plots
##################
#def plot_data(keys,stats):
#stats['nrej'][cls][delay_cc][delay_ih] += 1

plt.ioff()
for c in sorted(keys['clses']):
    print c

    X = np.array(np.sort(list(keys['cc'])))
    Y = np.array(np.sort(list(keys['ih'])))
    Z = np.zeros((X.shape[0],Y.shape[0]))
    for i in np.arange(X.shape[0]):
        for j in np.arange(Y.shape[0]):
            Z[i][j] = np.mean( stats['l_p'][c][X[i]][Y[j]] )
    X, Y = np.meshgrid(X, Y)
Example #17
def main(argv):

    parser = argparse.ArgumentParser()
    parser.add_argument("--data", default="pems", help="data to use")
    args = parser.parse_args()

    if args.data == "pems":
        lstm = tf.keras.models.load_model('model_pems/lstm.h5')

        gru = tf.keras.models.load_model('model_pems/gru.h5')

        saes = tf.keras.models.load_model('model_pems/saes.h5')

        cnn_lstm = tf.keras.models.load_model('model_pems/cnn_lstm.h5')

        with open('model_pems/rf.h5', 'rb') as f:
            rf = cPickle.load(f)

        en_1 = tf.keras.models.load_model('model_pems/en_1.h5')

        en_2 = tf.keras.models.load_model('model_pems/en_2.h5')

        en_3 = tf.keras.models.load_model('model_pems/en_3.h5')

    elif args.data == "nyc":

        lstm = tf.keras.models.load_model('model_nyc/lstm.h5')

        gru = tf.keras.models.load_model('model_nyc/gru.h5')

        saes = tf.keras.models.load_model('model_nyc/saes.h5')

        cnn_lstm = tf.keras.models.load_model('model_nyc/cnn_lstm.h5')

        with open('model_nyc/rf.h5', 'rb') as f:
            rf = cPickle.load(f)

        en_1 = tf.keras.models.load_model('model_nyc/en_1.h5')

        en_2 = tf.keras.models.load_model('model_nyc/en_2.h5')

        en_3 = tf.keras.models.load_model('model_nyc/en_3.h5')

    models = [lstm, gru, saes, cnn_lstm, rf, en_1, en_2, en_3]
    names = ['LSTM', 'GRU', 'SAEs', 'CNN_LSTM', 'rf', 'EN_1', 'EN_2', 'EN_3']

    if args.data == "pems":
        X_train, X_test, y_train, y_test, scaler = load_data(
            data="PEMS traffic prediction", force_download=False)
    elif args.data == "nyc":
        X_train, X_test, y_train, y_test, scaler = load_data(
            data="nyc_bike_dataset", force_download=False)

    rf_bk = X_test

    y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(1, -1)[0]

    y_preds = []
    for name, model in zip(names, models):
        if name == 'SAEs':
            X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1]))
        elif name == 'LSTM' or name == 'GRU' or name == 'CNN_LSTM' or name == "EN_1" or name == "EN_2" or name == "EN_3":
            X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        else:
            X_test = rf_bk

        file = 'images/' + name + '.png'
        predicted = model.predict(X_test)
        predicted = scaler.inverse_transform(predicted.reshape(-1, 1)).reshape(
            1, -1)[0]
        y_preds.append(predicted[:288])
        print(name)
        eva_regress(y_test, predicted)

    plot_results(y_test[:288], y_preds, names)
Example #18
Load and parse VERITAS data and use to train a deep learning model for
classification as gamma ray signal or hadronic background.
"""

import numpy as np

from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Merge, Flatten
from keras.optimizers import Adam

from process_data import load_data

# Load the VERITAS data and parse into usable format
print "Loading data..."
data, labels = load_data(1000, '59521_data.txt', '59521_gammas.txt')
print data.shape
print labels.shape

# Set hyperparameters
lr = 0.0001

# Set up the model
model = Sequential()
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same', input_shape=(4, 64, 64)))
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same'))
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same'))
model.add(Convolution2D(64, 6, 6, activation='relu', border_mode='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Convolution2D(128, 3, 3, activation='relu', border_mode='same'))
model.add(Convolution2D(128, 3, 3, activation='relu', border_mode='same'))
Example #19
from process_data import preprocess, load_data
from render_charts import vis_data


def print_info(df):
    print(df.dtypes)
    print(df.describe(datetime_is_numeric=True))
    # print(df.Date.dt.day.explode().value_counts())
    print(df.drop(columns=["State"]).isnull().sum())

    print(df.drop(columns=["State"])[df["AvgTemperature"].isna()])

    cites_num = len(df["City"].unique())

    print(
        f"This data contains a list of daily average temperatures from {cites_num} cities and {len(df['Country'].unique())} countries."
    )


if __name__ == "__main__":
    df = preprocess(load_data())
    print_info(df)
Example #20
import numpy as np

from keras import backend as K

from process_data import load_data, build_dict, vectorize, load_glove_weights
from net import Net

N = 300000
N_d = int(N * 0.1)
train_d, train_q, train_a = load_data('./dataset/cnn/train.txt', N, True)
dev_d, dev_q, dev_a = load_data('./dataset/cnn/dev.txt', N_d, True)

num_train = len(train_d)
num_dev = len(dev_d)
print('n_train', num_train, ', num_dev', num_dev)

print('Build dictionary..')
word_dict = build_dict(train_d + train_q)
entity_markers = list(
    set([w for w in word_dict.keys() if w.startswith('@entity')] + train_a))
entity_markers = ['<unk_entity>'] + entity_markers
entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
print('Entity markers: %d' % len(entity_dict))
num_labels = len(entity_dict)

doc_maxlen = max(map(len, (d for d in train_d)))
query_maxlen = max(map(len, (q for q in train_q)))
print('doc_maxlen:', doc_maxlen, ', q_maxlen:', query_maxlen)

v_train_d, v_train_q, v_train_y, _ = vectorize(train_d, train_q, train_a,
                                               word_dict, entity_dict,
Example #21
#import modules
import process_data
import pandas as pd

#load data from csv files using process_data.py methods
data = process_data.load_data('disaster_messages.csv',
                              'disaster_categories.csv')
#creating a separate clean dataset to train model using the process_data.py method
data_clean = process_data.clean_data(data)

#saving a sqlite db for models using the processed data and process_data.py methods
process_data.save_data(data_clean, 'emergency')


def custom_clean_data(df):
    """Clean categories and merge to messages

    Args:
        df => DataFrame of merged categories and messages csv files

    Returns:
        df => Dataframe of cleaned categories and dropped duplicates

    """
    categories = pd.Series(df.categories).str.split(';', expand=True)
    row = categories.loc[0]
    category_colnames = row.apply(lambda x: x[:-2]).values
    categories.columns = category_colnames
    for column in categories:
        # set each value to be the last character of the string
        categories[column] = categories[column].apply(lambda x: x[-1:]).values
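custom_clean_data in Example #21 is cut off after keeping only the last character of each category value. Its docstring promises cleaned categories with duplicates dropped, so a plausible remainder is sketched below as a separate helper; the name finish_clean and the exact steps are assumptions, not code taken from the project.

import pandas as pd

def finish_clean(df, categories):
    # Hypothetical tail of custom_clean_data: cast the single-character values
    # to int, swap the expanded columns in for the raw 'categories' column,
    # and drop duplicate rows.
    categories = categories.astype(int)
    df = df.drop(columns=['categories'])
    df = pd.concat([df, categories], axis=1)
    return df.drop_duplicates()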
Example #22
File: test.py Project: qwqkimi/CL_NER1.0
import process_data

(train_x, train_y), (test_x, test_y), (vocab, chunk_tags) = process_data.load_data()
Example #23
if model_params['module'] == 'gurobi':
    from optimization_model_gurobi import OptimizationModel
elif model_params['module'] == 'cplex':
    from optimization_model_docplex import OptimizationModel
elif model_params['module'] == 'xpress':
    from optimization_model_xpress import OptimizationModel
else:
    from optimization_model_pulp import OptimizationModel

__author__ = 'Ehsan Khodabandeh'
__version__ = '1.1'
# ====================================

LOG_FORMAT = '%(asctime)s  %(name)-12s %(levelname)s : %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__ + ': ')

# ================== Set up data ==================
input_df_dict, input_param_dict = load_data()
logger.info('Data is loaded!')

# ================== Optimization ==================
start = time()
optimizer = OptimizationModel(input_df_dict['input_data'], input_param_dict)
logger.info(f'Model creation time in sec: {time() - start:.4f}')
optimizer.optimize()

# ================== Output ==================
optimizer.create_output()
logger.info('Outputs are written to csv!')
Example #24
    ax = fig.add_axes([.14, .14, .8, .74])

    # plot our data
    ax.loglog(x_norm, y_norm, 'b-')
    ax.set_autoscale_on(False)  # Otherwise, infinite loop

    y_theory = function(x_norm, popt[0], popt[1])
    ax.loglog(x_norm, y_theory, 'r-')
    ax.set_title(words + " for " + filename)

    fig.savefig('./figures/spectral_fits/v/' + filename + '_fit_' + words +
                '.png')


for filename in FILENAMES:
    dat_bin = load_data(filename + '_binned')
    VINDS = [
        abs(dat_bin.v) < 0.05,
        (0.05 < abs(dat_bin.v)) & (abs(dat_bin.v) < 0.1),
        (0.1 < abs(dat_bin.v)) & (abs(dat_bin.v) < 0.2),
        (0.2 < abs(dat_bin.v)) & (abs(dat_bin.v) < 0.3),
        abs(dat_bin.u) > 0.3
    ]
    VINDS_words = [
        ' v less than 0.5 ', ' v between 0.5 and 1.0 ',
        ' v between 1.0 and 1.5 ', ' v between 1.5 and 2.0 ',
        ' v greater than 2.0 '
    ]

    fname = './csv_files/' + filename + '_results.csv'
    if isfile(fname):
Example #25
def test_clean_data():
    df1 = load_data('disaster_messages.csv', 'disaster_categories.csv')
    df2 = clean_data(df1)

    assert (df2.shape == (26216, 40))
Example #26
#parser.add_argument('--pluck-damping-variation', type=float, default=0.25,
#                    help='Pluck damping variation (default: 0.25)')
#parser.add_argument('--string-tension', type=float, default=0.1,
#                    help='String tension (default: 0.0)')
#parser.add_argument('--stereo-spread', type=float, default=0.2,
#                    help='Stereo spread (default: 0.2)')
#parser.add_argument('--string-damping-calculation', type=str, default='magic',
#                    help='Stereo spread (default: magic)')
#parser.add_argument('--body', type=str, default='simple',
#                    help='Stereo spread (default: simple)')
#parser.add_argument('--mode', type=str, default='karplus-strong', choices=['karplus-strong', 'sine'],
#                    help='Which type of audio to generate.')
#args = parser.parse_args()

if __name__ == '__main__':

    #    guitar = Guitar(options=args)
    #    audio_buffer = sequencer.play_guitar(guitar)
    #    cqt = librosa.cqt(audio_buffer, sr=40000, n_bins=84*4, bins_per_octave=12*4, hop_length=256, filter_scale=0.8)
    #   inverse_cqt = librosa.icqt(cqt, sr=40000, bins_per_octave=12*4, hop_length=256, filter_scale=0.8)
    #    print('CQT size: {}'.format(cqt.shape))
    #    # plt.imshow(cqt)
    #    # plt.show()
    #    librosa.output.write_wav('guitar_output.wav', audio_buffer, 40000)  # The 40000 is the sampling frequency
    #    librosa.output.write_wav('guitar_output_reconstructed.wav', inverse_cqt, 40000)  # The 40000 is the sampling frequency

    net = process_data.Net().to(device)
    train_data, test_data, val_data, eval_data = process_data.load_data()
    #    process_data.train_model(net, train_data, val_data, eval_data)
    process_data.test(net, test_data)
Example #27
def reynolds_stress(dat_bin, filename):
    """Plots the Reynold's Stress"""
    fig = plt.figure(1, figsize=[8, 4])
    fig.clf()
    ax = fig.add_axes([.14, .14, .8, .74])

    # first, convert the num_time to date_time, and plot this versus dat_raw.u
    date_time = dt.num2date(dat_bin.mpltime)

    # plot the data
    ax.plot(date_time, dat_bin.upvp_, 'r-', rasterized=True)
    ax.plot(date_time, dat_bin.upwp_, 'g-', rasterized=True)
    ax.plot(date_time, dat_bin.vpwp_, 'b-', rasterized=True)

    # label axes
    ax.set_xlabel('Time')
    ax.set_ylabel('Reynolds Stresses $\mathrm{[m^2/s^2]}$', size='large')

    fig.savefig('./figures/' + filename + '_reynolds_plot.png')


for filename in FILENAMES:
    dat_bin = load_data(filename + '_binned')
    dat_screen = load_data(filename + '_processed')
    dat_raw = load_data(filename + '_raw')
    processed_plot(dat_raw, dat_screen, filename)
    spectrum_plot(dat_bin, filename)
    tke_plot(dat_bin, filename)
    reynolds_stress(dat_bin, filename)
Example #28
from ADNet_6 import Model
import process_data

x_train, x_dev, y_train, y_dev = process_data.load_data('train')
x_test, id = process_data.load_data('test')
model = Model()
model.train(x_train, y_train, x_dev, y_dev, batch_size=64, epoch=20)
predict = model.predict(x_test)
print('result ==> result.csv')
process_data.output_data(predict, id)
Example #29
            return (p + 1) / ((y == y_i).sum() + 1)

        self._r = sparse.csr_matrix(np.log(pr(x, 1, y) / pr(x, 0, y)))
        x_nb = x.multiply(self._r)
        if self.dual == 'auto':
            self.dual = x_nb.shape[0] <= x_nb.shape[1]
        self._clf = LogisticRegression(C=self.C,
                                       dual=self.dual,
                                       n_jobs=1,
                                       verbose=self.verbose)
        self._clf.fit(x_nb, y)
        return self


(train_ids, train_texts,
 train_labels), (test_ids, test_texts) = process_data.load_data("../input")

# My version of cleaning the text
# clean_text = partial(process_data.clean_text, remove_stopwords=True, replace_numbers=True, remove_special_chars=True,
#                      stem_words=True)
# train_texts = process_data.clean(train_texts, clean_text)
# test_texts = process_data.clean(test_texts, clean_text)
# tokenize = nltk.tokenize.word_tokenize

# The kernel writer's version
import re, string

re_tok = re.compile('([{}“”¨«»®´·º½¾¿¡§£₤‘’])'.format(string.punctuation))


def tokenize(s):
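    # The original body is cut off here; a plausible completion (an assumption,
    # mirroring the well-known NB-SVM kernel this snippet resembles) would be:
    return re_tok.sub(r' \1 ', s).split()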
Example #30
import numpy as np
import sys
from pathlib import Path
import os

if __name__ == "__main__":

    datafile = sys.argv[1]
    file_trics = sys.argv[2]
    path_matr = sys.argv[3]
    target_var = sys.argv[4]
    n = int(sys.argv[5])
    cat_feats = list(map(str, sys.argv[6].strip('[]').split(',')))
    cont_feats = list(map(str, sys.argv[7].strip('[]').split(',')))

    data_in = pdata.load_data(datafile)

    filename = datafile.split("/")[-1]

    sps = pdata.computeNSnapshots(data_in, 3, target_var)

    y = list()
    values = list()
    cols = list()
    for p in sps.keys():
        tp = data_in[p]
        for snaps in sps[p]:
            l = list()
            l.append(p)
            for e in snaps:
                i = e[0]
Example #31
    adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-6)
    model.compile(loss='binary_crossentropy', class_mode = 'multi_label',
            optimizer=adadelta)

    return model


if __name__ == "__main__":
    if(sys.argv[1] == ""):
        print ("argv[1] is the path of file made from process_data.py")
        exit()
    #config
    batch_size = 50
    nb_epoch = 25

    input_shape, w2v_dim, label_class, data, label = load_data(path =sys.argv[1],filter_h =5);
    label = np_utils.to_categorical(label, label_class)

    #print("Pad sequences(sample x time)")
    #data = sequence.pad_sequences(data)

    model = deep_CNN(input_shape, label_class)

    print("begin train model..")
    model.fit(data, label,
            batch_size = batch_size,
            nb_epoch = nb_epoch,
            shuffle = True,
            show_accuracy = True,
            verbose = 1
            )
Example #32
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import numpy as np
from process_data import load_data

d = 'p1'
matdir = 'data/' + d
datafile = matdir + '/delays.pkl'
keys, stats, ufiles = load_data(matdir, datafile)

##################
# Do some plots
##################
#def plot_data(keys,stats):
#stats['nrej'][cls][delay_cc][delay_ih] += 1

plt.ioff()
for c in sorted(keys['clses']):
    print c

    X = np.array(np.sort(list(keys['cc'])))
    Y = np.array(np.sort(list(keys['ih'])))
    Z = np.zeros((X.shape[0], Y.shape[0]))
    for i in np.arange(X.shape[0]):
        for j in np.arange(Y.shape[0]):
            Z[i][j] = np.mean(stats['l_p'][c][X[i]][Y[j]])
    X, Y = np.meshgrid(X, Y)

    fig = plt.figure()