Example #1
def main(arg=None):
    # Load the npz weight file.
    weights = np.load('./vgg16_weights.npz')

    # Weights_Tranined stores the data from vgg16_weights.npz downloaded from the
    # internet. For example, its member 'conv11_w' corresponds to the weights of
    # the first conv3-64 layer and 'conv11_b' to its bias.
    w_trained = Weights_Tranined(weights)

    #Build training and validation sets
    data_path = os.path.join(os.getcwd(), 'fmnist')
    print(data_path)
    imgs, labels = pre.processing(data_path, CLASS_NUM)
    train_img, train_label, validation_img, validation_label = pre.split(
        imgs, labels)

    print(train_img.shape)
    print(train_label.shape)

    train_model(t_x=train_img,
                t_y=train_label,
                weights=w_trained,
                dprate=DROPOUT_RATE,
                imgsize=IMG_SIZE,
                imgchannel=IMG_CHANNEL,
                batchsize=BATCH_SIZE,
                train_step=TRAINING_STEP,
                learningrate=LEARNING_RATE_BASE,
                learningdecay=LEARNING_RATE_DECAY,
                regurate=REGULARIZATION_RATE)
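The preprocess module (pre) is not shown on this page; a minimal sketch of a split with the signature used above, assuming a simple shuffled holdout (the fraction and shuffling strategy are assumptions), might look like:

import numpy as np

def split(imgs, labels, validation_fraction=0.1, seed=0):
    # Hypothetical holdout split matching the call above; the real
    # pre.split may shuffle, stratify, or size the sets differently.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(imgs))
    cut = int(len(imgs) * (1 - validation_fraction))
    train_idx, val_idx = idx[:cut], idx[cut:]
    return imgs[train_idx], labels[train_idx], imgs[val_idx], labels[val_idx]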
Example #2
File: AutoMS.py  Project: sudarsun/ams
def split_sample(file_name):
    # Split the dataset into train and test sets
    import preprocess as sp
    train_df, test_df = sp.split(file_name)
    # Derive the output file names for the (randomly sampled) train and test data
    dataset_name, file_type = file_name.split('.')
    dataset_name_train = dataset_name + "_train.csv"
    dataset_name_test = dataset_name + "_test.csv"
    return dataset_name_train, dataset_name_test, train_df, test_df
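split_sample only derives the output file names; presumably the caller writes the frames out itself. A hypothetical usage (the file name 'dataset.csv' is made up):

train_name, test_name, train_df, test_df = split_sample('dataset.csv')
train_df.to_csv(train_name, index=False)
test_df.to_csv(test_name, index=False)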
Example #3
def score(filename, disp=True):
    cleaned_df = clean('oasis_longitudinal.csv')
    _, X_test, _, Y_test = split(cleaned_df)

    model = pickle.load(open(filename, 'rb'))
    Y_pred = model.predict(X_test)
    recall = recall_score(Y_test, Y_pred)
    accuracy = accuracy_score(Y_test, Y_pred)
    if disp:
        print(model)
        print(f"Accuracy = {accuracy}")
        print(f"Recall= {recall}")
    return model
Example #4
    def generate_data(self, seq2seq=False):
        import generator as gen

        series = gen.complete(self.time, pg.baseline, pg.slope, pg.period,
                              pg.amplitude, pg.noise_level)
        train, valid = pp.split(pg.time, series, pg.split_time)

        train_set = pp.window_dataset(train[1],
                                      pm.window_size,
                                      pm.batch_size,
                                      seq2seq=seq2seq)
        valid_set = pp.window_dataset(valid[1],
                                      pm.window_size,
                                      pm.batch_size,
                                      seq2seq=seq2seq)

        self.main_series = series
        self.set_data(train_set, valid_set)
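Here (and in Examples #16 and #18) pp.split appears to be a time-based split that returns (time, values) pairs, given how train[1] and valid[0]/valid[1] are indexed. A minimal sketch under that assumption:

def split(time, series, split_time):
    # Hypothetical time-based split: everything before split_time is
    # training data, the rest is validation; each side is a (time, values) pair.
    train = (time[:split_time], series[:split_time])
    valid = (time[split_time:], series[split_time:])
    return train, valid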
Example #5
File: id3.py  Project: varunkp/BerkeleyCS
def id3(examples, attributes):

	root = dt.TreeNode()

	one_count = sum([int(y) for X, y in examples]) 

	if one_count == len(examples):
		root.label = 1
		return root

	if one_count == 0:
		root.label = 0
		return root

	if not attributes:
		if one_count >= len(examples) / 2.0:
			root.label = 1
		else:
			root.label = 0
		return root

	best_attribute, best_value, info_gain = pp.split(examples, attributes)
	
	if info_gain == 0:
		if one_count >= len(examples) / 2.0:
			root.label = 1
		else:
			root.label = 0
		return root
	root.attribute = best_attribute
	root.split_value = best_value
	left_exs = []
	right_exs = []
	for ex in examples:
		if ex[0][root.attribute] <= root.split_value:
			left_exs.append(ex)
		else:
			right_exs.append(ex)
	new_attributes = attributes.copy()
	new_attributes.remove(best_attribute)
	root.left_child = id3(left_exs, new_attributes)
	root.right_child = id3(right_exs, new_attributes)

	return root
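In Examples #5, #6, #8 and #9, pp.split acts as the ID3 split finder: it returns the attribute, the threshold, and the information gain of the best binary split. A self-contained sketch of such a finder, assuming binary labels and comparable attribute values (the real implementation may pick candidate thresholds differently):

import math

def entropy(examples):
    # examples are (X, y) pairs with binary labels
    if not examples:
        return 0.0
    p = sum(int(y) for _, y in examples) / len(examples)
    if p in (0.0, 1.0):
        return 0.0
    return -p * math.log2(p) - (1 - p) * math.log2(1 - p)

def split(examples, attributes):
    # Hypothetical pp.split: evaluate every (attribute, threshold) pair
    # and return the one with the highest information gain.
    base = entropy(examples)
    best_attribute, best_value, best_gain = None, None, 0.0
    for a in attributes:
        for t in sorted({X[a] for X, _ in examples}):
            left = [ex for ex in examples if ex[0][a] <= t]
            right = [ex for ex in examples if ex[0][a] > t]
            if not left or not right:
                continue
            gain = base - (len(left) * entropy(left) + len(right) * entropy(right)) / len(examples)
            if gain > best_gain:
                best_attribute, best_value, best_gain = a, t, gain
    return best_attribute, best_value, best_gain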
Example #6
def id3(examples, attributes):

    root = dt.TreeNode()

    one_count = sum([int(y) for X, y in examples])

    if one_count == len(examples):
        root.label = 1
        return root

    if one_count == 0:
        root.label = 0
        return root

    if not attributes:
        if one_count >= len(examples) / 2.0:
            root.label = 1
        else:
            root.label = 0
        return root

    best_attribute, best_value, info_gain = pp.split(examples, attributes)

    if info_gain == 0:
        if one_count >= len(examples) / 2.0:
            root.label = 1
        else:
            root.label = 0
        return root
    root.attribute = best_attribute
    root.split_value = best_value
    left_exs = []
    right_exs = []
    for ex in examples:
        if ex[0][root.attribute] <= root.split_value:
            left_exs.append(ex)
        else:
            right_exs.append(ex)
    new_attributes = attributes.copy()
    new_attributes.remove(best_attribute)
    root.left_child = id3(left_exs, new_attributes)
    root.right_child = id3(right_exs, new_attributes)

    return root
Example #7
def run():

    for gap in xrange(7):
        expansion = False
        data, comm = preprocess.loadData(gap, expansion)
        train_data, test_data = preprocess.split(data, comm)
        train = np.array(train_data[0])
        test = np.array(test_data[0])
        test_com = test_data[1]

        train_x = train[:, 1:]
        train_y = train[:, 0]

        print train_x.shape

        reg = gcv(train_x, train_y)

        if len(sys.argv) > 1 and sys.argv[1] == 'output':
            test_x = test[:, 1:]
            pred = reg.predict(test_x)
            output_test(pred, test_com, gap)
Example #8
File: id3.py  Project: varunkp/BerkeleyCS
def id3_depth_limited(examples, attributes, depth):

	root = dt.TreeNode()

	if sum([y for X, y in examples]) == len(examples):
		root.label = 1
		return root

	if sum([y for X, y in examples]) == 0:
		root.label = 0
		return root

	if not attributes or depth == 0:
		if sum([y for X, y in examples]) >= len(examples) / 2.0:
			root.label = 1
		else:
			root.label = 0
		return root
	best_attribute, best_value, info_gain = pp.split(examples, attributes)
	if info_gain == 0:
		if sum([y for X, y in examples]) >= len(examples) / 2.0:
			root.label = 1
		else:
			root.label = 0
		return root
	root.attribute = best_attribute
	root.split_value = best_value
	left_exs = []
	right_exs = []
	for ex in examples:
		if ex[0][root.attribute] <= root.split_value:
			left_exs.append(ex)
		else:
			right_exs.append(ex)
	new_attributes = attributes.copy()
	new_attributes.remove(best_attribute)
	root.left_child = id3_depth_limited(left_exs, new_attributes, depth-1)
	root.right_child = id3_depth_limited(right_exs, new_attributes, depth-1)

	return root
Example #9
def id3_depth_limited(examples, attributes, depth):

    root = dt.TreeNode()

    if sum([y for X, y in examples]) == len(examples):
        root.label = 1
        return root

    if sum([y for X, y in examples]) == 0:
        root.label = 0
        return root

    if not attributes or depth == 0:
        if sum([y for X, y in examples]) >= len(examples) / 2.0:
            root.label = 1
        else:
            root.label = 0
        return root
    best_attribute, best_value, info_gain = pp.split(examples, attributes)
    if info_gain == 0:
        if sum([y for X, y in examples]) >= len(examples) / 2.0:
            root.label = 1
        else:
            root.label = 0
        return root
    root.attribute = best_attribute
    root.split_value = best_value
    left_exs = []
    right_exs = []
    for ex in examples:
        if ex[0][root.attribute] <= root.split_value:
            left_exs.append(ex)
        else:
            right_exs.append(ex)
    new_attributes = attributes.copy()
    new_attributes.remove(best_attribute)
    root.left_child = id3_depth_limited(left_exs, new_attributes, depth - 1)
    root.right_child = id3_depth_limited(right_exs, new_attributes, depth - 1)

    return root
Example #10
    if mode == "train":
        print "Training the bayes model"
        bayes_model(mode="train")
    elif mode == "test":
        if os.path.exists(bayes_model_file):
            print "Model is already trained! Reading the file ..."
            trained_model = pickle.load(open(bayes_model_file, "rb"))
            bayes_model(mode="test", trained_model=trained_model)
        else:
            print "Model file not found :("
    elif mode == "score":
        bayes_model(mode="score")
    elif mode == "join":
        pre.join_per_business()
    elif mode == "split":
        pre.split()
elif sys.argv[1] == 'neural-net':
    mode = str(sys.argv[2])
    if mode in ["train", "test"]:
        neural_net(mode)
        # if os.path.exists(bayes_model_file):
        # 	print "Model is already trained! Reading the file ..."
        # 	trained_model = pickle.load(open(bayes_model_file, "rb"))
        # 	bayes_model(mode="test", trained_model=trained_model)
        # else:
        # 	print "Model file not found :("
    elif mode == "score":
        bayes_model(mode="score")
    elif mode in ["join", "split"]:
        pre.join_and_split(mode=mode)
elif sys.argv[1] == 'scikit-classifier':
Example #11
mode = sys.argv[1]
input = pd.read_csv(sys.argv[2])
params = json.load(open(sys.argv[3]))

feature = params[FEATURES]
label = params[LABELS]
print('FEATURE: {}\nLABEL: {}'.format(feature, label))

data = pd.DataFrame(columns={label, feature})
data[feature] = filter(input[feature])
if mode == TRAIN:
    # preprocess labels
    data[label] = filter(input[label])
    data[label] = clean(data[label], 'label')
    y = data.pop(label)
    X_train, y_train, X_test, y_test = split(data, y,
                                             0.3)  #train-test ratio 70:30

    X_train[label] = y_train
    X_test[label] = y_test

    X_test.to_csv(PATH + 'test_file.csv', sep=',')

    print('Begin training -- TRAIN: {} TEST: {}'.format(
        len(X_train), len(X_test)))
    # create the datasets for training
    test_file, dev_file, train_file = format_data(X_train, label, 0.3)

    # train model
    model = train(PATH, test_file, dev_file, train_file)

    # evaluate
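Note that the split here returns X_train, y_train, X_test, y_test, which is not sklearn's train_test_split order. If the project simply delegates to sklearn (an assumption), a wrapper with that signature could look like:

from sklearn.model_selection import train_test_split

def split(X, y, test_ratio):
    # Hypothetical sketch; returns the splits in the order used above.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    return X_train, y_train, X_test, y_test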
Example #12
import numpy as np
import pandas as pd
import algo
import preprocess
import recommend

# raw data
movies_raw = pd.read_table('./ml-1m/movies.dat', sep = '::', names = ['MovieID', 'Title', 'Genres'], engine = 'python')
ratings_raw = pd.read_table('./ml-1m/ratings.dat', sep = '::', names = ['UserID', 'MovieID', 'Rating', 'Timestamp'],engine = 'python')
users_raw = pd.read_table('./ml-1m/users.dat', sep = '::', names = ['UserID','Gender','Age','Occupation','Zip-code'], engine = 'python')

##################################### Part - 1 Refining dataset and creating test set

# separating test sets, i.e. new users and new movies
ratings_raw, user_test, movie_test = preprocess.split(ratings_raw, fraction = 0.1)

# refining ratings_raw
ratings_nan = ratings_raw.iloc[:,0:3]
ratings_nan = ratings_nan.pivot(index='UserID', columns='MovieID', values='Rating')

# process info of users who have rated 
users = preprocess.user_info(users_raw,ratings_raw)

# process info of movies that have been rated
movies, genres = preprocess.movie_info(movies_raw, ratings_raw)

# genre mapping : helps to convert a new movie vector
lst = []
for i in range(0,len(genres)):
    lst.append((genres[i],i))
genre_mapping = dict(lst)
Example #13
import embedding
import model
import preprocess
from sklearn.model_selection import KFold

input_dataset = './Augmented_Feat.csv'
embedmodel = embedding.train_word2vec('./glove.6B.300d.txt')
question = './questions.csv'


df = preprocess.cleaning_dataset(input_dataset)
df = preprocess.question_demoting(df, question)

X, y = preprocess.scale(df)

X_train, X_test, y_train, y_test = preprocess.split(X, y, 0.2)

split = 5
index = 0
train_model = [None] * split
tokenizer = [None] * split
acc = [None] * split
kfold = KFold(n_splits=split, shuffle=True, random_state=101)
for train, test in kfold.split(X_train, y_train):
    train_model[index], tokenizer[index] = model.train(X_train.iloc[train], y_train[train], embedmodel)
    test_results = model.predict(X_train.iloc[test], train_model[index], tokenizer[index])
    test_results, y_true = model.processresult(test_results, y_train[test])
    acc[index], _ = model.evaluate(test_results, y_true)
    index += 1

index = 0
Example #14
def main(args):
    # Create output folder
    util.mkdir(args['output'], args['clean'])

    # Tensorflow logging
    tf.logging.set_verbosity(tf.logging.WARN)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    # Logging to DWF server
    dwf_logging = None

    # Logging
    logger = logging.getLogger('DeepBugHunter')
    
    if 'dwf_client_info' in args:
        client_info = args['dwf_client_info']
        sys.path.insert(0, client_info['util_path'])
        dwf_logging = __import__('dwf_logging')        
            
    if not logger.handlers:
        formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
                                      datefmt='%Y-%m-%d %H:%M:%S')
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler(os.path.join(args['output'], 'dbh.log'), mode='a')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)
        if 'dwf_client_info' in args:
            http_handler = dwf_logging.LogHandler()
            http_handler.setLevel(logging.INFO)
            logger.addHandler(http_handler)
        else:
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            ch.setFormatter(formatter)
            logger.addHandler(ch)

    extra_log_data = {}
    if dwf_logging is not None:
        extra_log_data = {'progress' : 0, 'hash' : client_info['client_id']}

    logger.info(msg='DBH started...', extra=extra_log_data)
    logger.info('Input csv is ' + args['csv'])

    # Seeding global random states, just in case...
    tf.set_random_seed(args['seed'])
    # This is used for sklearn algorithms under the hood so we don't have to manually
    # set the random seed separately every time
    np.random.seed(args['seed'])

    # Load the whole input
    data = csv2pandas.load_data(args['csv'], args['label'], args['seed'])

    # Apply optional preprocessing
    for (what, how) in args['preprocess']:
        # TODO: use <what> and generalize preprocessors
        data = getattr(preprocess, how)(*data)

    table = []
    strategy_i = 0
    strategy_cnt = len(args['strategy'])
    for (strategy, sargs) in args['strategy']:

        strategy_i += 1
        logger.info('(%d/%d) Strategy "%s" started with args: <%s>', strategy_i, strategy_cnt, strategy, sargs)

        # Aggregate confusion matrices
        cv_train = ConfMatrix()
        cv_dev = ConfMatrix()
        cv_test = ConfMatrix()

        # For each fold
        fold_generator = preprocess.split(data, folds=FOLDS, seed=args['seed'])
        fold_i = 0
        for remainder, test in fold_generator():
            fold_i += 1

            # A single dev split
            # Not fully fair, but fairer...
            train, dev = next(preprocess.split(remainder, folds=FOLDS, seed=args['seed'])())
            
            # Resample the training set
            if args['resample'] != 'none':
                train = preprocess.resample(*train, mode=args['resample'], amount=args['resample_amount'], seed=args['seed'])

            # Evaluate according to the current strategy
            train_res, dev_res, test_res, cl = getattr(strategies, strategy).learn(train, dev, test, args, sargs)
            
            # Aggregate metrics for cross-validation F-Measure
            cv_train.add(train_res)
            cv_dev.add(dev_res)
            cv_test.add(test_res)
            
            if args['calc_completeness']:
                preds = getattr(strategies, strategy).predict(cl, dev, args, sargs)
                issues = preprocess.get_orig_labels(dev[1])
                cv_dev.calc_completeness(preds, issues)
                
                preds = getattr(strategies, strategy).predict(cl, test, args, sargs)
                issues = preprocess.get_orig_labels(test[1])
                cv_test.calc_completeness(preds, issues)


            if dwf_logging is not None:
                extra_log_data = {'progress' : fold_i / FOLDS, 'hash' : client_info['client_id']}
            
            logger.info('Fold %d/%d done', fold_i, FOLDS, extra=extra_log_data)

        train_stats = cv_train.stats(False)
        dev_stats = cv_dev.stats(args['calc_completeness'])
        test_stats = cv_test.stats(args['calc_completeness'])

        logger.info('%s[%s] results:', strategy, sargs)
        logger.info('train: %s', train_stats)
        logger.info('dev:   %s', dev_stats)
        logger.info('test:  %s', test_stats)

        if dwf_logging is not None:
            result = dwf_logging.pack_results(train_stats, dev_stats, test_stats)
            dwf_logging.report_result(result, client_info['client_id'])            


        table.append([
            args['resample'],
            args['resample_amount'],
            args['preprocess'],
            strategy,
            sargs,
            train_stats['fmes'],
            dev_stats['fmes'],
            test_stats['fmes'],
            train_stats,
            dev_stats,
            test_stats,
        ])

    with open(os.path.join(args['output'], 'dbh.csv'), 'a') as f:
        for line in table:
            f.write(';'.join([str(item) for item in line]) + '\n')
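In this example preprocess.split is a k-fold generator factory: calling it returns a callable whose iterator yields (remainder, test) pairs, and it is called again on the remainder to carve out a dev split. A sketch of such a factory, assuming data is an (X, y) pair of pandas objects (both assumptions):

from sklearn.model_selection import KFold

def split(data, folds, seed):
    # Hypothetical sketch: return a callable that yields
    # ((X_rest, y_rest), (X_test, y_test)) for each fold.
    X, y = data
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    def fold_generator():
        for rest_idx, test_idx in kf.split(X):
            yield (X.iloc[rest_idx], y.iloc[rest_idx]), (X.iloc[test_idx], y.iloc[test_idx])
    return fold_generator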
Example #15
def get_data(type, oneyear):  # without sequences
    data = read_data()
    proc_data, y = prepare_data(oneyear, data)
    x_train, x_test, y_train, y_test = split(proc_data, y, type)
    return x_train, x_test, y_train, y_test
Example #16
    def plot_forecast(self):
        series = self.main_series[pg.split_time - pm.window_size:-1]
        fc = self.forecast(series)[:, 0]
        _, valid = pp.split(pg.time, self.main_series, pg.split_time)
        from support import plot_series
        plot_series(valid[0], [valid[1], fc], labels=["Real", "Forecast"])
Example #17
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
import pickle
from preprocess import clean, split

cleaned_df = clean('oasis_longitudinal.csv')
X_train, X_test, Y_train, Y_test = split(cleaned_df)

logistic_model = LogisticRegression(C=10).fit(X_train, Y_train)
forest_model = RandomForestClassifier(n_estimators=3,
                                      max_features=4,
                                      n_jobs=4,
                                      max_depth=5,
                                      random_state=0).fit(X_train, Y_train)
tree_model = DecisionTreeClassifier(random_state=0,
                                    max_depth=1,
                                    criterion='gini').fit(X_train, Y_train)
adaboost_model = AdaBoostClassifier(n_estimators=3,
                                    learning_rate=0.0001,
                                    random_state=0).fit(X_train, Y_train)

pickle.dump(logistic_model, open('model_files/logistic.sav', 'wb'))
pickle.dump(forest_model, open('model_files/forest.sav', 'wb'))
pickle.dump(tree_model, open('model_files/tree.sav', 'wb'))
pickle.dump(adaboost_model, open('model_files/adaboost.sav', 'wb'))
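Together with the score() helper from Example #3, the pickled models could then be evaluated in a loop, for instance (purely illustrative):

for name in ['logistic', 'forest', 'tree', 'adaboost']:
    score('model_files/{}.sav'.format(name))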
Example #18
def naive(series, split_time):
    return series[split_time - 1:-1]


def moving_average(series, window_size):
    mov = np.cumsum(series)
    mov[window_size:] = mov[window_size:] - mov[:-window_size]
    return mov[window_size - 1:-1] / window_size


series = gen.complete(p.time, p.baseline, p.slope, p.period, p.amplitude,
                      p.noise_level)
time2, series2 = pp.remove_season(time, series)

train, valid = pp.split(time, series, split_time)
train2, valid2 = pp.split(time, series2, split_time - period)

naive_prediction = naive(series, split_time)
plot_series(valid[0], [valid[1], naive_prediction],
            labels=["Series", "Naive Forecast"])
print(mae(naive_prediction, valid[1]))

window = 30
moving_avg = moving_average(series, window)[split_time - window:]
plot_series(valid[0], [valid[1], moving_avg],
            labels=["Series", "Moving average (30 days)"])
print(mae(moving_avg, valid[1]))

window = 50
diff_moving_avg = moving_average(series2,
Example #19
File: main.py  Project: NaokiDohi/covid-19
def main():
    args = sys.argv
    ARG_NUM = 5
    if len(sys.argv) < ARG_NUM:
        print("Error: not enough command-line arguments")
        sys.exit(1)

    df = pd.read_csv('COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
    df = df[df['Country/Region']=='Japan']
    df = df.iloc[:,4:].copy()
    data_at_japan = df.iloc[0,:]
    data_at_japan.index = pd.to_datetime(data_at_japan.index)
    #print(data_at_japan)
    plt.figure(figsize=(10,5))
    plt.plot(data_at_japan)
    plt.title('COVID-19 confirmed in Japan', y = -0.2)
    plt.xlabel("Date")
    plt.ylabel("Infected people")
    plt.grid(True)
    #plt.show()
    # save the figure
    fname_1 = 'original.png'
    plt.savefig(fname_1)
    plt.close()

    data_at_japan_diff = data_at_japan - data_at_japan.shift(1)  # create the differenced series
    data_at_japan_diff = data_at_japan_diff.dropna()
    data_at_japan_diff = data_at_japan_diff['2020-01-23':'2020-10-28']#10-28
    #print(data_at_japan_diff)
    plt.figure(figsize=(10,5))
    plt.plot(data_at_japan_diff)
    plt.title('COVID-19 confirmed in Japan', y=-0.2)
    plt.xlabel("Date")
    plt.ylabel("Infected people")
    plt.grid(True)
    #plt.show()
    # save the figure
    fname_2 = 'diff.png'
    plt.savefig(fname_2)
    plt.close()

    res = sm.tsa.seasonal_decompose(data_at_japan_diff)  # decompose the series
    original = data_at_japan_diff  # original data
    trend_original = res.trend  # trend component
    seasonal_original = res.seasonal  # seasonal component
    residual = res.resid  # residual component
    plt.figure(figsize=(10, 20))  # create the figure and set its size
    plt.subplot(411)  # first of four stacked subplots (top)
    plt.plot(original)
    plt.title('COVID-19 confirmed (Original) in Japan', y=-0.17)
    plt.xlabel("Date")
    plt.ylabel("Infected people")
    plt.grid(True)
    # plot the trend component
    plt.subplot(412)  # second of four stacked subplots
    plt.plot(trend_original)
    plt.title('COVID-19 confirmed (Trend) in Japan', y=-0.17)
    plt.xlabel("Date")
    plt.ylabel("Infected people")
    plt.grid(True)
    # plot the seasonal component
    plt.subplot(413)  # third of four stacked subplots
    plt.plot(seasonal_original)
    plt.title('COVID-19 confirmed (Seasonality) in Japan', y=-0.17)
    plt.xlabel("Date")
    plt.ylabel("Infected people")
    plt.grid(True)
    # plot the residual component
    plt.subplot(414)  # fourth of four stacked subplots (bottom)
    plt.plot(residual)
    plt.title('COVID-19 confirmed (Residuals) in Japan', y=-0.17)
    plt.xlabel("Date")
    plt.ylabel("Infected people")
    plt.grid(True)
    plt.tight_layout()  # automatically adjust subplot spacing
    fname_3 = 'decompose.png'
    plt.savefig(fname_3)


    y = data_at_japan_diff.values.astype(float)
    test_size = 7  # number of points held out for testing

    train_original_data, test_original_data = split(y)
    train_normalized = normalized(train_original_data)

    window = 7  # window size used for training
    study_data, correct_data = sequence_creator(train_normalized, window)


    n_in_out = 1
    n_hidden = int(args[1])    # command-line arguments are strings
    drop_out = float(args[2])
    tf.random.set_seed(0)

    # parameters = {
    #               'n_hidden': [16, 32, 64, 128, 256, 512, 1024]
    #               'dropout': [0, 0.2, 0.4, 0.5, 0.6],
    # }

    # model = KerasClassifier(build_fn=gru,
    #                         verbose=0)
    # gridsearch = GridSearchCV(estimator=model, param_grid=parameters)
    # gridsearch.fit(study_data, correct_data)
    # print('Best params are: {}'.format(gridsearch.best_params_))

    gru = gru(n_in_out, n_hidden, drop_out)
    print(gru.summary())

    filename = 'gru_'+str(n_hidden)+'_'+str(drop_out)+'.png'
    plot_model(gru, show_shapes=True, show_layer_names=True, to_file=filename)
    Image(retina=False, filename=filename)

    epochs = 1
    start_time = time.time()
    history = gru.fit(study_data, correct_data, batch_size=1, epochs=epochs, validation_split=0.1, verbose=1, callbacks=[])  # lr_decay could be appended to callbacks
    print("Training time:", time.time() - start_time)

    # === Visualize training progress ===
    # mse
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    plt.plot(np.arange(len(train_loss)), train_loss, label="train_loss")
    plt.plot(np.arange(len(val_loss)), val_loss, label="val_loss")
    plt.title('Training and Validation loss')
    plt.ylim((0, 0.04))#add
    plt.legend()
    # plt.show()

    # === Visualize training progress ===
    # mae
    train_mae = history.history['mae']
    val_mae = history.history['val_mae']
    plt.plot(np.arange(len(train_mae)), train_mae, label="train_mae")
    plt.plot(np.arange(len(val_mae)), val_mae, label="val_mae")
    plt.title('Training and Validation mae')
    plt.ylim((0, 0.2))#add
    plt.legend()
    #plt.show()

    train_inverse = past_predict(study_data)

    upcoming_future=7
    predictions_infected_pepole = test_predict(upcoming_future)

    x_all =np.arange('2020-01-23','2020-10-29', dtype='datetime64[D]').astype('datetime64[D]')
    x_past_predict = np.arange('2020-01-30','2020-10-22', dtype='datetime64[D]').astype('datetime64[D]')#23-26
    x_train = np.arange('2020-01-23','2020-10-22', dtype='datetime64[D]').astype('datetime64[D]')
    x_test = np.arange('2020-10-22', '2020-10-29', dtype='datetime64[D]').astype('datetime64[D]')

    sns.set()
    COVID = plt.figure(figsize=(20,8))
    plt.title("COVID-19 in Japan", y=-0.15)
    plt.grid(True)
    plt.xlabel("Date")
    plt.ylabel("Nunber of Person infected with corona virus (people)")
    plt.plot(x_all,data_at_japan_diff,'g',lw=3,label='daily_at_japan')
    # plt.plot(x_train,train_original_data,label='train_data')
    # plt.plot(x_test,test_original_data,label='test_data')
    plt.plot(x_past_predict,train_inverse,color='b', ls='-',lw=3,alpha=0.7, label='past_predict')  # maybe +8
    plt.plot(x_test, predictions_infected_pepole, 'r',lw=3,alpha=0.7,label='upcoming_future')
    plt.legend(loc='upper left')
    #plt.show()

    sns.set()
    COVID = plt.figure(figsize=(20,8))
    plt.title("COVID-19 in Japan", y=-0.15)
    plt.grid(True)
    plt.xlabel("Date")
    plt.ylabel("Nunber of Person infected with corona virus (people)")
    plt.plot(x_test,test_original_data,color='b', ls='-',lw=3,alpha=0.7, label='past_predict')
    plt.plot(x_test, predictions_infected_pepole, 'r',lw=3,alpha=0.7,label='upcoming_future')
    #plt.show()

    train_mae, train_mse, train_rmse, train_r2, test_mae, test_mse, test_rmse, test_r2 = eval_func(train_inverse, test_original_data, predictions_infected_pepole)
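The split used in this example takes only the series and holds out the last few observations; a minimal sketch consistent with test_size = 7 above (the real helper may parameterize this differently):

def split(y, test_size=7):
    # Hypothetical sketch: the last test_size observations form the test set.
    return y[:-test_size], y[-test_size:]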