def predictGaitCycle(df):
    # Separate the six gait-speed targets from the shared features.
    X = df.drop(columns=[
        'Young_Slow', 'Young_Medium', 'Young_Fast',
        'Adult_Slow', 'Adult_Medium', 'Adult_Fast'
    ])
    YS = df['Young_Slow']
    YM = df['Young_Medium']
    YF = df['Young_Fast']
    AS = df['Adult_Slow']
    AM = df['Adult_Medium']
    AF = df['Adult_Fast']

    # Feature sets augmented with the other speeds of the same age group.
    XYS = pd.concat([X, YM, YF], axis=1)
    XYM = pd.concat([X, YS, YF], axis=1)
    XYF = pd.concat([X, YS, YM], axis=1)
    XAS = pd.concat([X, AM, AF], axis=1)
    XAM = pd.concat([X, AS, AF], axis=1)
    XAF = pd.concat([X, AS, AM], axis=1)

    degree = 2

    # Young slow regression by polynomial method
    X_train, X_test, Y_train, Y_test = ttsplit(X, YS, test_size=0.3)
    poly = PolynomialFeatures(degree)
    printGraph(X_train, X_test, Y_train, Y_test, poly, X)  # plotting helper defined elsewhere
    x_poly_train = poly.fit_transform(X_train)
    # Use transform (not fit_transform) on the test set so it is expanded
    # with the feature mapping learned from the training set.
    x_poly_test = poly.transform(X_test)
    reg = lr()
    reg.fit(x_poly_train, Y_train)
    y_pred = reg.predict(x_poly_test)
    mae = mean_absolute_error(Y_test, y_pred)
    mse = mean_squared_error(Y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(Y_test, y_pred)
    print(mae, mse, rmse, r2, sep=' ')

    # Young slow walk predicted from medium and fast walk, by polynomial method
    X_train, X_test, Y_train, Y_test = ttsplit(XYS, YS, test_size=0.3)
    poly = PolynomialFeatures(degree)
    x_poly_train = poly.fit_transform(X_train)
    x_poly_test = poly.transform(X_test)
    reg = lr()
    reg.fit(x_poly_train, Y_train)
    y_pred = reg.predict(x_poly_test)
    mae = mean_absolute_error(Y_test, y_pred)
    mse = mean_squared_error(Y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(Y_test, y_pred)
    print(mae, mse, rmse, r2, sep=' ')
    return True
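# The two evaluation blocks in predictGaitCycle are identical apart from the
# feature matrix, so the pattern factors cleanly into a helper; a minimal
# sketch with the needed sklearn imports (fit_poly_and_report is a
# hypothetical name, not part of the original code).
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split as ttsplit
from sklearn.preprocessing import PolynomialFeatures

def fit_poly_and_report(features, target, degree=2, test_size=0.3):
    """Fit a polynomial regression of the given degree and print MAE/MSE/RMSE/R2."""
    X_train, X_test, y_train, y_test = ttsplit(features, target, test_size=test_size)
    poly = PolynomialFeatures(degree)
    x_poly_train = poly.fit_transform(X_train)
    x_poly_test = poly.transform(X_test)  # reuse the training feature mapping
    reg = LinearRegression().fit(x_poly_train, y_train)
    y_pred = reg.predict(x_poly_test)
    mse = mean_squared_error(y_test, y_pred)
    print(mean_absolute_error(y_test, y_pred), mse, np.sqrt(mse),
          r2_score(y_test, y_pred), sep=' ')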
def train_incremental(X, y):
    # Split data into training and testing sets,
    # then split the training set in half:
    # the first part is used as the original data,
    # the second part is used as the incremental data.
    X_train, X_test, y_train, y_test = ttsplit(X, y, test_size=0.1, random_state=0)
    X_train_origin, X_train_incremental, y_train_origin, y_train_incremental = ttsplit(
        X_train, y_train, test_size=0.5, random_state=0)

    xg_train_origin = xgb.DMatrix(X_train_origin, label=y_train_origin)
    xg_train_incremental = xgb.DMatrix(X_train_incremental, label=y_train_incremental)
    xg_test = xgb.DMatrix(X_test, label=y_test)

    # ================= xgboost classification model ====================#
    # `category` is expected to be defined at module level (the list of class labels).
    params = {'objective': 'multi:softmax', 'num_class': len(category)}
    params['silent'] = 1  # note: replaced by 'verbosity' in newer xgboost releases
    num_round = 30
    model_origin = xgb.train(params, xg_train_origin, num_round)
    model_origin.save_model('xgb_model.model')

    # ================= train two versions of the model =====================#
    model_none_incremental = xgb.train(params, xg_train_incremental, num_round)
    model_incremental = xgb.train(params, xg_train_incremental, num_round,
                                  xgb_model='xgb_model.model')

    # benchmark
    pred_origin = model_origin.predict(xg_test)
    score = metrics.accuracy_score(y_test, pred_origin)
    f1 = metrics.f1_score(y_test, pred_origin, average='weighted')
    print('original model accuracy of %0.3f, and f1 score of %0.3f' % (score, f1))

    # "before": trained from scratch on the incremental half only
    pred_none_incremental = model_none_incremental.predict(xg_test)
    score = metrics.accuracy_score(y_test, pred_none_incremental)
    f1 = metrics.f1_score(y_test, pred_none_incremental, average='weighted')
    print('non-incremental model accuracy of %0.3f, and f1 score of %0.3f' % (score, f1))

    # "after": continued training from the saved original model
    pred_incremental = model_incremental.predict(xg_test)
    score = metrics.accuracy_score(y_test, pred_incremental)
    f1 = metrics.f1_score(y_test, pred_incremental, average='weighted')
    print('incremental model accuracy of %0.3f, and f1 score of %0.3f' % (score, f1))
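# A minimal way to exercise train_incremental on synthetic data, assuming the
# module-level names the function relies on (xgb, metrics, ttsplit, category);
# the dataset below is invented for illustration.
import xgboost as xgb
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split as ttsplit

category = [0, 1, 2]  # class labels; the function only reads len(category)
X, y = make_classification(n_samples=2000, n_features=20, n_informative=10,
                           n_classes=len(category), random_state=0)
train_incremental(X, y)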
def get_best_booster(target_variable, max_interactions, df, astro_columns):
    booster = None
    best_score = 1
    best_booster = None
    for current_run in range(max_interactions):
        X = df[astro_columns].values
        Y = df[target_variable].values
        total_test = xgb.DMatrix(X, feature_names=astro_columns)
        X_train_1, X_train_2, y_train_1, y_train_2 = ttsplit(
            X, Y, test_size=0.3, random_state=None, shuffle=True)
        # ETA, DEPTH, NUM_TREES and create_booster_swing_trade are module-level
        # names; the previous booster is passed back in for continued training.
        booster = create_booster_swing_trade(ETA, DEPTH, NUM_TREES,
                                             X_train_1, y_train_1,
                                             X_train_2, y_train_2,
                                             astro_columns, booster)
        # Score the booster on the full dataset; keep the best one seen so far.
        current_score = mse(booster.predict(total_test), Y)
        if current_score < best_score:
            best_score = current_score
            best_booster = booster
        gc.collect()
        print("{} - {} of {}, {}".format(target_variable, current_run,
                                         max_interactions, best_score))
        if best_score < MIN_PRECISION:
            break
    return best_booster, best_score
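# create_booster_swing_trade is defined elsewhere; since it receives the
# previous booster back on each run, it plausibly continues training through
# xgb.train's xgb_model argument. A hypothetical reconstruction of that
# pattern only (parameter meanings and the objective are assumptions):
import xgboost as xgb

def create_booster_swing_trade(eta, depth, num_trees,
                               X_train, y_train, X_valid, y_valid,
                               feature_names, prev_booster=None):
    dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=feature_names)
    dvalid = xgb.DMatrix(X_valid, label=y_valid, feature_names=feature_names)
    params = {'eta': eta, 'max_depth': depth, 'objective': 'reg:squarederror'}
    # Warm-start from the previous booster when one is passed in
    return xgb.train(params, dtrain, num_boost_round=num_trees,
                     evals=[(dvalid, 'valid')], verbose_eval=False,
                     xgb_model=prev_booster)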
def train_test_split(dataframe):
    # Note: this shadows sklearn's train_test_split; ttsplit is the sklearn import.
    class_new = dataframe['class']
    dataframe = dataframe.drop(columns=["class"])
    X_train, X_test, y_train, y_test = ttsplit(dataframe, class_new,
                                               test_size=0.3, random_state=42)
    Data = [X_train, X_test, y_train, y_test]
    return Data
def train_test_split(dataframe):
    class_new = dataframe['Class']
    # Drop the label column from the features so it does not leak into X
    # (mirrors the 'class' variant above).
    dataframe = dataframe.drop(columns=["Class"])
    X_train, X_test, y_train, y_test = ttsplit(dataframe, class_new,
                                               test_size=0.3, random_state=42)
    Data = [X_train, X_test, y_train, y_test]
    return Data
def train(model_name, optimizer_name, scheduler_name, lr,
          img_path, mask_path, names_path, epochs=10):
    model = models.getModel(model_name)
    model.build((None, None, None, 3))
    model.summary()

    scheduler = schedulers.getScheduler(scheduler_name, lr)
    optimizer = optimizers.getOptimizer(optimizer_name, scheduler)
    cce = tf.keras.losses.CategoricalCrossentropy()

    train_loss_metric = tf.keras.metrics.Mean()
    train_accuracy_metric = tf.keras.metrics.CategoricalAccuracy()
    test_loss_metric = tf.keras.metrics.Mean()
    test_accuracy_metric = tf.keras.metrics.CategoricalAccuracy()

    with open(names_path, 'r') as file_list:
        names = file_list.read().splitlines()

    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
    test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    trainset, testval = ttsplit(names, train_size=0.9)
    test, val = ttsplit(testval, train_size=0.5)
    # Note: this line overrides the split above, so training runs on every
    # name, including those in the validation and test lists.
    trainset = names

    total_step = 0
    with tf.device('/device:GPU:0'):
        for epoch in range(epochs):
            for step_, batch in enumerate(trainset):
                total_step += 1
                img, mask = utils.genData(batch, mask_path, img_path)
                with tf.GradientTape() as tape:
                    mask_pred = model(img)
                    loss = cce(mask, mask_pred)
                train_loss_metric.update_state(loss)
                train_accuracy_metric.update_state(mask, mask_pred)
                grads = tape.gradient(loss, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))

                # Every 150 steps, log training metrics, run validation,
                # log validation metrics, and reset all metric states.
                if step_ % 150 == 0:
                    with train_summary_writer.as_default():
                        tf.summary.scalar('Training Loss',
                                          train_loss_metric.result(), step=total_step)
                        tf.summary.scalar('Training Accuracy',
                                          train_accuracy_metric.result(), step=total_step)
                    for step, batch in enumerate(val):
                        img_val, mask_val = utils.genData(batch, mask_path, img_path)
                        mask_pred_val = model(img_val)
                        loss_val = cce(mask_val, mask_pred_val)
                        test_loss_metric.update_state(loss_val)
                        test_accuracy_metric.update_state(mask_val, mask_pred_val)
                    with test_summary_writer.as_default():
                        tf.summary.scalar('Validation Loss',
                                          test_loss_metric.result(), step=total_step)
                        tf.summary.scalar('Validation Accuracy',
                                          test_accuracy_metric.result(), step=total_step)
                    print('Epoch: ' + str(epoch) + ' | Batch: ' + str(step)
                          + ' | Training Loss: '
                          + str(train_loss_metric.result().numpy())
                          + ' | Training Accuracy: '
                          + str(train_accuracy_metric.result().numpy()))
                    print('Epoch: ' + str(epoch) + ' | Batch: ' + str(step)
                          + ' | Validation Loss: '
                          + str(test_loss_metric.result().numpy())
                          + ' | Validation Accuracy: '
                          + str(test_accuracy_metric.result().numpy()))
                    train_loss_metric.reset_states()
                    train_accuracy_metric.reset_states()
                    test_loss_metric.reset_states()
                    test_accuracy_metric.reset_states()
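# The eager per-sample loop above can usually be sped up by compiling the
# gradient step with tf.function; a minimal sketch, assuming the same model,
# cce, and optimizer objects as in train() above.
@tf.function
def train_step(img, mask):
    # One compiled optimization step: forward pass, loss, gradients, update.
    with tf.GradientTape() as tape:
        mask_pred = model(img, training=True)
        loss = cce(mask, mask_pred)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss, mask_pred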
def train_test_split(X, Y, test_fraction, random_seed=None):
    return ttsplit(X, Y, test_size=test_fraction, random_state=random_seed)
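# A quick usage sketch for the thin wrapper above; the arrays are invented.
import numpy as np
from sklearn.model_selection import train_test_split as ttsplit

X = np.arange(20).reshape(10, 2)
Y = np.arange(10)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_fraction=0.3,
                                                    random_seed=42)
print(X_train.shape, X_test.shape)  # (7, 2) (3, 2)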
# Converting the data into vectorized format
vect = CountVectorizer(stop_words="english", max_features=10000).fit(final_sent)
print(len(vect.get_feature_names()))
train_vectorized = vect.transform(final_sent)
print("Stage 2 complete")

# Oversampling the data to solve the issue of class imbalance
# (newer imbalanced-learn releases use sampling_strategy / fit_resample
# in place of the deprecated ratio / fit_sample)
sampler = RandomOverSampler(sampling_strategy={1: 661902, 0: 661902}, random_state=0)
X_rs, y_rs = sampler.fit_resample(train_vectorized, youtube_train['tag'])
print("Stage Oversampler")

# Splitting the data for training and testing
x_train, x_test, y_train, y_test = ttsplit(X_rs, y_rs, test_size=0.25)
print("Stage 3 complete")

# Initializing the Random Forest classifier
randomfor = RandomForestClassifier()
randomfor.fit(x_train, y_train)
prediction = randomfor.predict(x_test)

# Printing the accuracy score
print(accuracy_score(y_test, prediction))
print("Stage 5 complete")

# Printing the overall metrics
from sklearn import metrics
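# To see what the oversampling step does in isolation, a minimal sketch on
# toy data (labels invented).
from collections import Counter
from imblearn.over_sampling import RandomOverSampler

X_toy = [[0], [1], [2], [3], [4], [5]]
y_toy = [0, 0, 0, 0, 1, 1]          # imbalanced: four 0s, two 1s
X_bal, y_bal = RandomOverSampler(random_state=0).fit_resample(X_toy, y_toy)
print(Counter(y_bal))               # Counter({0: 4, 1: 4})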
# Main data pipeline
data = pd.read_csv('mle_fraud_test.csv', sep=';', index_col=0)

# For time's sake, we limit the dataset to 500 data points, with about 20
# confirmed fraud cases and about 30 blocked cases for diversity.
data = data.iloc[5500:6000]
X = data[[c for c in data.columns if c != 'transaction_status']]
Y = data['transaction_status']

# Split train/test data with ratio 80/20.
XTrain, XTest, YTrain, YTest = ttsplit(X, Y, test_size=0.2)

importTrainData = fraudDetectionData()
importTestData = fraudDetectionData()
importTrainData.importData(XTrain, YTrain)
importTestData.importData(XTest, YTest)
XTrainNorm = importTrainData.normalizeData()
XTestNorm = importTestData.normalizeData()

# Longest part of the pipeline; should be optimized in the future.
importTrainData.buildPseudoClasses()
YTrainFull = importTrainData.getLabels()
# (Fragment: the next three statements run inside the word- and
# sentence-level preprocessing loops, whose headers are not shown.)
# Stemming the word, i.e. transforming it to its root form,
# and adding it to the temp_sent variable
temp_sent = temp_sent + " " + stemming.stem(word)
# Appending the sentence to final_sent to collect the stemmed data
final_sent.append(temp_sent)
# Incrementing the counter
a = a + 1

# Converting the data into vectorized format
vect = CountVectorizer(stop_words="english", max_features=10000).fit(final_sent)
print(len(vect.get_feature_names()))
train_vectorized = vect.transform(final_sent)
print("Stage 2 complete")

# Splitting the data for training and testing
x_train, x_test, y_train, y_test = ttsplit(train_vectorized,
                                           youtube_train['tag'], test_size=0.25)
print("Stage 3 complete")

# Initializing a linear-kernel Support Vector Machine
mysvm = SVC(kernel='linear')
# Training on the training split
mysvm.fit(x_train, y_train)
# Predicting on the held-out test split
prediction = mysvm.predict(x_test)
print("Stage 4 complete")

# Printing the accuracy score
print(accuracy_score(y_test, prediction))
print("Stage 5 complete")

# Printing the overall metrics
from sklearn import metrics
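# For context, a self-contained sketch of the preprocessing loop the fragment
# above comes from, assuming NLTK's SnowballStemmer and an invented sentence
# list; the variable names mirror the fragment, but the loop headers are
# reconstructed, not original.
from nltk.stem.snowball import SnowballStemmer

stemming = SnowballStemmer("english")
sentences = ["cats are running", "dogs barked loudly"]  # invented sample data

final_sent = []
a = 0
for sentence in sentences:
    temp_sent = ""
    for word in sentence.split():
        # Stem each word and accumulate it into the rebuilt sentence
        temp_sent = temp_sent + " " + stemming.stem(word)
    final_sent.append(temp_sent)
    a = a + 1

print(final_sent)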
def split_test(X, y):
    X, Xt, y, yt = ttsplit(X, y, test_size=0.33, random_state=42)

    # Slice the training data into four folds (using each fold's test indices).
    X_ = []
    y_ = []
    for _, y_index in KFold(n_splits=4).split(X):
        X_.append(X[y_index])
        y_.append(y[y_index])

    # Define models
    model = MultiOutputRegressor(XGBRegressor())
    model2 = MultiOutputRegressor(XGBRegressor())

    # Caution: MultiOutputRegressor.partial_fit requires the wrapped estimator
    # to implement partial_fit, which XGBRegressor does not, so the
    # partial_fit calls below will fail unless a wrapper provides it.

    print("Test 1")
    model.fit(X, y)
    print("\tFit all X and y:", mean_squared_error(yt, model.predict(Xt)))

    model2.fit(X_[0], y_[0])
    model3 = deepcopy(model2)
    print("\tFit only X_[0] and y_[0]:", mean_squared_error(yt, model2.predict(Xt)))

    model3.partial_fit(X_[1], y_[1])
    model4 = deepcopy(model3)
    print("\tFit partial X_[1] and y_[1]:", mean_squared_error(yt, model3.predict(Xt)))

    model4.partial_fit(X_[2], y_[2])
    model5 = deepcopy(model4)
    print("\tFit partial X_[2] and y_[2]:", mean_squared_error(yt, model4.predict(Xt)))

    model5.partial_fit(X_[3], y_[3])
    print("\tFit partial X_[3] and y_[3]:", mean_squared_error(yt, model5.predict(Xt)))

    print("Test 2")
    # Define models
    model = MultiOutputRegressor(XGBRegressor())
    model2 = MultiOutputRegressor(XGBRegressor())

    # Fit on all the data
    model.fit(X, y)
    print("\tFit all X and y:", mean_squared_error(yt, model.predict(Xt)))

    # flatten (defined elsewhere) presumably concatenates a list of fold arrays.
    model2.fit(flatten(X_[0:1]), flatten(y_[0:1]))
    model3 = deepcopy(model2)
    print("\tFit only X_[0:1] and y_[0:1]:", mean_squared_error(yt, model2.predict(Xt)))

    model3.partial_fit(flatten(X_[0:2]), flatten(y_[0:2]))
    model4 = deepcopy(model3)
    print("\tFit partial X_[0:2] and y_[0:2]:", mean_squared_error(yt, model3.predict(Xt)))

    model4.partial_fit(flatten(X_[1:3]), flatten(y_[1:3]))
    model5 = deepcopy(model4)
    print("\tFit partial X_[1:3] and y_[1:3]:", mean_squared_error(yt, model4.predict(Xt)))

    model5.partial_fit(flatten(X_[2:4]), flatten(y_[2:4]))
    print("\tFit partial X_[2:4] and y_[2:4]:", mean_squared_error(yt, model5.predict(Xt)))
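# XGBRegressor has no partial_fit, so the incremental calls above only work
# with an estimator that implements it. A minimal runnable variant of the
# same experiment using sklearn's SGDRegressor on invented data:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(400, 5))
y_demo = X_demo @ rng.normal(size=(5, 2))        # two regression targets

model = MultiOutputRegressor(SGDRegressor(max_iter=1000))
model.fit(X_demo[:200], y_demo[:200])            # initial fit on the first chunk
model.partial_fit(X_demo[200:], y_demo[200:])    # incremental update on the rest
print(mean_squared_error(y_demo, model.predict(X_demo)))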
def hypertune_parameters():
    # (Function header inferred from the call at the bottom of this snippet.)
    space = {
        'max_depth': hp.quniform("max_depth", 3, 20, 1),
        'gamma': hp.uniform('gamma', 1, 9),
        'reg_alpha': hp.quniform('reg_alpha', 40, 180, 1),
        'reg_lambda': hp.uniform('reg_lambda', 0, 1),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
        'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': hp.quniform("n_estimators", 100, 200, 5),
        'seed': 0
    }
    trials = Trials()
    best_hyperparams = fmin(fn=objective,
                            space=space,
                            algo=tpe.suggest,
                            max_evals=500,
                            trials=trials)
    print(best_hyperparams)


X_data, y_data = get_dataset("../agent/configs_xqn/buffer.csv")
X, Xt, y, yt = ttsplit(X_data, y_data, test_size=0.30, random_state=42)
print(f"Training set size: {len(X)}")
print(f"Test set size: {len(Xt)}")
# split_test(X, y)
hypertune_parameters()
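# The objective referenced by fmin is defined elsewhere; a minimal sketch of
# the shape hyperopt expects, assuming an XGBoost regressor and the X/Xt/y/yt
# splits above (note that quniform returns floats, so integer parameters
# need casting).
from hyperopt import STATUS_OK
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

def objective(space):
    model = XGBRegressor(
        max_depth=int(space['max_depth']),            # quniform yields floats
        gamma=space['gamma'],
        reg_alpha=int(space['reg_alpha']),
        reg_lambda=space['reg_lambda'],
        colsample_bytree=space['colsample_bytree'],
        min_child_weight=int(space['min_child_weight']),
        n_estimators=int(space['n_estimators']),
        random_state=int(space['seed']),
    )
    model.fit(X, y)
    loss = mean_squared_error(yt, model.predict(Xt))  # minimize test MSE
    return {'loss': loss, 'status': STATUS_OK}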
####################################### Main program (execution...)
data_Xtrain = np.load('X_train.npy')
data_Ytrain = np.load('y_train.npy')
data_Xtest = np.load('X_test.npy')

'''Tunable parameters'''
imgdata_nmber = len(data_Xtrain)
imgsize = 110        # downscaled image resolution (side length)
drop_ = 0.01         # Dropout rate
batch = 100
epoch = 9
test_n = 0.3         # test_size
validation_n = 0.3   # validation_split

X = data_pross(data_Xtrain[0:imgdata_nmber], imgdata_nmber) / 255.0
Y = to_categorical(data_Ytrain[0:imgdata_nmber])
Xtrain, Xtest, ytrain, ytest = ttsplit(X, Y, random_state=20, test_size=test_n)

model_ = Xception_model(imgsize, channel=3, drop=drop_, class_=3)
history = model_.fit(Xtrain, ytrain, batch_size=batch, epochs=epoch,
                     validation_split=validation_n)
model_.save('task1_cnn.h5')

###################################### Output results + plotting
'''Write the results to output.csv'''
X__test = data_pross(data_Xtest, len(data_Xtest)) / 255.0
# Note: predict_classes was removed in newer Keras; see the sketch below.
yfit = model_.predict_classes(X__test)
with open('output.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Index', 'Pred'])
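# Sequential.predict_classes was removed in recent Keras releases; a sketch
# of the equivalent under the current API, assuming a softmax output layer.
import numpy as np

# argmax over the class axis reproduces the old predict_classes behaviour
yfit = np.argmax(model_.predict(X__test), axis=1)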
# Map booleans and alignment strings onto small integers. Parentheses added so
# the "neutral"/"Unknown" test compares both strings; the original
# `x == "neutral" or "Unknown"` was always truthy whenever the comparison failed.
f = lambda x: (1 if x == True
               else 0 if x == False
               else 2 if x == "good"
               else 0 if x == "bad"
               else 1 if x in ("neutral", "Unknown")
               else x)
adjusted = df_new.applymap(f)

a = adjusted.drop(columns=[
    'Gender', 'Unnamed: 0', 'Eye color', 'Hair color', 'Race',
    'Publisher', 'Height', 'Skin color', 'Weight'
])
a = a.dropna()

# Separate dependent and independent variables
X = a.iloc[:, 1:]
Y = a.iloc[:, 0]
yonehot = keras.utils.to_categorical(Y)
xfloat = X.astype('float32')

# Split training and testing data
trainx, testx, trainy, testy = ttsplit(xfloat, yonehot, test_size=.2)

# Set up NN
network = keras.models.Sequential()
network.add(keras.layers.Dense(3, input_dim=167, activation='sigmoid'))
sgd = keras.optimizers.SGD(learning_rate=.1)
network.compile(optimizer=sgd, loss='categorical_crossentropy',
                metrics=['accuracy'])

# Run NN
network.fit(trainx, trainy, epochs=30, batch_size=128)
predicty = network.predict(testx)
# Evaluate NN
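# The chained conditional lambda above is easy to misread; an equivalent,
# arguably clearer dict-based mapping. This is a sketch: the .get fallback
# keeps unmapped values unchanged, matching the lambda's final `else x`.
value_map = {True: 1, False: 0, "good": 2, "bad": 0, "neutral": 1, "Unknown": 1}
adjusted = df_new.applymap(lambda x: value_map.get(x, x))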
# Getting the stopwords corpus
stopwords_list = stopwords.words('english')
print(stopwords_list[:5])

# Loading the final merged dataset
youtube_train = pd.read_csv(
    "C:/Users/prate/Desktop/ICT_solution/Data/final_data/final_data.csv",
    delimiter=',')
youtube_train_sen = youtube_train['video_title']
print(youtube_train_sen[1])

# Converting the titles to string format
youtube_train_sen = youtube_train['video_title'].values
youtube_train_sen = youtube_train_sen.astype(str)

# Splitting the data for training and testing
x_train, x_test, y_train, y_test = ttsplit(youtube_train_sen,
                                           youtube_train['tag'].values,
                                           test_size=0.25)

# Initializing the tokenizer with the max number of words set to 5000
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(x_train)  # fit the tokenizer vocabulary on the training titles
X_train = tokenizer.texts_to_sequences(x_train)
X_test = tokenizer.texts_to_sequences(x_test)

# Adding 1 because of the reserved 0 index
vocab_size = len(tokenizer.word_index) + 1
maxlen = 100
# Convert all sequences to equal lengths, with max length set to 100
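# The padding step announced by the comment above would typically use Keras'
# pad_sequences; a minimal sketch under that assumption.
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad (or truncate) every sequence to exactly maxlen tokens
X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)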
model = load_model("model.h5", custom_objects={'Attention': Attention,
                                               'binary_loss': binary_loss})

print("Loading Word2Vec Dictionary. This may take a long time...")
w2v = word2vec.KeyedVectors.load_word2vec_format(config.word2VecPath, binary=True)
# w2v = pickle.load(open("word2vec.bin", "rb"))

print("Loading Questions...")
dataFile = "outBoth.csv"
dataRows = pd.read_csv(dataFile)

print('Extracting Training Features...')
X_questions, X_captions, y, errors = extractFeatures(dataRows)
y = np.array(y)
# train_test_split accepts multiple arrays and splits them consistently
X_questions_train, X_questions_test, X_captions_train, X_captions_test, y_train, y_test = \
    ttsplit(X_questions, X_captions, y, test_size=0.25, random_state=1)

best = test(model.predict([X_questions_test, X_captions_test]), y_test)
print("Starting validation accuracy:", 100 * best)
print("Starting training accuracy:",
      100 * test(model.predict([X_questions_train, X_captions_train]), y_train))

if len(sys.argv) > 1 and sys.argv[1] == "train":
    try:
        for epoch in range(1000):
            print("Epoch:")
            model.fit([X_questions_train, X_captions_train], y_train,
                      batch_size=2500, epochs=1, verbose=1)
            print("Training accuracy:",
                  100 * test(model.predict([X_questions_train, X_captions_train]),
                             y_train))
            testacc = test(model.predict([X_questions_test, X_captions_test]), y_test)