def train(exp):
    data = dataset.build()
    training, validation = dataset.split(data, 0.80)
    # Python 3 print calls (the original used Python 2 print statements)
    print("Data loaded! Training...")
    exp.train(training, validation,
              optimize="nag",
              learning_rate=0.0001,
              momentum=0.5)
    print("Saving network to net.data...")
    exp.save("net.data")
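# The project's `dataset.split` helper is not shown in this snippet. A minimal
# sketch of a ratio-based split consistent with the call above might look like
# this; the name, signature, and shuffling behaviour are assumptions, not the
# project's actual implementation:
import random

def split(data, ratio):
    """Shuffle `data` and cut it into two lists at the given ratio."""
    shuffled = list(data)
    random.shuffle(shuffled)
    cut = int(len(shuffled) * ratio)
    return shuffled[:cut], shuffled[cut:]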
def main():
    configure_logger()
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    labels = {format(i, "05d"): i for i in range(1, SAMPLE_NUMBER + 1)}
    all_ids = list(labels.keys())
    partition = dataset.split(all_ids)

    # Dataset loading params
    params = {'batch_size': 64, 'shuffle': True, 'num_workers': 6}

    logger.info("Generating training set...")
    training_set = FlyingChairsDataset(DATA_FOLDER, partition["train"])
    training_generator = data.DataLoader(training_set, **params)

    logger.info("Generating validation set...")
    validation_set = FlyingChairsDataset(DATA_FOLDER, partition["val"])
    validation_generator = data.DataLoader(validation_set, **params)

    logger.info("Starting training")
    max_epochs = 10
    for epoch in range(max_epochs):
        logger.info("Epoch {}".format(epoch))
        for img1, img2, flo in training_generator:
            # Transfer to GPU
            img1, img2, flo = img1.to(device), img2.to(device), flo.to(device)
            # Model computations go here (forward pass, loss, backward pass)

        # Validation
        with torch.set_grad_enabled(False):
            for img1, img2, flo in validation_generator:
                # Transfer to GPU
                img1, img2, flo = img1.to(device), img2.to(device), flo.to(device)
                # Model computations go here (forward pass, validation loss)
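# `FlyingChairsDataset` is defined elsewhere in the project. A minimal sketch
# of a `torch.utils.data.Dataset` yielding the (img1, img2, flow) triples the
# training loop above expects could look like this; the file naming scheme and
# loading details are assumptions based on the standard FlyingChairs layout:
import os
import numpy as np
import torch
from torch.utils import data
from PIL import Image

class FlyingChairsDataset(data.Dataset):
    """Yields (img1, img2, flow) triples, one per sample id."""

    def __init__(self, folder, ids):
        self.folder = folder
        self.ids = ids

    def __len__(self):
        return len(self.ids)

    def _image(self, name):
        img = np.array(Image.open(os.path.join(self.folder, name)))
        # HWC uint8 -> CHW float tensor in [0, 1]
        return torch.from_numpy(img).permute(2, 0, 1).float() / 255.0

    def _flow(self, name):
        # .flo format: float32 magic (202021.25), int32 width/height,
        # then interleaved float32 (u, v) pairs
        with open(os.path.join(self.folder, name), 'rb') as f:
            np.fromfile(f, np.float32, count=1)  # skip magic number
            w = int(np.fromfile(f, np.int32, count=1)[0])
            h = int(np.fromfile(f, np.int32, count=1)[0])
            flow = np.fromfile(f, np.float32, count=2 * w * h)
        return torch.from_numpy(flow.reshape(h, w, 2)).permute(2, 0, 1)

    def __getitem__(self, index):
        sample_id = self.ids[index]
        return (self._image(sample_id + '_img1.ppm'),
                self._image(sample_id + '_img2.ppm'),
                self._flow(sample_id + '_flow.flo'))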
from datetime import datetime

def load_data(fn, data_path, split_date):
    ratings_ = dataset.load(fn, path=data_path, delim=',')
    ratings = dataset.parse_timestamp(ratings_)

    # rename ratings columns
    ratings = ratings.rename(columns={
        "userId": "user_id",
        "movieId": "item_id",
    })

    # MovieLens data stats
    print("ratings columns: {}".format(ratings.columns))
    print("No. of rows in ratings df: {}".format(ratings.shape[0]))
    print("Min datetime: {}, max datetime: {}".format(
        ratings["datetime"].min(), ratings["datetime"].max()))

    # `pd.datetime` has been removed from pandas; use the stdlib datetime
    split_time = datetime.strptime(split_date, '%Y-%m-%d %H:%M:%S.%f')

    # split train/test folds
    train_df, test_df = dataset.split(ratings, split_time)
    print("Size of train dataset: {} & size of test dataset: {}".format(
        train_df.shape[0], test_df.shape[0]))
    print(ratings.head(5))
    return train_df, test_df
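# Here `dataset.split` cuts on a timestamp rather than a ratio. A minimal
# pandas sketch of such a time-based split (an assumption about the helper,
# not its actual source) might be:
def split(ratings, split_time):
    """Rows strictly before `split_time` are train; the rest are test."""
    train_df = ratings[ratings["datetime"] < split_time]
    test_df = ratings[ratings["datetime"] >= split_time]
    return train_df, test_df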
if __name__ == '__main__':
    data_file = 'mnist.pkl'
    learning_rate = 0.001
    epochs = 10000
    valid_ratio = 0.2
    batch_size = 100
    borrow = True

    data = dataset.load(data_file)
    m, n = data[0].shape
    k = np.max(data[1]) + 1
    s = int(m * (1 - valid_ratio))
    print('data:', data[0].shape, data[1].shape, m, n, k, s)

    train_x, valid_x, train_y, valid_y = dataset.split(
        dataset.pick(data, m, random=False), s)
    del data

    trainer = MLPTrainer(
        (train_x, train_y), s, n, k, 100,
        valid_data=(valid_x, valid_y)
    )
    trainer.train(
        epochs=epochs,
        learning_rate=learning_rate,
        batch_size=batch_size
    )
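# In this snippet `dataset.split` takes an (x, y) pair plus an integer cut
# point `s` and returns train/valid features and labels. A sketch of that
# behaviour, with the return order inferred from the unpacking above (an
# assumption, not the project's source), could be:
def split(data, s):
    x, y = data
    return x[:s], x[s:], y[:s], y[s:]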
        entry = pairs.get(dataset.LABELS[i])
        if entry is None:
            continue
        if len(entry['x']) != 2:
            continue
        color = COLOR_MAP(to_color(i))
        plt.plot(entry['x'], entry['y'], color=color)

    # use `is None` rather than `== None` for the optional filename
    if fname is None:
        plt.show()
    else:
        plt.savefig(fname=fname)
        print("Saved image to " + fname)


if __name__ == '__main__':
    import sys

    datasets = dataset.parse()
    siamese, _, _ = gradtype_model.create()
    siamese.load_weights(sys.argv[1])

    train_datasets, validate_datasets = dataset.split(datasets)
    train_datasets = dataset.trim_dataset(train_datasets)
    validate_datasets = dataset.trim_dataset(validate_datasets)

    train_coords = dataset.evaluate_model(siamese, train_datasets)
    validate_coords = dataset.evaluate_model(siamese, validate_datasets)

    fname = sys.argv[2] if len(sys.argv) >= 3 else None
    pca(train_coords, validate_coords, fname)
    perc_tot = (c1 + c2) * 100 / (ft + et)

    perc_full = float("{0:.2f}".format(perc_full))
    perc_empty = float("{0:.2f}".format(perc_empty))
    perc_tot = float("{0:.2f}".format(perc_tot))

    # write one CSV row per evaluated model
    with open(FILE_NAME, 'a') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            dataset, model_name, c1, w2, ft, w1, c2, et,
            perc_full, perc_empty, perc_tot))


# REPEAT FOR ALL THE MODELS
for data in tqdm(os.listdir(MODEL_DIR)):
    if not data.startswith('.'):
        model_name = data.split('.')
        model_ext = model_name[-1]
        model_graph = '.'.join(model_name[:-1])
        # a TensorFlow checkpoint is stored as several files; key on the
        # '.data-00000-of-00001' shard so each model is evaluated once
        # (`eval` here is the script's own evaluation function, which
        # shadows the builtin)
        if model_ext == 'data-00000-of-00001':
            eval(model_graph)

with open(FILE_NAME, 'a') as f:
    f.write('{}\n'.format(NOTES))

########## PLOT DATA and SHOW PREDICTIONS ##########
# plot the data
# fig = plt.figure()
#
# # reset the graph
# tf.reset_default_graph()
# conv2_convnet = conv2.convnet([None, IMG_SIZE, IMG_SIZE, 1], 'input', LR)
import download
import reformat
import normalise
import features
import model
import performance
import dataset

if __name__ == "__main__":
    # We begin by downloading the data. The data will be in the form of
    # "events" data: each datapoint for each patient is a recorded event.
    X, Y = download.download()

    # The event data is reformatted: the given variables are selected and
    # time-dependent events are transformed into a path.
    X = reformat.reformat(X,
                          static_variables=["Age", "Gender"],
                          dynamic_variables=["Creatinine", "Glucose"])

    # Now we normalise the data.
    X = normalise.normalise(X)

    # We extract features from the input data. (Binding the result to a new
    # name avoids shadowing the `features` module.)
    feature_matrix = features.extract(X)

    # The dataset is now split into a training set and a testing set.
    features_train, Y_train, features_test, Y_test = dataset.split(
        feature_matrix, Y, proportion=0.75)

    # We now train the model with the selected features.
    classifier = model.train(features_train, Y_train)

    # Finally, we evaluate the model's performance.
    performance.evaluate(classifier, features_test, Y_test)
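# The `dataset.split` used above returns train and test pairs given a
# proportion. A minimal sketch matching that call, with the return order
# inferred from the unpacking above (the real helper may also shuffle),
# could be:
def split(features, Y, proportion=0.75):
    cut = int(len(Y) * proportion)
    return features[:cut], Y[:cut], features[cut:], Y[cut:]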
import dataset
import model as gradtype_model
import utils as gradtype_utils

import keras.metrics
from keras.optimizers import Adam

TOTAL_EPOCHS = 2000000

# Save weights every `SAVE_EPOCHS` epochs
SAVE_EPOCHS = 50

#
# Prepare dataset
#

print('Loading dataset')
datasets = dataset.parse()
train_datasets, validate_datasets = dataset.split(datasets, 'regression')
validate_x = dataset.gen_regression(validate_datasets)
validate_y = gradtype_model.generate_one_hot_regression(validate_x['labels'])

#
# Load model
#

siamese, _, model = gradtype_model.create()
start_epoch = gradtype_utils.load(siamese, 'gradtype-regr-')

adam = Adam(lr=0.001)


def top_5(y_true, y_pred):
    # Assumed body: the original snippet is truncated here; a standard
    # Keras top-5 accuracy metric would be
    return keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=5)
        f'\n\nLambda {best_lamda}: accuracy={acc}\trecall={rec}\tprecision={pre}')
    print('*************\n\n')

# 5 - PCA
"""
Using the best model obtained in the previous steps, fit on an input
made of two principal components.
"""
# 5.a - Apply PCA with two principal components
print('Dimensionality reduction with PCA')
X, y = data.array_X_y()
pca = PCA(X)
pca_X = pca.fit(2)
pca.plot()

X_train, X_test, y_train, y_test = split(pca_X, y, 0.8)
print(f'Training set shape: {X_train.shape}')
print(f'Labels shape: {y_train.shape}')

print("\n\nFitting the model with two principal components")
logistic_regression = LogisticRegression(best_bias)
logistic_regression.fit(X_train, y_train.reshape(-1, 1), best_lr, b,
                        epochs, best_lamda)
predictions = logistic_regression.predict(X_test)

for metric in metrics:
    name = metric.__class__.__name__
    results[name] = metric(y_test, predictions[:, 0])
    print('{metric}: {value}'.format(metric=name, value=results[name]))

print('*************\n\n')
print('FINISHED')
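# The `PCA` wrapper above is a project class, not sklearn's. A minimal sketch
# matching its call pattern is below; the constructor taking the data,
# `fit(n)` returning the projection, and `plot()` showing explained variance
# are all inferences from the call site, not the project's actual code:
import numpy as np
import matplotlib.pyplot as plt

class PCA:
    def __init__(self, X):
        self.X = X - X.mean(axis=0)
        # SVD of the centred data: rows of Vt are the principal directions
        self.U, self.S, self.Vt = np.linalg.svd(self.X, full_matrices=False)

    def fit(self, n_components):
        # project onto the first n_components principal directions
        return self.X @ self.Vt[:n_components].T

    def plot(self):
        var = self.S ** 2 / np.sum(self.S ** 2)
        plt.bar(range(1, len(var) + 1), var)
        plt.xlabel('component')
        plt.ylabel('explained variance ratio')
        plt.show()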