Example #1
def train(model,
          training_data,
          num_epochs=10,
          lr=1e-2,
          batch_size=1,
          validation_data=None):

    # Build the dataset and get the dataloader for the training data
    X_train, y_train = training_data
    train_minibatches = utils.load_data(X_train, y_train, batch_size=batch_size)

    # Validation data
    X_valid, y_valid = validation_data
    valid_minibatches = utils.load_data(X_valid,
                                        y_valid,
                                        batch_size=len(y_valid))

    history = {'training_loss': [], 'validation_loss': []}

    # This assumes that the model exposes its single layer as `model.layer`
    W = model.layer.weight
    b = model.layer.bias

    # Main optimization loop
    for epoch in range(num_epochs):
        # Loop over all mini-batches
        batch_loss = []
        for inputs, targets in train_minibatches:

            # Compute the predicted outputs
            outputs = inputs.mm(W) + b

            # Evaluate the difference between the known targets
            # and the predicted targets
            loss = F.mse_loss(outputs, targets)

            # Optimization step: backpropagate, then update W and b in place
            loss.backward()
            with torch.no_grad():
                W -= lr * W.grad
                b -= lr * b.grad
            W.grad.zero_()
            b.grad.zero_()

            # Add the loss for this mini-batch to the array of losses
            batch_loss.append(loss.item())

        # The loss for each epoch is the average loss observed for all mini-batches
        avg_loss = torch.tensor(batch_loss).mean().item()

        history['training_loss'].append(avg_loss)
        # Evaluate on the validation data
        print(f'Epoch {epoch}: {avg_loss}')

        # Validation loss/error
        for x_valid, y_valid in valid_minibatches:
            print(x_valid.size())
            pred = model(x_valid)
            err = F.mse_loss(pred, y_valid)
            err = err.item()
            history['validation_loss'].append(err)

    return history
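Note that utils.load_data is not shown in this example. A hypothetical stand-in that matches how it is used above (something that yields (inputs, targets) mini-batches), assuming it simply wraps the arrays in a TensorDataset and DataLoader:

import torch
from torch.utils.data import TensorDataset, DataLoader

def load_data(X, y, batch_size=1):
    # Hypothetical stand-in for utils.load_data: wrap the arrays in a
    # TensorDataset and return an iterable of (inputs, targets) mini-batches.
    dataset = TensorDataset(torch.as_tensor(X), torch.as_tensor(y))
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)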
Example #2
def augment_data(train_filename, new_train_filename):
	products_data_map, products_name_map = get_product_data()
	data = load_data(train_filename)

	# Generate synthetic training rows from every product in the catalogue
	generated_data = flatten([generate_data_from_product(sku, product['name']) for sku, product in products_data_map.items()])
	generated_data_df = pd.DataFrame(generated_data, columns=['user', 'sku', 'category', 'query', 'click_time', 'query_time'])

	# Append the generated rows to the original training data and shuffle
	new_train = pd.concat([data, generated_data_df]).sample(frac=1).reset_index(drop=True)

	save_data(new_train, new_train_filename)
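get_product_data, generate_data_from_product, and flatten are project helpers that are not shown here. For illustration only, flatten presumably collapses the per-product lists of generated rows into one flat list, along these lines:

from itertools import chain

def flatten(list_of_lists):
    # Hypothetical helper: [[row, row], [row], ...] -> [row, row, row, ...]
    return list(chain.from_iterable(list_of_lists))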
Example #3
def get_dataset(path, features, normData=True, FourTransform=True, windowSize=None, mv_avg=10, beanFunc=Package):
    '''
    `path` points to the communication records for a time interval.
    This function reads the records line by line, uses `beanFunc` to turn each record into an
    object P describing one communication event (one connection at one point in time),
    and stores everything in a database object DB.
    Structure of DB:
        db: an (N, T, D) array holding the actual data
        getConnectId([id1, id2]): returns the connection representation for the given ids
        search(connectid): returns the [id1, id2, ...] matching connectid (fuzzy matching by default)
    This function additionally:
        1. standardizes DB.db (when normData=True)
        2. splits DB.db into windows of size windowSize and analyzes the spectrum of each feature (DFT)
    Returns:
        db: the DB object
        np_db: db.db after the processing above
    :param path: 
    :param features: 
    :param normData: 
    :param FourTransform: 
    :param windowSize: 
    :return: 
    np_db: an (N, T, D) array with D = len(features). If FourTransform=False these are the raw
    features, otherwise the DFT-transformed features.
    For FourTransform=False, np_db[:, t, d] is the value of feature d at second t.
    '''
    db = load_data(path, features, beanFunc)
    np_db = db.db
    # v=np.ones((mv_avg))/mv_avg
    # N,T,D=np_db.shape
    # for n in range(N):
    #     for d in range(D):
    #         np_db[n,:,d]=np.convolve(np_db[n,:,d],v,'same')

    if normData:
        np_db = standardizeData(np_db, 'STD')
    if FourTransform:
        np_db, feature_size, steps = fourierAnalysis(np_db, windowSize=windowSize)
        np_db = np.abs(np_db)
    return db, np_db
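fourierAnalysis is not shown here. A self-contained sketch of the kind of windowed spectrum it appears to compute (splitting each series into non-overlapping windows of windowSize samples and taking the DFT of each window); the exact behaviour and return values below are assumptions:

import numpy as np

def windowed_fft(np_db, windowSize):
    # np_db: (N, T, D) array of N connections, T time steps, D features.
    N, T, D = np_db.shape
    steps = T // windowSize
    # Split the time axis into `steps` windows of `windowSize` samples each
    windows = np_db[:, :steps * windowSize, :].reshape(N, steps, windowSize, D)
    spectra = np.fft.fft(windows, axis=2)  # complex spectra; the caller keeps np.abs(...)
    return spectra, windowSize, steps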
Example #4
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline

from data.utils import load_data
from preprocessing import preprocess_data
from visualization import plot_learning_curves, get_errors_input
from metrics import custom_map_at_k
from feature_selection import get_features_extractor
from data_augmentation import augment_data

print('Augmenting training data set')
augment_data('train.csv', 'train_augmented.csv')

print('Loading training and testing set')
train_data = load_data('train_augmented.csv')
test_data = load_data('test.csv')

print('Preprocessing')
X_train, Y_train = preprocess_data(train_data)
X_test, Y_test = preprocess_data(test_data)

model_name = 'lr'

# print('Loading model')
# model = joblib.load('./models/' + model_name + '_classifier.pkl')
print('Fitting model')
model = Pipeline([
	('features', get_features_extractor()),
	('LogisticRegression', LogisticRegression())
])
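The snippet ends right after the pipeline is defined; a plausible continuation, assuming X_train/Y_train and X_test/Y_test are the arrays produced by preprocess_data above and reusing the persistence path from the commented-out joblib.load call:

model.fit(X_train, Y_train)

print('Evaluating model')
Y_pred = model.predict(X_test)
print(classification_report(Y_test, Y_pred))

print('Saving model')
joblib.dump(model, './models/' + model_name + '_classifier.pkl')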
Example #5
import numpy as np
import torch

from data.utils import load_data
from matplotlib import pyplot as plt
from src.layers import *

path = r"../data/"
pp_net = load_data(path, [])['pp_adj'].tocoo()
indices = torch.LongTensor(
    np.concatenate((pp_net.col.reshape(1, -1), pp_net.row.reshape(1, -1)),
                   axis=0))
indices = remove_bidirection(indices, None)

n_node = pp_net.shape[0]
n_edge = indices.shape[1]

rd = np.random.binomial(1, 0.9, n_edge)
train_mask = rd.nonzero()[0]
test_mask = (1 - rd).nonzero()[0]

train_indices = indices[:, train_mask]
train_indices = to_bidirection(train_indices, None)

test_indices = indices[:, test_mask]
test_indices = to_bidirection(test_indices, None)

train_n_edge = train_indices.shape[1]
test_n_edge = test_indices.shape[1]

hid1 = 32
hid2 = 16

x = sparse_id(n_node)
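remove_bidirection, to_bidirection, and sparse_id come from src.layers and are not shown. As an assumption, sparse_id likely builds one-hot node features as a sparse identity matrix, roughly:

import torch

def sparse_identity(n):
    # Hypothetical stand-in for sparse_id: an n x n sparse identity matrix,
    # i.e. one-hot features for each of the n nodes.
    idx = torch.arange(n)
    indices = torch.stack([idx, idx])  # diagonal coordinates
    values = torch.ones(n)
    return torch.sparse_coo_tensor(indices, values, (n, n))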
Example #6
from keras.layers import Dropout, Input
from keras.regularizers import l1_l2

import keras.metrics

import numpy as np
from data.utils import load_data
from gcnlayer import GraphConvolution
from feature_eng import enhance_features, normalize
from utils import preprocess_adj
from custom_losses import crossentropy_weighted


from scipy.sparse.csgraph import laplacian as scipy_laplacian
N_EPOCHS = 1000

A, X, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    'cora')
X = X.A

n_features = X.shape[1]
n_vertex = A.shape[0]


A_ = preprocess_adj(A)
graph = [X, A_]


def get_model_kipf():
    adj = Input(batch_shape=(None, None), sparse=False)  # adjacency matrix input
    inp = Input(shape=(n_features,))

    H = Dropout(0.5)(inp)
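preprocess_adj is imported from utils but not shown. For Kipf-style GCNs it typically applies the renormalization trick D^-1/2 (A + I) D^-1/2; a self-contained sketch of that normalization (an assumption about what preprocess_adj does):

import numpy as np
import scipy.sparse as sp

def normalize_adj(A):
    # Renormalization trick from Kipf & Welling: D^-1/2 (A + I) D^-1/2,
    # where D is the degree matrix of A with added self-loops.
    A_hat = A + sp.eye(A.shape[0])
    deg = np.asarray(A_hat.sum(axis=1)).flatten()
    d_inv_sqrt = sp.diags(np.power(deg, -0.5))
    return d_inv_sqrt @ A_hat @ d_inv_sqrt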
Example #7
def train(model,
          training_data,
          num_epochs=10,
          lr=1e-2,
          batch_size=1,
          validation_data=None):

    model.double()

    # Function being minimized
    loss_fn = nn.MSELoss()

    # Optimization algorithm being used to minimize the loss
    optimizer = optim.RMSprop(model.parameters(), lr=lr)

    # Build the dataset and get the dataloader for the training data
    X_train, y_train = training_data
    train_minibatches = utils.load_data(X_train, y_train, batch_size=batch_size)

    # Validation data
    X_valid, y_valid = validation_data
    valid_minibatches = utils.load_data(X_valid,
                                        y_valid,
                                        batch_size=len(y_valid))

    history = {'training_loss': [], 'validation_loss': []}

    # Main optimization loop
    for epoch in range(num_epochs):
        # Loop over all mini-batches
        batch_loss = []
        for inputs, targets in train_minibatches:

            # Compute the predicted outputs
            outputs = model(inputs)

            # Evaluate the difference between the known targets
            # and the predicted targets
            loss = loss_fn(outputs, targets)

            # Optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Add the loss for this mini-batch to the array of losses
            batch_loss.append(loss.item())

        # The loss for each epoch is the average loss observed for all mini-batches
        avg_loss = torch.tensor(batch_loss).mean().item()

        history['training_loss'].append(avg_loss)
        # Evaluate on the validation data
        print(f'Epoch {epoch}: {avg_loss}')

        # Validation loss/error
        for x_valid, y_valid in valid_minibatches:
            print(x_valid.size())
            pred = model(x_valid)
            err = F.mse_loss(pred, y_valid)
            err = err.item()
            history['validation_loss'].append(err)

    return history
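A minimal usage sketch for this version of train, assuming utils.load_data behaves like the DataLoader stand-in sketched after Example #1; the model, shapes, and data below are made up for illustration:

import numpy as np
import torch.nn as nn

# Toy regression data in float64, matching the model.double() call above
X = np.random.randn(200, 3)
y = X @ np.array([[1.0], [-2.0], [0.5]]) + 0.1 * np.random.randn(200, 1)

model = nn.Sequential(nn.Linear(3, 8), nn.ReLU(), nn.Linear(8, 1))

history = train(model,
                training_data=(X[:160], y[:160]),
                validation_data=(X[160:], y[160:]),
                num_epochs=5,
                batch_size=16)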