Code example #1
0
def get_feature_matrix(raw_data):
    """Convert a raw DataFrame into a NumPy feature matrix and label vector.

    Args:
        raw_data: DataFrame containing the raw data.

    Returns:
        A ``(data, labels)`` pair: ``data`` is an ``np.ndarray`` feature
        matrix and ``labels`` is a flat 1-D ``np.ndarray`` of labels.
    """
    features, labels = split_data(raw_data)
    # ``.values`` drops the pandas index; ``ravel`` flattens the label
    # column to a 1-D vector.
    return features.values, labels.values.ravel()
Code example #2
0
    def load_data(self):
        # data preprocessing
        '''
        Read, pad, and split the dataset, returning the split selected by
        the external ``mode`` flag.

        Shapes (per the original author's note):
            input  : (batch, max_step, input_features)
            target : (batch, target_features)
        '''

        # read the data set; 'fill' presumably selects a missing-value
        # strategy inside read_data — TODO confirm
        input_set, target_set = read_data('fill')

        # padding: pad variable-length sequences and keep true lengths
        pad_input_set, seq_len = padding(input_set)

        # Debug print of the first padded sequence.
        print(pad_input_set[0])
        # split data set for model
        # NOTE(review): `model` is not defined in this method — the sibling
        # example passes a model-type string here; verify this is not a bug.
        input_train, input_test, target_train, target_test, seq_train, seq_test = split_data(
            pad_input_set, target_set, seq_len, model)

        # NOTE(review): `mode` is also not defined locally (presumably a
        # module-level flag — confirm); any other mode falls through and
        # implicitly returns None.
        if mode == 'train':
            return input_train, target_train, seq_train
        elif mode == 'test':
            return input_test, target_test, seq_test
Code example #3
0
    def load_data(self):
        '''
        Read, pad, and split the dataset for the 'ensemble' model type,
        returning the split selected by the external ``mode`` flag
        ('train' or 'test'; anything else implicitly returns None).

        Shapes (per the original author's note):
            input  : (batch, max_step, input_features)
            target : (batch, target_features)
        '''
        # Raw sequences plus targets; 'fill' selects read_data's
        # missing-value handling.
        raw_inputs, targets = read_data('fill')

        # Pad variable-length sequences, keeping their true lengths.
        padded_inputs, lengths = padding(raw_inputs)

        tsl_model_type = 'ensemble'

        # Partition values, targets and lengths into train/test sets.
        (value_train, value_test,
         target_train, target_test,
         seq_train, seq_test) = split_data(padded_inputs, targets,
                                           lengths, tsl_model_type)

        if mode == 'test':
            return value_test, target_test, seq_test
        if mode == 'train':
            return value_train, target_train, seq_train
def estimator_from_csv(shape_in: Tuple[int, int],
                       shape_out: Tuple[int],
                       file_csv: str,
                       feature_cols: Union[List[int], int] = 0,
                       batch_size: int = 10,
                       epochs: Optional[int] = 10,
                       steps: int = 1,
                       model_dir: str = r'..\tmp\test',
                       consistent_model: bool = True,
                       activate_tb: bool = False):
    """Train and evaluate an estimator on data read from a CSV file.

    :param shape_in: (n_in, n_features) input shape for the model
    :param shape_out: (n_out,) output shape for the model
    :param file_csv: path of the CSV file to read
    :param feature_cols: column index (or list of indices) used as features
    :param batch_size: mini-batch size for training and evaluation
    :param epochs: epochs per training call (None = repeat indefinitely)
    :param steps: number of train/evaluate rounds to run
    :param model_dir: directory for estimator checkpoints
    :param consistent_model: whether to reuse the same model directory
    :param activate_tb: launch TensorBoard on the model dir when done
    :return: the trained estimator
    """
    n_in, n_out = shape_in[0], shape_out[0]

    # Keras model -> tf.estimator plumbing.
    compiled = create_compiled_model(shape_in=shape_in, shape_out=shape_out)
    model_dir = create_model_dir(model_dir, consistent_model=consistent_model)
    estimator = model_to_estimator(compiled, model_dir=model_dir)

    # Raw CSV -> train/test split -> supervised (features, labels) pairs.
    raw = read_data_from_csv(file_csv)
    raw_trn, raw_tst = split_data(raw)
    trn_fea, trn_lbl = to_supervised(raw_trn, n_in, n_out,
                                     feature_cols=feature_cols,
                                     label_col=0,
                                     is_train=True)
    tst_fea, tst_lbl = to_supervised(raw_tst, n_in, n_out,
                                     feature_cols=feature_cols,
                                     label_col=0,
                                     is_train=False)

    # Named input functions instead of inline lambdas.
    def _train_input_fn():
        return set_input_fn_csv(trn_fea, trn_lbl,
                                batch_size=batch_size,
                                num_epochs=epochs)

    def _eval_input_fn():
        return set_input_fn_csv(tst_fea, tst_lbl, batch_size=batch_size)

    # Alternate training and evaluation for the requested number of rounds.
    for _ in range(steps):
        estimator.train(input_fn=_train_input_fn)
        result = estimator.evaluate(input_fn=_eval_input_fn)
        print(result)

    if activate_tb:
        launch_tb(model_dir)
    return estimator
Code example #5
0
File: lab3_realdata.py — Project: mswiniars/ALS
# Fill the user x product ratings matrix from the raw triplets.
pivot_table[row_pos, col_pos] = data[:, 2]
ratings = pivot_table

print(f"Number of ratings: {len(np.argwhere(ratings != 0))}")
print(f"Number of users: {len(rows)}")
print(f"Number of products: {len(cols)}")
nb_products = len(cols)

# Initialize U (users coeffs), P (products coeffs) matrices.
# BUG FIX: np.random.rand() takes only dimension arguments — it has no
# `dtype` keyword (and always returns float64 anyway), so the original
# calls raised TypeError.  The seeded 5 * Uniform(0, 1) initialization
# is preserved exactly.
np.random.seed(42)
U = 5 * np.random.rand(d, len(rows))
P = 5 * np.random.rand(d, len(cols))

# Spliting data into test and train set
training_ratings, test_ratings, nb_tests = split_data(ratings,
                                                      spliting_ratio=0.8,
                                                      seed=42)

training_losses = []
test_losses = []
start_als = timer()

# One ALS sweep per iteration: solve each user's latent vector with the
# product factors held fixed.  (This loop is truncated in the excerpt —
# the per-user update continues beyond the visible source.)
for n in range(nb_iter):
    # ALS algorithm
    for u in range(len(rows)):
        # Indices of the products this user actually rated (non-zero).
        I_u = np.argwhere(training_ratings[u] != 0).flatten()
        # Product factor columns restricted to the rated items.
        P_I_u = P[:, I_u]
        P_I_u_T = np.transpose(P_I_u)
        E = np.eye(d)
        # Regularized normal-equation matrix: P_Iu @ P_Iu^T + lambda * I.
        A_u = np.dot(P_I_u, P_I_u_T) + lambd * E
        V_u = 0
Code example #6
0
# Project-local preprocessing helpers.
from data_preprocessing import load_data, process_data, split_data

# Parameters
MAX_DOC_LENGTH = 25  # documents truncated/padded to this many tokens
BATCH_SIZE = 256
EPOCHS = 5

# Loading train and test dataset (a 10% sample of each CSV).
x_train, y_train = load_data("train_data.csv", sample_ratio=0.1)
x_test, y_test = load_data("test_data.csv", sample_ratio=0.1)

# Data preprocessing — presumably tokenizes/indexes both splits and
# returns the vocabulary size as n_vocab; confirm against process_data.
x_train, x_test, _, n_vocab = process_data(x_train, x_test, MAX_DOC_LENGTH)

# Splitting dataset: carve a validation set (ratio 0.1) out of the test set.
x_test, x_val, y_test, y_val, _, test_size = split_data(x_test, y_test, 0.1)

# Model for training: embedding -> bidirectional LSTM -> dropout -> dense.
model = Sequential()
model.add(Embedding((n_vocab + 1), 15, input_length=MAX_DOC_LENGTH))  # +1 for the padding index
model.add(Bidirectional(LSTM(15)))
model.add(Dropout(0.5))
# NOTE(review): a 15-unit sigmoid output trained with binary_crossentropy
# looks unusual for single-label binary classification — verify against
# the shape of y_train.
model.add(Dense(15, activation='sigmoid'))
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print(
    '-------------------------------------Training Data------------------------------------\n'
)

# NOTE(review): this call is truncated in the excerpt — its remaining
# arguments continue past the end of the visible source.
model.fit(x_train,
          y_train,
Code example #7
0
# Grid search over LSTM sizes, ReLU widths, and frame-batch sizes.
for lstm_units in lstm:
    for relu_units in relu:
        for batch_frames in frame_batch:

            print(str(lstm_units) + '-' + str(relu_units) + '-' + str(batch_frames))

            # Feature extraction at this frame-batch size.
            # NOTE(review): the co-occurrence feat_type is immediately
            # overwritten by the co-intersection one — confirm intended.
            coo, feat_type = data_preprocessing.cooccurrence(
                dataset_detection_video, batch_frames)
            coint, feat_type = data_preprocessing.cointersection(
                dataset_detection_video, batch_frames)

            # Append each video's co-occurrence features onto its
            # co-intersection sequence (concatenated along axis 1).
            for idx, video in enumerate(coint):
                video['sequence'] = np.concatenate(
                    (video['sequence'], coo[idx]['sequence']), axis=1)

            # Split into train & test sets.
            splitted_data = data_preprocessing.split_data(coint)

            # Build the graph, then train & save the model.
            model.graph(splitted_data, lstm_units, relu_units)
            model.train(splitted_data, classlbl_to_classid, 60, 32,
                        feat_type, batch_frames)
	



'''
#========PREDICTION============

# data loading (pickle)
dataset_detection_video, classlbl_to_classid = data_preprocessing.load_data()