def get_feature_matrix(raw_data):
    """
    Convenience function converting a DataFrame to a feature matrix
    and a labels vector.

    Args:
        raw_data: DataFrame, containing raw data

    Returns:
        data: np.ndarray, feature matrix
        labels: np.ndarray, labels vector
    """
    data, labels = split_data(raw_data)
    data, labels = data.values, labels.values.ravel()
    return data, labels
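# The `split_data` helper above is defined elsewhere. A minimal sketch
# consistent with this call site, assuming the labels live in a column
# named 'label' (the column name is a placeholder, not from the original):
import pandas as pd

def split_data(raw_data: pd.DataFrame):
    # Separate the label column from the feature columns; both returns
    # keep DataFrame form so .values / .values.ravel() work downstream.
    labels = raw_data[['label']]
    features = raw_data.drop(columns=['label'])
    return features, labels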
def load_data(self):
    # Data preprocessing.
    '''
    input  : (batch, max_step, input_features)
    target : (batch, target_features)
    '''
    # Read the data set.
    input_set, target_set = read_data('fill')

    # Pad variable-length sequences up to max_step.
    pad_input_set, seq_len = padding(input_set)
    print(pad_input_set[0])  # debug: inspect the first padded sequence

    # Split the data set for the model. Note: `model` and `mode` are not
    # defined in this method; they are presumably module-level globals or
    # should be instance attributes (self.model, self.mode).
    input_train, input_test, target_train, target_test, seq_train, seq_test = split_data(
        pad_input_set, target_set, seq_len, model)

    if mode == 'train':
        return input_train, target_train, seq_train
    elif mode == 'test':
        return input_test, target_test, seq_test
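# `padding` is not shown. A minimal sketch of what this call site implies,
# assuming each element of `input_set` is a (steps, input_features) array
# and the helper zero-pads to the longest sequence while recording the
# true lengths:
import numpy as np

def padding(input_set):
    # Zero-pad every sequence to the longest length and keep the original
    # lengths for downstream masking / dynamic-length models.
    seq_len = np.array([len(seq) for seq in input_set])
    max_step = seq_len.max()
    n_features = input_set[0].shape[1]
    padded = np.zeros((len(input_set), max_step, n_features))
    for i, seq in enumerate(input_set):
        padded[i, :len(seq)] = seq
    return padded, seq_len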
def load_data(self):
    # Data preprocessing (ensemble variant of the method above).
    '''
    input  : (batch, max_step, input_features)
    target : (batch, target_features)
    '''
    # Read the data set.
    input_set, target_set = read_data('fill')

    # Pad variable-length sequences up to max_step.
    pad_input_set, seq_len = padding(input_set)

    tsl_model_type = 'ensemble'

    # Split the data set for the model. Note: `mode` is not defined in
    # this method; it is presumably a module-level global or should be an
    # instance attribute (self.mode).
    value_train, value_test, target_train, target_test, seq_train, seq_test = split_data(
        pad_input_set, target_set, seq_len, tsl_model_type)

    if mode == 'train':
        return value_train, target_train, seq_train
    elif mode == 'test':
        return value_test, target_test, seq_test
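# The four-argument `split_data` used by both methods above is defined
# elsewhere. A minimal sketch consistent with the call sites, assuming a
# fixed train/test ratio and numpy-array inputs; this sketch accepts the
# model-type argument but does not branch on it (an assumption):
import numpy as np

def split_data(values, targets, seq_len, model_type, train_ratio=0.8):
    # Shuffle once, then cut into train/test partitions, keeping the
    # value/target/length arrays aligned.
    idx = np.random.permutation(len(values))
    cut = int(len(values) * train_ratio)
    tr, te = idx[:cut], idx[cut:]
    return (values[tr], values[te],
            targets[tr], targets[te],
            seq_len[tr], seq_len[te])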
from typing import List, Optional, Tuple, Union


def estimator_from_csv(shape_in: Tuple[int, int],
                       shape_out: Tuple[int],
                       file_csv: str,
                       feature_cols: Union[List[int], int] = 0,
                       batch_size: int = 10,
                       epochs: Optional[int] = 10,
                       steps: int = 1,
                       model_dir: str = r'..\tmp\test',
                       consistent_model: bool = True,
                       activate_tb: bool = False):
    """
    Train & test an estimator on data read from a CSV file.

    :param shape_in: model input shape; shape_in[0] is the number of input steps
    :param shape_out: model output shape; shape_out[0] is the number of output steps
    :param file_csv: path to the CSV file with the raw data
    :param feature_cols: column index (or list of indices) to use as features
    :param batch_size: batch size for training and evaluation
    :param epochs: number of epochs for the training input function
    :param steps: number of train/evaluate rounds
    :param model_dir: base directory for checkpoints and summaries
    :param consistent_model: whether to reuse the same model directory (passed to create_model_dir)
    :param activate_tb: if True, launch TensorBoard on model_dir afterwards
    :return: the trained estimator
    """
    n_in, n_out = shape_in[0], shape_out[0]
    model = create_compiled_model(shape_in=shape_in, shape_out=shape_out)
    model_dir = create_model_dir(model_dir, consistent_model=consistent_model)
    estimator = model_to_estimator(model, model_dir=model_dir)

    d = read_data_from_csv(file_csv)
    raw_trn_data, raw_tst_data = split_data(d)
    trn_fea, trn_lbl = to_supervised(raw_trn_data, n_in, n_out,
                                     feature_cols=feature_cols, label_col=0,
                                     is_train=True)
    tst_fea, tst_lbl = to_supervised(raw_tst_data, n_in, n_out,
                                     feature_cols=feature_cols, label_col=0,
                                     is_train=False)

    for _ in range(steps):
        estimator.train(
            input_fn=lambda: set_input_fn_csv(trn_fea, trn_lbl,
                                              batch_size=batch_size,
                                              num_epochs=epochs))
        result = estimator.evaluate(
            input_fn=lambda: set_input_fn_csv(tst_fea, tst_lbl,
                                              batch_size=batch_size))
        print(result)

    if activate_tb:
        launch_tb(model_dir)
    return estimator
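# A hypothetical invocation, assuming a univariate series in
# 'data/series.csv' (path and window sizes are illustrative, not from the
# original):
estimator = estimator_from_csv(shape_in=(24, 1), shape_out=(1,),
                               file_csv='data/series.csv',
                               batch_size=32, epochs=5, steps=3)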
# (Fragment: pivot_table, data, rows, cols, d, lambd, nb_iter and timer
# are defined earlier in the script.)
pivot_table[row_pos, col_pos] = data[:, 2]
ratings = pivot_table

print(f"Number of ratings: {len(np.argwhere(ratings != 0))}")
print(f"Number of users: {len(rows)}")
print(f"Number of products: {len(cols)}")
nb_products = len(cols)

# Initialize U (user coefficients) and P (product coefficients) matrices.
# np.random.rand takes no dtype argument and already returns float64.
np.random.seed(42)
U = 5 * np.random.rand(d, len(rows))
P = 5 * np.random.rand(d, len(cols))

# Splitting data into test and train set.
training_ratings, test_ratings, nb_tests = split_data(ratings, spliting_ratio=0.8, seed=42)

training_losses = []
test_losses = []

start_als = timer()
for n in range(nb_iter):  # ALS iterations
    # User-factor update: for each user u, gather the items I_u that u
    # rated and form the regularized normal equations.
    for u in range(len(rows)):
        I_u = np.argwhere(training_ratings[u] != 0).flatten()
        P_I_u = P[:, I_u]
        P_I_u_T = np.transpose(P_I_u)
        E = np.eye(d)
        A_u = np.dot(P_I_u, P_I_u_T) + lambd * E
        V_u = 0
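# The snippet breaks off at `V_u = 0`. In standard ALS, the user update
# accumulates V_u = P_I_u @ r_u over the observed items and then solves
# A_u x = V_u for the new user factors. A self-contained sketch of that
# step (function name and signature are hypothetical, not from the
# original):
import numpy as np

def als_user_update(P, ratings_u, lambd):
    """Solve (P_I P_I^T + lambd*I) x = P_I r_I for one user's factors."""
    d = P.shape[0]
    I_u = np.argwhere(ratings_u != 0).flatten()
    P_I = P[:, I_u]                      # (d, |I_u|) item factors
    A_u = P_I @ P_I.T + lambd * np.eye(d)
    V_u = P_I @ ratings_u[I_u]           # right-hand side
    return np.linalg.solve(A_u, V_u)     # new column U[:, u]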
# Imports assumed; the original snippet does not show them.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense

from data_preprocessing import load_data, process_data, split_data

# Parameters
MAX_DOC_LENGTH = 25
BATCH_SIZE = 256
EPOCHS = 5

# Loading train and test dataset
x_train, y_train = load_data("train_data.csv", sample_ratio=0.1)
x_test, y_test = load_data("test_data.csv", sample_ratio=0.1)

# Data preprocessing
x_train, x_test, _, n_vocab = process_data(x_train, x_test, MAX_DOC_LENGTH)

# Splitting the test set further into test and validation parts
x_test, x_val, y_test, y_val, _, test_size = split_data(x_test, y_test, 0.1)

# Model for training
model = Sequential()
model.add(Embedding(n_vocab + 1, 15, input_length=MAX_DOC_LENGTH))
model.add(Bidirectional(LSTM(15)))
model.add(Dropout(0.5))
model.add(Dense(15, activation='sigmoid'))
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print(
    '-------------------------------------Training Data------------------------------------\n'
)
model.fit(x_train, y_train,
          # The original breaks off mid-call; batch size, epoch count and
          # validation data are filled in from the values defined above.
          batch_size=BATCH_SIZE, epochs=EPOCHS,
          validation_data=(x_val, y_val))
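# A natural follow-up (not in the original snippet) is to score the
# held-out test split once training finishes:
loss, acc = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
print(f"test loss: {loss:.4f} - test accuracy: {acc:.4f}")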
for i in lstm:
    for j in relu:
        for k in frame_batch:
            print(str(i) + '-' + str(j) + '-' + str(k))

            # Features: co-occurrence and co-intersection sequences.
            coo, feat_type = data_preprocessing.cooccurrence(dataset_detection_video, k)
            coint, feat_type = data_preprocessing.cointersection(dataset_detection_video, k)
            for index, video in enumerate(coint):
                video['sequence'] = np.concatenate(
                    (video['sequence'], coo[index]['sequence']), axis=1)

            # Splitting train & test.
            splitted_data = data_preprocessing.split_data(coint)

            # Create the graph.
            model.graph(splitted_data, i, j)

            # Train & save.
            model.train(splitted_data, classlbl_to_classid, 60, 32, feat_type, k)

# The commented-out prediction phase below is truncated in this excerpt.
'''
#========PREDICTION============
# data loading (pickle)
dataset_detection_video, classlbl_to_classid = data_preprocessing.load_data()