Example 1
    def prepare_dataset(self):
        number_of_genres = len(self.genres)
        for genre_index, genre in enumerate(self.genres, start=1):
            self.my_logger.info(
                "[+] Creating dataset for genre {} ({}/{})".format(
                    genre, genre_index, number_of_genres))
            # Name the per-genre dataset after the genre and this run's id
            dataset_name = "{}_{}".format(genre, self.user_args.run_id)
            GetDataset(genre, config.slice_size, config.dataset_path,
                       dataset_name,
                       self.active_config.path_to_slices_for_training,
                       self.user_args, self.genres).start()

        # Reload every per-genre dataset from disk, then flatten the
        # resulting list of lists into a single list of data points
        all_dataset = self.load_dataset_back_to_memory()
        array_of_array_of_data_points = self.zip_again(all_dataset)
        array_of_data_points = [
            data_point
            for array_of_data_points in array_of_array_of_data_points
            for data_point in array_of_data_points
        ]
        return array_of_data_points
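The return value is built with a nested list comprehension that flattens a list of lists. A minimal standalone sketch of that idiom, with toy data in place of the real datasets:

# Toy stand-in for the per-genre datasets loaded above
array_of_array_of_data_points = [[1, 2], [3], [4, 5]]
# Same flattening idiom as in prepare_dataset
array_of_data_points = [
    data_point
    for array_of_data_points in array_of_array_of_data_points
    for data_point in array_of_data_points
]
print(array_of_data_points)  # [1, 2, 3, 4, 5]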
Example 2
def run(stock: str, model_type: str, stationary=True):
    df = Analysis.get_data(stock)
    df["Company stock name"] = stock.split('/')[-1].split('.')[0]
    dataset = GetDataset(df)
    dataset.get_dataset(scale=False, stationary=stationary)
    train_data, test_data, train_data_len = dataset.split(train_split_ratio=0.8, time_period=30)
    train_data, test_data = dataset.get_torchdata()
    x_train, y_train = train_data
    x_test, y_test = test_data

    if model_type == 'lstm':
        params = rnn_params
        model = TorchRNN(rnn_type=params.rnn_type, input_dim=params.input_dim,
                         hidden_dim=params.hidden_dim, output_dim=params.output_dim,
                         num_layers=params.num_layers)
    elif model_type == 'transformer':
        params = transf_params
        model = TransformerModel(params)
    else:
        raise ValueError('Wrong model type selection, select either "lstm" or "transformer"!')

    clf = Classifier(model)
    clf.train([x_train, y_train], params=params)
    y_scaler = dataset.y_scaler
    predictions = clf.predict([x_test, y_test], y_scaler, data_scaled=False)
    predictions = pd.DataFrame(predictions)
    predictions.reset_index(drop=True, inplace=True)
    predictions.index = df.index[-len(x_test):]
    predictions['Actual'] = y_test[:-1]
    predictions.rename(columns={0: 'Predictions'}, inplace=True)
    if stationary:
        predictions = Analysis.inverse_stationary_data(old_df=df, new_df=predictions,
                                                       orig_feature='Actual', new_feature='Predictions',
                                                       diff=12, do_orig=False)
    plot_predictions(df, train_data_len, predictions["Predictions"].values, model_type)
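For reference, run() derives the company name by splitting the stock argument on '/' and '.', so it expects a path-like identifier. A hypothetical invocation (the path is illustrative, not from the original code):

run("data/AAPL.csv", model_type="lstm", stationary=True)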
Example 3
def start_test_real():
    # Name the test dataset after the unknown-genre label and this run's id
    dataset_name = "{}_{}".format(config.unknown_genre,
                                  user_args.run_id_real_test)
    dataset = GetDataset(config.unknown_genre, config.slice_size,
                         config.dataset_path, dataset_name,
                         active_config.path_to_slices_for_testing, user_args,
                         genres).start()
    test_task = Test(user_args, dataset, model, path_to_model)
    test_task.predict()
    test_task.rearrange_result_file()
Example 4
from dataset import GetDataset
from model import GetModel

# Train params
epochs = 40
input_shape = (64, 64, 3)
num_classes = 4
batch_size = 128

# Get the dataset and the model
(train, val) = GetDataset(batch_size)
model = GetModel(input_shape, num_classes)

# Steps per epoch for fit_generator (must be integers)
train_steps = train.n // train.batch_size
val_steps = val.n // val.batch_size

# Compile settings
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

# The big deal
history = model.fit_generator(generator=train,
                              steps_per_epoch=train_steps,
                              validation_data=val,
                              validation_steps=val_steps,
                              epochs=epochs,
                              verbose=1)

# Save the model after training
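The snippet ends at the save step. A minimal completion, assuming model is a standard Keras model (the file name is illustrative):

model.save('model.h5')  # illustrative path; newer Keras also accepts the .keras format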
Example 5
def run_experiment(base_dataset_path,
                   dataset_name,
                   test_size,
                   n_cal,
                   alpha=0.1,
                   experiment=0,
                   bbox_method='NNet',
                   out_dir='./results',
                   random_state=2020,
                   n_jobs=1,
                   verbose=False):
    
    # Derive a per-experiment random state and seed NumPy before adding noise
    random_state = 2020 + experiment
    np.random.seed(random_state)

    X, Y = GetDataset(dataset_name, base_dataset_path)

    # Add a small amount of noise to the response to break ties
    Y += 1e-6 * np.random.normal(size=Y.shape)
    
    
    y_min = min(Y)
    y_max = max(Y)
    
    # Skip the experiment if the dataset is too small to split
    if X.shape[0] <= 2 * n_cal + test_size:
        return
    
    # Determine output file
    out_file = out_dir + "/summary.csv"
    print(out_file)
    
    # Seed all RNGs (Python, NumPy, PyTorch) for reproducibility
    random.seed(random_state)
    np.random.seed(random_state)
    torch.manual_seed(random_state)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(random_state)
    

    # Split the data into training, calibration, and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)

    X_train, X_calib, Y_train, Y_calib = train_test_split(
        X_train, Y_train, test_size=n_cal, random_state=random_state)
        
    # Standardize features using statistics from the training set only
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_calib = scaler.transform(X_calib)
    X_test = scaler.transform(X_test)

    n_train = X_train.shape[0]
    assert n_cal == X_calib.shape[0]
    n_test = X_test.shape[0]
    
    results = pd.DataFrame()

    n_features = 1 if len(X.shape) == 1 else X.shape[1]

    # Number of grid points used to discretize the response range
    y_steps = 1000
    
    # Initialize the black-box quantile regression model
    if bbox_method == 'NNet':
        epochs = 2000
        lr = 0.0005
        batch_size = n_train
        dropout = 0.1
        grid_quantiles = np.arange(0.01, 1.0, 0.01)
        bbox = QNet(grid_quantiles, n_features, no_crossing=True,
                    batch_size=batch_size, dropout=dropout, num_epochs=epochs,
                    learning_rate=lr, calibrate=1, verbose=verbose)
    elif bbox_method == 'RF':
        n_estimators = 100
        min_samples_leaf = 50
        grid_quantiles = np.arange(0.01, 1.0, 0.01)
        bbox = QRF(grid_quantiles, n_estimators=n_estimators,
                   min_samples_leaf=min_samples_leaf, random_state=2020,
                   n_jobs=n_jobs, verbose=verbose)
    else:
        raise ValueError("Unknown bbox_method: {}".format(bbox_method))
    
    # Train the black-box model
    bbox.fit(X_train, Y_train)
    
    # Define the conformalization methods to compare
    methods = {
        'CHR'       : CHR(bbox, ymin=y_min, ymax=y_max, y_steps=y_steps, randomize=True),
        'DistSplit' : DistSplit(bbox, ymin=y_min, ymax=y_max),
        'DCP'       : DCP(bbox, ymin=y_min, ymax=y_max),
        'CQR'       : CQR(bbox),
        'CQR2'      : CQR2(bbox)
    }

    for method_name, method in methods.items():
        # Calibrate the conformalization method on the calibration set
        method.calibrate(X_calib, Y_calib, alpha)
        # Compute prediction on test data
        pred = method.predict(X_test)
        
        # Evaluate results
        res = evaluate_predictions(pred, Y_test, X=X_test)
        # Add information about this experiment
        res['Box'] = bbox_method
        res['Dataset'] = dataset_name
        res['Method'] = method_name
        res['Experiment'] = experiment
        res['Nominal'] = 1-alpha
        res['n_train'] = n_train
        res['n_cal'] = n_cal
        res['n_test'] = n_test
        
        
        # Append this method's results (DataFrame.append is deprecated in pandas)
        results = pd.concat([results, res])

        # Write partial results to the output file after every method
        os.makedirs(out_dir, exist_ok=True)
        
        results.to_csv(out_file, index=False, float_format="%.4f")
        print("Updated summary of results on\n {}".format(out_file))
        sys.stdout.flush()

    return results
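A hypothetical invocation of run_experiment; the dataset name, path, and split sizes below are placeholders, not values from the original experiments:

results = run_experiment(base_dataset_path='./data',
                         dataset_name='concrete',
                         test_size=1000,
                         n_cal=1000,
                         alpha=0.1,
                         experiment=0,
                         bbox_method='RF',
                         out_dir='./results')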
Example 6
# Input arguments
dataset_name = str(sys.argv[1])
bbox_method = str(sys.argv[2])
experiment = int(sys.argv[3])

print(dataset_name)
print(bbox_method)
print(experiment)

# Derive a per-experiment random state and seed NumPy
random_state = 2020 + experiment
np.random.seed(random_state)

X, Y = GetDataset(dataset_name, base_dataset_path)

# Add noise to response
Y += 1e-6 * np.random.normal(size=Y.shape)


y_min = min(Y)
y_max = max(Y)

# Abort if the dataset is too small for the requested splits
if X.shape[0] <= 2 * n_cal + test_size:
    raise ValueError("Dataset too small for the requested split sizes")

# Temporary file to store black-box model
tmp_file = tmp_dir + "/dataset_" + dataset_name + "_bbox_" + bbox_method + "_exp_" + str(