Example #1
def main():
    filename = "training_data.csv"
    n_hidden_nodes = [5]
    l_rate = 0.6
    n_epochs = 800
    n_folds = 4

    print("Neural network model:\n n_hidden_nodes = {}".format(n_hidden_nodes))
    print(" l_rate = {}".format(l_rate))
    print(" n_epochs = {}".format(n_epochs))
    print(" n_folds = {}".format(n_folds))

    print("\nReading '{}'...".format(filename))
    X, y = utils.read_csv(filename)
    utils.normalize(X)
    N, d = X.shape
    n_classes = len(np.unique(y))

    print(" X.shape = {}".format(X.shape))
    print(" y.shape = {}".format(y.shape))
    print(" n_classes = {}".format(n_classes))

    idx_all = np.arange(0, N)
    idx_folds = utils.crossval_folds(N, n_folds, seed=1)

    acc_train, acc_test = list(), list()
    print("\nTraining and cross-validating...")
    for i, idx_test in enumerate(idx_folds):
        idx_train = np.delete(idx_all, idx_test)
        X_train, y_train = X[idx_train], y[idx_train]
        X_test, y_test = X[idx_test], y[idx_test]

        model = NeuralNetwork(n_input=d,
                              n_output=n_classes,
                              n_hidden_nodes=n_hidden_nodes)
        model.train(X_train, y_train, l_rate=l_rate, n_epochs=n_epochs)

        y_train_predict = model.predict(X_train)
        y_test_predict = model.predict(X_test)

        acc_train.append(100 * np.sum(y_train == y_train_predict) /
                         len(y_train))
        acc_test.append(100 * np.sum(y_test == y_test_predict) / len(y_test))

        print(
            " Fold {}/{}: train acc = {:.2f}%, test acc = {:.2f}% (n_train = {}, n_test = {})"
            .format(i + 1, n_folds, acc_train[-1], acc_test[-1], len(X_train),
                    len(X_test)))

    print("\nAvg train acc = {:.2f}%".format(
        sum(acc_train) / float(len(acc_train))))
    print("Avg test acc = {:.2f}%".format(
        sum(acc_test) / float(len(acc_test))))
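
The helper utils.normalize(X) used above (and in the commented variant further down) is not part of the listing; it apparently rescales the feature matrix in place before cross-validation. A minimal sketch of such an in-place, per-column min-max scaling, assuming that behaviour:

import numpy as np

def normalize(X):
    """Hypothetical sketch of utils.normalize: scale each column of X into [0, 1] in place."""
    col_min = X.min(axis=0)
    col_range = X.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0  # keep constant columns from dividing by zero
    X -= col_min
    X /= col_range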
Example #2
def compute_soft_predictions(contracted_tree, cuts, verbose):
    costs = np.exp(-normalize(cuts.costs))

    compute_soft_predictions_children(node=contracted_tree.root,
                                      cuts=cuts,
                                      costs=costs,
                                      verbose=verbose)
Example #3
def read_prediction_gt(dname, fnames):
    images = []
    for fname in fnames:
        fname = os.path.join(dname, fname)
        image = load_image(fname)
        image = normalize(image)
        images.append(image)
    return torch.stack(images, dim=0)
Example #4
 def test_unicode(self):
     x = [
         chr(8) + "YOU ARE DEAD1",  # chr(8) is backspace
         "☠️YOU ARE DEAD2",
     ]
     y = [normalize(e, do=['ctrl', 'unicode']) for e in x]
     z = ["YOU ARE DEAD1", "YOU ARE DEAD2"]
     for res, exp in zip(y, z):
         self.assertEqual(res, exp)
Example #5
 def test_accents(self):
     x = [
         "Álvarez-Fernández",
     ]
     y = [normalize(e.lower(), do_not_remove='-') for e in x]
     z = [
         "Alvarez-Fernandez",
     ]
     for res, expect in zip(y, z):
         self.assertEqual(res, expect.lower())
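
The string normalize behind these tests (also used in Examples #4 and #20) is not included in the listing; judging from the assertions it lowercases text, strips HTML tags and control characters, and removes accents, subject to do / do_not_remove options. A rough sketch of just the control-character and accent handling, with keyword arguments that only mimic the calls above (all assumptions):

import unicodedata

def normalize(text, do=('ctrl', 'unicode'), do_not_remove=''):
    """Hypothetical sketch: strip control characters and accents, keeping characters in do_not_remove."""
    if 'ctrl' in do:
        # drop control characters such as chr(8) (backspace)
        text = ''.join(ch for ch in text if unicodedata.category(ch) != 'Cc')
    if 'unicode' in do:
        # decompose accented characters, drop the combining marks,
        # then drop any remaining non-ASCII symbols (e.g. emoji)
        decomposed = unicodedata.normalize('NFKD', text)
        text = ''.join(ch for ch in decomposed
                       if not unicodedata.combining(ch)
                       and (ord(ch) < 128 or ch in do_not_remove))
    return text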
Example #6
    def __init__(self,
                 use_time=True,
                 use_err=True,
                 norm=True,
                 folded=True,
                 machine='local',
                 seq_len=150):
        """EROS light curves data loader"""
        if machine == 'local':
            root = local_root
        elif machine == 'colab':
            root = colab_root
        elif machine == 'exalearn':
            root = exalearn_root
        else:
            print('Wrong machine, please select local, colab or exalearn')
            sys.exit()
        if not folded:
            data_path = ('%s/time_series/real' % (root) +
                         '/EROS2_lcs_B_meta_snr5_augmented_trim%i.pkl' %
                         (seq_len))
        else:
            data_path = ('%s/time_series/real' % (root) +
                         '/EROS2_lcs_B_meta_snr5_augmented_folded_trim%i.pkl' %
                         (seq_len))
        print('Loading from:\n', data_path)
        self.aux = joblib.load(data_path)
        self.lcs = self.aux['lcs'].astype(np.float32)
        self.meta = self.aux['meta']
        del self.aux
        self.labels = self.meta['Type'].values
        ## integer encoding of labels
        self.label_int_enc = preprocessing.LabelEncoder()
        self.label_int_enc.fit(self.labels)
        self.labels_int = self.label_int_enc.transform(self.labels)
        ## one-hot encoding of labels
        self.label_onehot_enc = preprocessing.OneHotEncoder(sparse=False,
                                                            categories='auto',
                                                            dtype=np.float32)
        self.label_onehot_enc.fit(self.labels.reshape(-1, 1))
        self.labels_onehot = self.label_onehot_enc.transform(
            self.labels.reshape(-1, 1))

        if use_time and not use_err:
            self.lcs = self.lcs[:, :, 0:2]
        if not use_time and not use_err:
            self.lcs = self.lcs[:, :, 1:2]

        if 'folded' not in data_path:
            self.lcs = return_dt(self.lcs)
        if norm:
            self.lcs = normalize(self.lcs,
                                 n_feat=self.lcs.shape[2],
                                 scale_to=[.0001, .9999],
                                 norm_time=use_time)
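
The normalize called at the end of this loader (and of Example #10) receives an (N, seq_len, n_feat) array of light curves, a target range scale_to and a norm_time flag; the helper itself is not shown. A rough sketch of a per-curve, per-feature min-max rescaling with an option to leave the time feature untouched (the exact semantics are an assumption):

import numpy as np

def normalize(lcs, n_feat, scale_to=(0.0, 1.0), norm_time=True):
    """Hypothetical sketch: min-max scale each feature of each light curve into scale_to.

    lcs has shape (n_curves, seq_len, n_feat); when norm_time is False the
    first feature (assumed to be time) is left unscaled.
    """
    lo, hi = scale_to
    out = lcs.astype(np.float32)
    start = 0 if norm_time else 1
    for k in range(start, n_feat):
        fmin = out[:, :, k].min(axis=1, keepdims=True)
        fmax = out[:, :, k].max(axis=1, keepdims=True)
        rng = np.where(fmax - fmin == 0, 1.0, fmax - fmin)
        out[:, :, k] = lo + (out[:, :, k] - fmin) / rng * (hi - lo)
    return out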
Example #7
    def forward(self, input):
        gpu_ids = None
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 0:
            gpu_ids = range(self.ngpu)
            output = nn.parallel.data_parallel(self.main, input, gpu_ids)
        else:
            output = self.main(input)

        output = output.view(output.size(0), -1)
        if self.noise == 'sphere':
            output = utils.normalize(output)

        return output
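
When self.noise == 'sphere', utils.normalize(output) presumably projects each flattened feature vector onto the unit sphere. A plausible sketch of such a row-wise L2 normalization for PyTorch tensors (an assumption, not the project's actual helper):

import torch

def normalize(x, eps=1e-12):
    """Hypothetical sketch: rescale each row of a 2-D tensor to unit L2 norm ('sphere' noise)."""
    return x / x.norm(p=2, dim=1, keepdim=True).clamp_min(eps)

torch.nn.functional.normalize(x, p=2, dim=1) provides the same operation out of the box.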
Example #8
    def run(self):
        '''Runs the algorithm for the image.'''
        image = imageio.imread(self.filename)

        if len(image.shape) == 3:
            img_grayscale = pu.to_grayscale(image)
        else:
            img_grayscale = image  # image is already single-channel
        img = pu.normalize(np.min(img_grayscale), np.max(img_grayscale), 0,
                           255, img_grayscale)
        # HF part
        img_fft = fft2(img)  # img after fourier transformation
        img_sfft = fftshift(
            img_fft)  # img after shifting component to the center

        m, n = img_sfft.shape
        filter_array = np.zeros((m, n))

        for i in range(m):
            for j in range(n):
                filter_array[i, j] = 1.0 - np.exp(-((i - m / 2.0)**2 +
                                                    (j - n / 2.0)**2) /
                                                  (2 * (self.d0v**2)))
        k1 = 0.5
        k2 = 0.75
        high_filter = k1 + k2 * filter_array

        img_filtered = high_filter * img_sfft
        img_hef = np.real(ifft2(fftshift(img_filtered)))  # HFE filtering done

        # HE part
        # Building the histogram
        hist, bins = pu.histogram(img_hef)
        # Calculating probability for each pixel
        pixel_probability = hist / hist.sum()
        # Calculating the CDF (Cumulative Distribution Function)
        cdf = np.cumsum(pixel_probability)
        cdf_normalized = cdf * 255
        hist_eq = {}
        for i in range(len(cdf)):
            hist_eq[bins[i]] = int(cdf_normalized[i])

        for i in range(m):
            for j in range(n):
                image[i][j] = hist_eq[img_hef[i][j]]

        return image.astype(np.uint8)
Example #9
    def run(self):
        image = imageio.imread(self.filename)

        if len(image.shape) > 2:
            image = pu.to_grayscale(image)

        normalized_image = pu.normalize(np.min(image), np.max(image), 0, 255,
                                        image)
        imageio.imwrite(
            os.path.join(self.results_path, "normalized_image.jpg"),
            normalized_image)

        start = timeit.default_timer()
        equalized_image = self.clahe(normalized_image)
        stop = timeit.default_timer()

        self.export_histogram(image, normalized_image, equalized_image)
        self.export_run_info(stop - start)

        return equalized_image
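
Examples #8 and #9 both call pu.normalize(old_min, old_max, new_min, new_max, image), i.e. a five-argument linear rescaling of pixel intensities from one range into another; the pu module is not part of the listing. A sketch under that assumed argument order:

import numpy as np

def normalize(old_min, old_max, new_min, new_max, image):
    """Hypothetical sketch: linearly map pixel values from [old_min, old_max] to [new_min, new_max]."""
    image = image.astype(np.float64)
    if old_max == old_min:
        return np.full_like(image, new_min)
    scale = (new_max - new_min) / (old_max - old_min)
    return (image - old_min) * scale + new_min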
Example #10
    def __init__(self,
                 use_time=True,
                 use_err=True,
                 norm=True,
                 colab=False,
                 folded=False,  # assumed default; 'folded' is referenced below but was never defined
                 seq_len=100):
        """EROS light curves data loader"""
        if colab:
            root = colab_root
        else:
            root = local_root
        if not folded:
            data_path = (
                '%s/time_series/synthetic' % (root) +
                '/sine_nsamples%i_seqlength%i_nbands%i_nsig%i_timespan%i_SNR%i_f0%s.npy'
                % (28000, 100, 1, 1, 4, 3, 'narrow'))
        print('Loading from:\n', data_path)
        self.aux = np.load(data_path, allow_pickle=True).item()
        self.lcs = self.aux['samples'].astype(np.float32)
        self.meta = self.aux['periods']
        del self.aux
        self.labels = np.random.randint(0, 5, self.lcs.shape[0])
        self.labels_onehot = pd.get_dummies(self.meta['Type']).values
        self.label_encoder = preprocessing.LabelEncoder()
        self.label_encoder.fit(self.labels)
        self.labels_int = self.label_encoder.transform(self.labels)

        if use_time and not use_err:
            self.lcs = self.lcs[:, :, 0:2]
        if not use_time and not use_err:
            self.lcs = self.lcs[:, :, 1:2]

        self.lcs = return_dt(self.lcs)
        if norm:
            self.lcs = normalize(self.lcs,
                                 n_feat=self.lcs.shape[2],
                                 scale_to=[-.9, .9],
                                 norm_time=use_time)
Example #11
    def fit(self,
            data,
            label,
            iteration=10,
            step_size=0.001,
            regularization=0.0,
            test_pct=0.0,
            debug=1):
        """
        fit base on data and label
        """
        start = time.time()
        data = normalize(data)
        d_train, l_train, d_test, l_test = train_test_split(
            data, label, test_pct)
        for t in range(iteration):
            # compute the gradient on the weights
            output_train, loss_train, d_w, d_b = self.train_iteration(
                d_train, l_train, debug)
            # apply gradient on weight
            self.apply_graident(d_w, d_b, step_size, regularization)
            # all book keeping
            output_test, _ = self.compute_output(d_test)
            avg_loss_train = np.mean(loss_train)
            err_rate_train = np.mean(
                1 * (np.argmax(output_train, axis=1) != l_train))
            err_rate_test = np.mean(1 *
                                    (np.argmax(output_test, axis=1) != l_test))
            time_remain = (time.time() - start) / (t + 1) * (iteration - t - 1)

            if debug >= 0:
                debug_str = 'Iter:{0:4d} | Time:{1:4.2f} | TrainErr:{2:4.2f} | Test Err:{3:4.2f} | Loss:{4:4.2f}'.format(
                    t, time_remain, err_rate_train, err_rate_test,
                    avg_loss_train)
                print(debug_str, end='\r')

        print('\n\nTime total : {0}'.format(time.time() - start))
Example #12
def main():
    # ===================================
    # Settings
    # ===================================
    filename = "data/seeds_dataset.csv"
    n_hidden_nodes = [
        5
    ]  # nodes in hidden layers i.e. [n_nodes_1, n_nodes_2, ...]
    l_rate = 0.6  # learning rate
    n_epochs = 800  # number of training epochs
    n_folds = 4  # number of folds for cross-validation

    print("Neural network model:\n n_hidden_nodes = {}".format(n_hidden_nodes))
    print(" l_rate = {}".format(l_rate))
    print(" n_epochs = {}".format(n_epochs))
    print(" n_folds = {}".format(n_folds))

    # ===================================
    # Read data (X,y) and normalize X
    # ===================================
    print("\nReading '{}'...".format(filename))
    X, y = utils.read_csv(filename)  # read as matrix of floats and int
    utils.normalize(X)  # normalize
    N, d = X.shape  # extract shape of X
    n_classes = len(np.unique(y))

    print(" X.shape = {}".format(X.shape))
    print(" y.shape = {}".format(y.shape))
    print(" n_classes = {}".format(n_classes))

    # ===================================
    # Create cross-validation folds
    # These are a list of a list of indices for each fold
    # ===================================
    idx_all = np.arange(0, N)
    idx_folds = utils.crossval_folds(N, n_folds, seed=1)

    # ===================================
    # Train and evaluate the model on each fold
    # ===================================
    acc_train, acc_test = list(), list()  # training/test accuracy score
    print("\nTraining and cross-validating...")
    for i, idx_test in enumerate(idx_folds):

        # Collect training and test data from folds
        idx_train = np.delete(idx_all, idx_test)
        X_train, y_train = X[idx_train], y[idx_train]
        X_test, y_test = X[idx_test], y[idx_test]

        # Build neural network classifier model and train
        model = NeuralNetwork(n_input=d,
                              n_output=n_classes,
                              n_hidden_nodes=n_hidden_nodes)
        model.train(X_train, y_train, l_rate=l_rate, n_epochs=n_epochs)

        # Make predictions for training and test data
        y_train_predict = model.predict(X_train)
        y_test_predict = model.predict(X_test)

        # Compute training/test accuracy score from predicted values
        acc_train.append(100 * np.sum(y_train == y_train_predict) /
                         len(y_train))
        acc_test.append(100 * np.sum(y_test == y_test_predict) / len(y_test))

        # Print cross-validation result
        print(
            " Fold {}/{}: train acc = {:.2f}%, test acc = {:.2f}% (n_train = {}, n_test = {})"
            .format(i + 1, n_folds, acc_train[-1], acc_test[-1], len(X_train),
                    len(X_test)))

    # ===================================
    # Print results
    # ===================================
    print("\nAvg train acc = {:.2f}%".format(
        sum(acc_train) / float(len(acc_train))))
    print("Avg test acc = {:.2f}%".format(
        sum(acc_test) / float(len(acc_test))))
Example #13
def main():
    
    parser = argparse.ArgumentParser(description=
    "Use a variety of recurrent architectures for predicting solar sunpots as a time series\n"\
    "Example: python main_restructured.py --model_type [esn/linear_ar/rnn/lstm/gru] --dataset dynamo --train_file [full path to training data file] \
    --output_file [path to file containing predictions] --predict_cycle_num [index of cycle to be predicted] --grid_search [0/1] --compare_all [0/1] \n"
    "Description of different model types: \n"\
    "esn: echo state network,\n" \
    "linear_ar: linear autoregressive model, \n"\
    "rnn: simple recurrent network (vanilla RNN), \n" \
    "lstm: long-short term memory network, \n" \
    "gru: gated recurrent units (simplification of lstm architecture)", formatter_class=RawTextHelpFormatter)

    parser.add_argument("--model_type", help="Enter the desired model", default="esn", type=str)
    parser.add_argument("--dataset", help="Type of dataset used - (dynamo/solar_data/sinus)", default="dynamo", type=str)
    parser.add_argument("--train_file", help="Location of training data file", default=None, type=str)
    parser.add_argument("--output_file", help="Location of the output file", default=None, type=str)
    parser.add_argument("--predict_cycle_num", help="Cycle index to be predicted", default=None, type=int)
    parser.add_argument("--grid_search", help="Option to perform grid search or not (1 - True, 0 - False)", default=0, type=int)
    parser.add_argument("--compare_all", help="Option to compare all models or not (1 - True, 0 - False)", default=0, type=int)

    # Parse the arguments
    args = parser.parse_args() 
    model_type = args.model_type.lower()
    dataset = args.dataset
    train_file = args.train_file
    output_file = args.output_file
    use_grid_search = args.grid_search
    compare_all_models = args.compare_all
    predict_cycle_num = args.predict_cycle_num

    # Load the configurations required for training
    # It is assumed that the configurations are present in this location
    config_file = "./configurations_{}.json".format(dataset)  
    with open(config_file) as f:
        options = json.load(f)  # This loads options as a dict with keys that can be accessed
    
    # Load the training data
    data = np.loadtxt(train_file)

    # Keep a copy of the unnormalized data
    unnormalized_data = copy.deepcopy(data)
    data[:, 1], Xmax, Xmin = normalize(X=data[:, 1], feature_space=(0, 1))
    minimum_idx = get_minimum(data, dataset)
    
    #tau_chosen = 1 # Usually
    
    # In case running for a single model
    #TODO: Ensure that 'tr_verbose' is a calling parameter and usage is according to 'compare_all_models' flag
    #      Right now, the plot commands are commented out
    #TODO: Fix save model feature in case of comparison of all models
    if compare_all_models == 0:
        
        if model_type in ["linear_ar", "rnn", "lstm", "gru"]:
            tau_chosen = options[model_type]["output_size"]
            print("Tau chosen {}".format(tau_chosen))
        
        output_file = get_pred_output_file(output_file, model_type)
        if model_type == "esn":
            predictions_esn = train_model_ESN(options, model_type, data, minimum_idx, predict_cycle_num=predict_cycle_num, 
                                            tau=1, output_file=output_file)

        elif model_type == "linear_ar":
            predictions_ar = train_model_AR(options, model_type, data, minimum_idx, predict_cycle_num=predict_cycle_num, 
                                            tau=tau_chosen, output_file=output_file, use_grid_search=use_grid_search)

        elif model_type in ["rnn", "lstm", "gru"]:
            predictions_rnn = train_model_RNN(options, model_type, data, minimum_idx, predict_cycle_num=predict_cycle_num, 
                                            tau=tau_chosen, output_file=output_file, use_grid_search=use_grid_search, Xmax=Xmax, Xmin=Xmin)

    # In case running for all models
    elif compare_all_models == 1:
        
        config_file = "./configurations_{}_optimal_osize1_mbatch.json".format(dataset)
        with open(config_file) as f:
            options = json.load(f)  # This loads options as a dict with keys that can be accessed
        
        tau_chosen = options["gru"]["output_size"]
        orig_stdout = sys.stdout
        f_tmp = open('./results/compare_all_preds_{}_cycle{}_osize{}_logs_eps{}_mbatch_trial2.txt'.format(
            dataset, predict_cycle_num, tau_chosen, options["gru"]["num_epochs"]), 'a')
        sys.stdout = f_tmp
        
        #tau_chosen = options["gru"]["output_size"]
        predictions_esn, ytest = train_model_ESN(options, "esn", data, minimum_idx, predict_cycle_num=predict_cycle_num, 
                                        tau=1, output_file=get_pred_output_file(output_file, "esn"))
        predictions_ar = train_model_AR(options, "linear_ar", data, minimum_idx, predict_cycle_num=predict_cycle_num, tau=tau_chosen, 
                                        output_file=get_pred_output_file(output_file, "linear_ar"), use_grid_search=use_grid_search)
        predictions_vanilla_rnn = train_model_RNN(options, "rnn", data, minimum_idx, predict_cycle_num=predict_cycle_num, tau=tau_chosen, 
                                                output_file=get_pred_output_file(output_file, "rnn"), use_grid_search=use_grid_search)
        predictions_lstm = train_model_RNN(options, "lstm", data, minimum_idx, predict_cycle_num=predict_cycle_num, tau=tau_chosen, 
                                        output_file=get_pred_output_file(output_file, "lstm"), use_grid_search=use_grid_search,
                                        Xmax=Xmax, Xmin=Xmin)
        predictions_gru = train_model_RNN(options, "gru", data, minimum_idx, predict_cycle_num=predict_cycle_num, tau=tau_chosen, 
                                        output_file=get_pred_output_file(output_file, "gru"), use_grid_search=use_grid_search,
                                        Xmax=Xmax, Xmin=Xmin)
        
        compare_model_preds = {}
        compare_model_preds["original_test"] = list(np.float64(ytest))
        print("Original signal saved")
        compare_model_preds["pred_esn"] = list(np.float64(predictions_esn))
        print("ESN signal saved")
        compare_model_preds["pred_ar"] = list(np.float64(predictions_ar))
        print("Linear_AR signal saved")
        compare_model_preds["pred_rnn"] = list(np.float64(predictions_vanilla_rnn))
        print("RNN signal saved")
        compare_model_preds["pred_lstm"] = list(np.float64(predictions_lstm))
        print("LSTM signal saved")
        compare_model_preds["pred_gru"] = list(np.float64(predictions_gru))
        print("GRU signal saved")

        with open('./results/compare_all_preds_{}_cycle{}_osize{}_eps{}_mbatch_trial2.json'.format(dataset, predict_cycle_num, tau_chosen, options["gru"]["num_epochs"]), 'w') as f:
            f.write(json.dumps(compare_model_preds, cls=NDArrayEncoder, indent=2))

        # Plot the LSTM, GRU predictions
        #plt.figure(figsize=(15,10))
        '''
        plt.figure()
        plt.title("Compare predictions across models", fontsize=20)
        if len(ytest) > 0:

            plt.plot(ytest[:,0], ytest[:,1], '+-', label="actual test signal", color="orange")
            plt.plot(ytest[:,0], predictions_esn, 'o-', label="ESN prediction", color="red")
            plt.plot(ytest[:,0], predictions_ar, '+-', label="AR prediction", color="cyan")
            #plt.plot(ytest[:,0], predictions_vanilla_rnn, '.-', label="RNN prediction", color="pink")
            plt.plot(ytest[:,0], predictions_lstm, 'x-', label="LSTM prediction", color="blue")
            plt.plot(ytest[:,0], predictions_gru, '*-', label="GRU prediction", color="green")
            plt.legend(fontsize=16)

        else:

            plt.plot(predictions_esn, 'o-', label="ESN prediction", color="red")
            plt.plot(predictions_ar, '+-', label="AR prediction", color="cyan")
        #    plt.plot(predictions_vanilla_rnn, '.-', label="RNN prediction", color="pink")
            plt.plot(predictions_lstm, 'x-', label="LSTM prediction", color="blue")
            plt.plot(predictions_gru, '*-', label="GRU prediction", color="green")
            plt.legend(fontsize=16)

        plt.savefig('./log/ComparingPred_Cycle{}.pdf'.format(predict_cycle_num))
        plt.show()
        '''
        sys.stdout = orig_stdout
        f_tmp.close()
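
Both this example and Example #18 below rely on normalize(X=data[:, 1], feature_space=(0, 1)), which returns the scaled series together with Xmax and Xmin, and on a matching unnormalize that maps predictions back to the original scale. A minimal sketch of that pair under those assumptions:

import numpy as np

def normalize(X, feature_space=(0, 1)):
    """Hypothetical sketch: min-max scale a 1-D series into feature_space; also return the scale factors."""
    lo, hi = feature_space
    Xmin, Xmax = float(np.min(X)), float(np.max(X))
    X_scaled = lo + (X - Xmin) / (Xmax - Xmin) * (hi - lo)
    return X_scaled, Xmax, Xmin

def unnormalize(X_scaled, Xmax, Xmin, feature_space=(0, 1)):
    """Hypothetical inverse of the sketch above."""
    lo, hi = feature_space
    return (X_scaled - lo) / (hi - lo) * (Xmax - Xmin) + Xmin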
Example #14
def evaluate_model(agent, data, verbose, window_size=10):
    total_profit = 0
    num_observations = len(data)

    shares = []
    history = []
    agent.inventory = []
    normed_data = normalize(data)
    cum_return = []
    net_holdings = 0
    shares_history = []
    pct_change = daily_pct_change(data.price, 10)

    for t in range(num_observations):
        done = t == (num_observations - 1)
        reward = 0

        state = get_state(normed_data, t)
        action = agent.action(state, evaluation=True)

        if action == 2 and net_holdings == 0:
            shares = -10
            net_holdings += -10
            history.append((data.price[t], "SELL"))
        elif action == 2 and net_holdings == 10:
            shares = -20
            net_holdings += -20
            history.append((data.price[t], "SELL"))
        elif action == 1 and net_holdings == 0:
            shares = 10
            net_holdings += 10
            history.append((data.price[t], "BUY"))
        elif action == 1 and net_holdings == -10:
            shares = 20
            net_holdings += 20
            history.append((data.price[t], "BUY"))
        else:
            shares = 0
            history.append((data.price[t], "HOLD"))
        shares_history.append(shares)

        reward = calc_reward(pct_change[t], net_holdings)
        total_profit += reward
        # if action == 1:
        #   agent.inventory.append(data.price[t])
        #   shares.append(1)
        #   history.append((data.price[t], "BUY"))

        #   if verbose:
        #     logging.debug(f"Buy at: {format_currency(data.price[t])}")

        # elif action == 2 and len(agent.inventory) > 0:
        #   purchase_price = agent.inventory.pop(0)
        #   delta = data.price[t] - purchase_price
        #   reward = delta
        #   total_profit += delta
        #   shares.append(-1)
        #   history.append((data.price[t], "SELL"))

        #   if verbose:
        #     logging.debug(f"Sell at: {format_currency(data.price[t])} | Position: {format_position(data.price[t] - purchase_price)}")

        # else:
        #   history.append((data.price[t], "HOLD"))
        #   shares.append(0)
        # cum_return.append(total_profit)

        if not done:
            next_state = get_state(normed_data, t + 1)
            agent.memory.append((state, action, reward, next_state, done))
            state = next_state

        if done: return total_profit, history, shares_history
Example #15
def train_model(agent,
                episode,
                data,
                episode_count=50,
                batch_size=32,
                window_size=10):
    total_profit = 0
    num_observations = len(data)

    agent.inventory = []
    shares_history = []
    average_loss = []

    net_holdings = 0
    normed_data = normalize(data)
    pct_change = daily_pct_change(data.price, window_size)

    for t in tqdm(range(num_observations),
                  total=num_observations,
                  leave=True,
                  desc=f'Episode {episode}/{episode_count}'):
        done = t == (num_observations - 1)

        state = get_state(normed_data, t)
        action = agent.action(state)

        if action == 2 and net_holdings == 0:
            shares = -100
            net_holdings += -100
        elif action == 2 and net_holdings == 100:
            shares = -200
            net_holdings += -200
        elif action == 1 and net_holdings == 0:
            shares = 100
            net_holdings += 100
        elif action == 1 and net_holdings == -100:
            shares = 200
            net_holdings += 200
        else:
            shares = 0
        shares_history.append(shares)

        reward = calc_reward(pct_change[t] * 100, net_holdings)
        total_profit += reward

        # if action == 1: # Buy
        #   agent.inventory.append(data.price[t])
        #
        #   reward -= 1e-5 # Commission Penalty

        # elif action == 2 and len(agent.inventory) > 0: # Sell
        #   purchase_price = agent.inventory.pop(0)
        #   delta = data.price[t] - purchase_price
        #   reward = delta - 1e-5 # Commission Penalty
        #   total_profit += delta
        #   shares.append(-1)

        # else: # Hold
        #   shares.append(0)
        #   reward -= 1e-3

        if not done:
            next_state = get_state(normed_data, t + 1)
            agent.remember(state, action, reward, next_state, done)
            state = next_state

        if len(agent.memory) > batch_size:
            loss = agent.replay(batch_size)
            average_loss.append(loss)

        if episode % 10 == 0:
            agent.save(episode)

        if done:
            return (episode, episode_count, total_profit,
                    np.array(average_loss).mean())
Example #16
def main():
    try:
        gConfig = getConfig('config/meta_rl.ini')  # get configuration
        # site = gConfig['site']

        mode = gConfig['mode']
        dataset_name = gConfig['dataset']

        data_dir = gConfig['data_dir']
        # features_dir = gConfig['features_dir']
        # infer_dir = gConfig['infer_dir']
        model_dir = gConfig['model_dir']
        output_dir = gConfig['output_dir']
        log_dir = gConfig['log_dir']

        train_num_epochs = gConfig['train_num_epochs']
        num_layers = gConfig['num_layers']
        num_hidden = gConfig['num_hidden']
        learning_rate = gConfig['learning_rate']
        learning_rate_decay_factor = gConfig['learning_rate_decay_factor']
        num_steps_per_decay = gConfig['num_steps_per_decay']
        num_episodes = gConfig['num_episodes']
        train_batch_size = gConfig['train_batch_size']
        exploration = gConfig['exploration']
        discount_factor = gConfig['discount_factor']
        num_child_steps_per_cycle = gConfig['num_child_steps_per_cycle']
        # max_depth = gConfig['max_depth']
        initial_filters = gConfig['initial_filters']
        # num_classes = gConfig['num_classes']

        optimizer = gConfig['optimizer']
        # dropout_keep_prob = gConfig['dropout_keep_prob']

        # port = gConfig['port']
        # certificate = gConfig['certificate']
        # resource_dir = gConfig['resources']

        if ('train' in mode):
            # specify GPU numbers to use get gpu and cpu devices
            cpu_devices = get_cpu_devices()
            gpu_devices = get_gpu_devices()
            if (len(gpu_devices) > 1):
                os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
                os.environ["CUDA_VISIBLE_DEVICES"] = str(gConfig["gpu_to_use"])

                print("The available GPU devices: " + str(gpu_devices))

                # devices, device_category = (gpu_devices, DeviceCategory.GPU) if len(gpu_devices) > 1 else (cpu_devices, DeviceCategory.CPU)

                # desc = "A Meta-Reinforcement Learning Approach to Optimise Parameters and Hyper-parameters Simultaneously"
                # parser = argparse.ArgumentParser(description=desc)
                #
                # parser.add_argument('--max_layers', default=2)
                #
                # args = parser.parse_args()
                # args.max_layers = int(args.max_layers)

            for dataset_ in dataset_name.split(','):  # datasets
                checkPathExists([
                    model_dir + '/' + dataset_ + '/', data_dir,
                    log_dir + '/' + dataset_ + '/',
                    output_dir + '/' + dataset_ + '/'
                ])

                # create logger
                _log.basicConfig(filename=log_dir + "/" + "log.txt",
                                 level=_log.DEBUG,
                                 format='%(asctime)s %(message)s',
                                 datefmt='%m/%d/%Y %I:%M:%S %p')
                logger = _log.getLogger("VoiceNet")
                logger.setLevel(_log.DEBUG)
                console = _log.StreamHandler()
                console.setLevel(_log.DEBUG)

                formatter = _log.Formatter(
                    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
                )  # create formatter
                console.setFormatter(formatter)
                logger.addHandler(console)

                data_format = ('channels_first'
                               if tf.test.is_built_with_cuda() else
                               'channels_last')

                # dataset preprocessing
                if 'mnist' == dataset_:
                    input_dimensions = '28x28x1'
                    num_classes = 10
                    max_depth = 9
                    train_batch_size = 32
                    # train_num_epochs = 20

                    # dataset = mnist_dataset.read_data_sets(data_dirs + '/', one_hot=True)
                    # train_x, train_y, test_x, test_y = np.reshape(mnist_dataset.train_images(data_dirs), [-1, 784]), mnist_dataset.train_labels(data_dirs), \
                    #                                    np.reshape(mnist_dataset.test_images(data_dirs), [-1, 784]), mnist_dataset.test_labels(data_dirs)
                    (train_x, train_y), (test_x, test_y) = mnist.load_data()

                    train_x = np.expand_dims(train_x, axis=-1)
                    test_x = np.expand_dims(test_x, axis=-1)

                    train_x, test_x = normalize(train_x, test_x)

                    train_y = to_categorical(train_y, num_classes)
                    test_y = to_categorical(test_y, num_classes)

                    if ('channels_first' in data_format):
                        train_x = train_x.transpose(0, 3, 1, 2)
                        test_x = test_x.transpose(0, 3, 1, 2)

                    num_episodes = (
                        len(train_x) // train_batch_size
                    ) * 100  # episodes = num_steps * num_epochs

                elif 'fashion_mst' == dataset_:
                    input_dimensions = '28x28x1'
                    num_classes = 10
                    max_depth = 9
                    # num_episodes = 30000
                    train_batch_size = 32
                    # train_num_epochs = 25

                    (train_x, train_y), (test_x,
                                         test_y) = fashion_mnist.load_data()

                    train_x = np.expand_dims(train_x, axis=-1)
                    test_x = np.expand_dims(test_x, axis=-1)

                    train_x, test_x = normalize(train_x, test_x)

                    train_y = to_categorical(train_y, num_classes)
                    test_y = to_categorical(test_y, num_classes)

                    if ('channels_first' in data_format):
                        train_x = train_x.transpose(0, 3, 1, 2)
                        test_x = test_x.transpose(0, 3, 1, 2)

                    num_episodes = (
                        len(train_x) // train_batch_size
                    ) * 100  # episodes = num_steps * num_epochs
                elif 'cifar10' == dataset_:
                    input_dimensions = '32x32x3'
                    num_classes = 10
                    max_depth = 9
                    # num_episodes = 35000
                    train_batch_size = 32
                    # train_num_epochs = 25

                    (train_x, train_y), (test_x, test_y) = cifar10.load_data()

                    train_x, test_x = normalize(train_x, test_x)

                    train_y = to_categorical(train_y, num_classes)
                    test_y = to_categorical(test_y, num_classes)

                    if ('channels_last' in data_format):
                        train_x = train_x.transpose(0, 2, 3, 1)
                        test_x = test_x.transpose(0, 2, 3, 1)

                    num_episodes = (
                        len(train_x) // train_batch_size
                    ) * 120  # episodes = num_steps * num_epochs
                elif 'cifar100' == dataset_:
                    input_dimensions = '32x32x3'
                    num_classes = 100
                    max_depth = 18
                    train_batch_size = 32
                    # num_episodes = 60000
                    # train_num_epochs = 35

                    (train_x, train_y), (test_x, test_y) = cifar100.load_data()

                    train_x, test_x = normalize(train_x, test_x)

                    train_y = to_categorical(train_y, num_classes)
                    test_y = to_categorical(test_y, num_classes)

                    if ('channels_last' in data_format):
                        train_x = train_x.transpose(0, 2, 3, 1)
                        test_x = test_x.transpose(0, 2, 3, 1)

                    num_episodes = (
                        len(train_x) // train_batch_size
                    ) * 150  # episodes = num_steps * num_epochs
                elif 'tiny_imagenet' == dataset_:
                    input_dimensions = '64x64x3'
                    num_classes = 200
                    max_depth = 18
                    train_batch_size = 32
                    # num_episodes = 80000
                    # train_num_epochs = 30

                    (train_x,
                     train_y), (test_x,
                                test_y) = tiny_imagenet.load_data(data_dir +
                                                                  '/' +
                                                                  dataset_)

                    train_x, test_x = normalize(train_x, test_x)

                    train_y = to_categorical(train_y, num_classes)
                    test_y = to_categorical(test_y, num_classes)

                    if ('channels_last' in data_format):
                        train_x = train_x.transpose(0, 2, 3, 1)
                        test_x = test_x.transpose(0, 2, 3, 1)

                    num_episodes = (
                        len(train_x) // train_batch_size
                    ) * 180  # episodes = num_steps * num_epochs

                np.random.seed(777)
                np.random.shuffle(train_x)
                np.random.seed(777)
                np.random.shuffle(train_y)

                dataset = [train_x, train_y, test_x,
                           test_y]  # pack the dataset for the Network Manager

                train(dataset,
                      dataset_name=dataset_,
                      model_dir=model_dir + '/' + dataset_ + '/',
                      num_episodes=num_episodes,
                      max_depth=max_depth,
                      initial_filters=initial_filters,
                      num_layers=num_layers,
                      num_hidden=num_hidden,
                      initial_learning_rate=learning_rate,
                      learning_rate_decay_factor=learning_rate_decay_factor,
                      train_batch_size=train_batch_size,
                      test_batch_size=1,
                      train_num_epochs=train_num_epochs,
                      input_dimensions=input_dimensions,
                      num_classes=num_classes,
                      optimizer=optimizer,
                      num_steps_per_decay=num_steps_per_decay,
                      num_child_steps_per_cycle=num_child_steps_per_cycle,
                      exploration=exploration,
                      discount_factor=discount_factor,
                      log_dir=log_dir + '/' + dataset_ + '/',
                      output_dir=output_dir + '/' + dataset_ + '/',
                      logger=logger)

        # elif ('test' in mode):
        #     # 61, 24, 60,  5, 57, 55, 59, 3
        #     evaluate("5, 32, 2,  5, 3, 64, 2, 3", "model", data_dirs)
        elif ('analysis' in mode):
            plt.figure(figsize=(10, 10))

            plt.rcParams.update({'font.size': 6})

            count = 1
            for dataset_ in dataset_name.split(','):  # datasets
                # checkPathExists(['plots/' + dataset_ + '/'])
                checkPathExists(['plots/'])
                dataset = load_dataset(output_dir + '/' + dataset_ + '/' +
                                       dataset_ + '_results.csv')
                plot(dataset['policy_episode'], dataset['policy_loss'],
                     dataset['reward'], dataset['network_accuracy'], 0,
                     10000, "Episodes", "Policy Loss",
                     dataset_.replace('_', ' '), count,
                     "plots/" + dataset_ + "/episod_accuracy.png")
                count += 1

            # plt.savefig("plots/results.png")
            # plt.gca().yaxis.set_minor_formatter(NullFormatter())
            # Adjust the subplot layout, because the logit one may take more space
            # than usual, due to y-tick labels like "1 - 10^{-3}"
            plt.subplots_adjust(top=0.92,
                                bottom=0.22,
                                left=0.1,
                                right=0.6,
                                hspace=0.25,
                                wspace=0.35)

            plt.savefig("plots/results.pdf", bbox_inches='tight')
            plt.close()

            plt.figure()
            plt.rcParams.update({'font.size': 8})
            dataset = load_dataset(output_dir + '/cifar10/cifar10_results.csv')
            plot_cifar10(dataset['time_taken'], dataset['network_accuracy'], 0,
                         720, "Time (minutes)",
                         "Network validation accuracy (%)", '',
                         "plots/cifar10_time_accuracy.pdf")

    except Exception as ex:
        print("main function failed - " + str(ex))
        raise ex
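
Here normalize(train_x, test_x) preprocesses the image batches and returns both splits; a common choice, assumed in the sketch below, is per-channel standardization using statistics computed on the training set only:

import numpy as np

def normalize(train_x, test_x):
    """Hypothetical sketch: standardize image batches per channel with training-set statistics."""
    train_x = train_x.astype(np.float32)
    test_x = test_x.astype(np.float32)
    mean = train_x.mean(axis=(0, 1, 2), keepdims=True)
    std = train_x.std(axis=(0, 1, 2), keepdims=True) + 1e-7
    return (train_x - mean) / std, (test_x - mean) / std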
Example #17
def run_bench(algos, repetition, test_size, trainset, testset, drop_cols):
    results = {
        'label': {},
        'accuracy': defaultdict(lambda: []),
        'fit': defaultdict(lambda: []),
        'predict': defaultdict(lambda: []),
        'confusion': defaultdict(lambda: []),
    }
    for i in range(repetition):
        function = 0
        dataset = Dataset(test_size=test_size,
                          trainset=trainset,
                          testset=testset,
                          drop_cols=drop_cols)
        for algo, name in algos:
            instance = algo(dataset)
            for result in instance.test():
                label = '%s\n(%s)' % (name, result['function'])
                accuracy = result['accuracy'] * 100
                fit = result['fit_duration']
                predict = result['predict_duration']
                confusion = result['confusion']
                print(
                    '%s:\n\taccuracy: %9.6f %%\n\tfit:      %9.6f s\n\tpredict:  %9.6f s\n\tconfusion: \n%s'
                    % (label, accuracy, fit, predict, confusion))
                results['label'][function] = label
                results['accuracy'][function].append(accuracy)
                results['fit'][function].append(fit)
                results['predict'][function].append(predict)
                results['confusion'][function].append(confusion)
                function += 1

    labelTrans = ['non-spam', 'spam']
    algoNames = list(results['label'].values())
    fitMeans = np.mean(list(results['fit'].values()), axis=1)
    predictMeans = np.mean(list(results['predict'].values()), axis=1)
    cmMeans = [
        normalize(np.mean(m, axis=0))
        for m in list(results['confusion'].values())
    ]
    cmInterleaved = np.reshape(cmMeans, (-1, 2)).reshape((2, -1), order='F')
    n_groups = len(algoNames)

    # create plot
    fig, ax1 = plt.subplots()
    y_pos = range(1, n_groups + 1)
    bar_width = 0.35
    opacity = 0.5

    # duration axis with 2 bars
    ax1.set_ylabel('Duration (s)')
    ax1.bar(y_pos,
            predictMeans,
            bar_width,
            bottom=fitMeans,
            alpha=opacity,
            color='g',
            label='Predict')
    ax1.bar(y_pos, fitMeans, bar_width, alpha=opacity, color='b', label='Fit')
    ax1.legend(loc=2)  # add the legend in the top left corner

    # instantiate a second axes that shares the same x-axis
    # accuracy axis with a boxplot
    ax2 = ax1.twinx()
    ax2.set_ylabel('Accuracy (%)')
    ax2.boxplot(list(results['accuracy'].values()))

    plt.title('Algorithm comparison (%d executions)' % repetition)
    ax1.set_xticklabels(
        algoNames)  # set ticks and labels on ax1 (otherwise it does not work)
    ax1.tick_params(axis='x', which='major',
                    labelsize=7)  # reduce size of x labels
    plt.tight_layout()

    plt.figure()
    data = cmInterleaved * 100
    ax3 = sn.heatmap(data,
                     yticklabels=labelTrans,
                     xticklabels=algoNames * 2,
                     annot=True,
                     fmt='.0f',
                     vmin=0,
                     vmax=100)
    ax3.tick_params(axis='x', which='major',
                    labelsize=7)  # reduce size of x labels
    plt.title('Confusion Matrix (%)')
    plt.ylabel('True')
    plt.xlabel('Predicted')
    plt.subplots_adjust(left=0.21, right=1, top=0.92)

    plt.show()
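
In this benchmark, normalize(np.mean(m, axis=0)) turns each averaged confusion matrix into fractions before it is plotted as percentages. A sketch of a row-wise normalization consistent with that usage (assumed, not the project's actual helper):

import numpy as np

def normalize(cm):
    """Hypothetical sketch: scale each row of a confusion matrix so that it sums to 1."""
    cm = np.asarray(cm, dtype=np.float64)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return cm / row_sums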
Example #18
def main():

    parser = argparse.ArgumentParser(description=
    "Use a variety of recurrent architectures for predicting solar sunpots as a time series\n"\
    "Example: python main_gs.py --model_type [esn/linear_ar/rnn/lstm/gru] --dataset dynamo --train_file [full path to training data file] \
    --output_file [path to file containing predictions] --test_file [path to test file (if any)] \
    --verbosity [1 or 2] \n"
    "Description of different model types: \n"\
    "esn: echo state network,\n" \
    "linear_ar: linear autoregressive model, \n"\
    "rnn: simple recurrent network (vanilla RNN / Elman unit), \n" \
    "lstm: long-short term memory network, \n" \
    "gru: gated recurrent units (simplification of lstm architecture)", formatter_class=RawTextHelpFormatter)
    parser.add_argument("--model_type",
                        help="Enter the desired model",
                        default="esn",
                        type=str)
    parser.add_argument(
        "--dataset",
        help="Type of dataset used - (dynamo/solar_data/sinus)",
        default="dynamo",
        type=str)
    parser.add_argument("--train_file",
                        help="Location of training data file",
                        default=None,
                        type=str)
    parser.add_argument("--output_file",
                        help="Location of the output file",
                        default=None,
                        type=str)
    parser.add_argument("--verbose",
                        help="Verbosity (0 or 1)",
                        default=0,
                        type=int)
    #parser.add_argument("--test_file", help="(Optional) Location of the test data file", default=None, type=str)
    parser.add_argument("--predict_cycle_num",
                        help="Cycle index to be predicted",
                        default=None,
                        type=int)
    parser.add_argument(
        "--grid_search",
        help="Option to perform grid search or not (1 - True, 0 - False",
        default=0,
        type=int)

    # Parse the arguments
    args = parser.parse_args()
    model_type = args.model_type.lower()
    dataset = args.dataset
    train_file = args.train_file
    output_file = args.output_file
    verbose = args.verbose
    use_grid_search = args.grid_search

    # test_file = args.test_file
    predict_cycle_num = args.predict_cycle_num

    # Load the configurations required for training
    # It is assumed that the configurations are present in this location
    config_file = "./configurations_{}.json".format(dataset)

    with open(config_file) as f:
        options = json.load(
            f)  # This loads options as a dict with keys that can be accessed

    # Load the training data
    data = np.loadtxt(train_file)
    # Keep a copy of the unnormalized data
    unnormalized_data = copy.deepcopy(data)
    data[:, 1], Xmax, Xmin = normalize(X=data[:, 1], feature_space=(0, 1))
    minimum_idx = get_minimum(data, dataset)
    #data[:, 1] = np.diff(data[:,1], prepend=data[0, 1])

    # Get multiple step ahead prediction datasets : #NOTE: Only for Linear_AR so far
    if model_type == "esn":

        X, Y = get_msah_training_dataset(data,
                                         minimum_idx=minimum_idx,
                                         tau=1,
                                         p=1)

        # predict cycle index = entered predict cycle num - 1
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        options["esn"]["tau"] = len(ytest) - 1
        options["esn"]["history_q"] = options["esn"]["tau"] + 1
        model = load_model_with_opts(options, model_type)

        # Concat data
        xtrain_ct = concat_data(xtrain, col=-1)
        ytrain_ct = concat_data(ytrain, col=-1)

        #tr_data_signal = xtrain_ct[:, -1].reshape((-1, 1))
        #te_data_signal = ytest[:, -1].reshape((-1, 1))

        # pred of q values
        predictions, te_data_signal, pred_indexes = train_and_predict_ESN(
            model, train_data=xtrain_ct, test_data=ytest)

        # Saving prediction results
        save_pred_results(output_file=output_file,
                          predictions=predictions,
                          te_data_signal=te_data_signal)

    elif model_type == "linear_ar":

        # Load the model with corresponding options
        if use_grid_search == 0:

            model = load_model_with_opts(options, model_type)
            X, Y = get_msah_training_dataset(data,
                                             minimum_idx=minimum_idx,
                                             tau=1,
                                             p=options[model_type]["num_taps"])
            # predict cycle index = entered predict cycle num - 1
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)
            # pred of q values
            predictions_ar, test_error, val_error, tr_error = train_and_predict_AR(
                model,
                xtrain,
                ytrain,
                ytest,
                tr_to_val_split=0.9,
                tr_verbose=True)
            plot_predictions(
                predictions=predictions_ar,
                ytest=ytest,
                title="AR model predictions with {} taps for cycle index {}".
                format(options[model_type]["num_taps"], predict_cycle_num))

            # Save prediction results in a txt file
            save_pred_results(output_file=output_file,
                              predictions=predictions_ar,
                              te_data_signal=ytest[:, -1])

        elif use_grid_search == 1:

            Error_dict = {}
            test_predictions = []
            test_error_optimal = []
            nval = 1
            num_total_cycles = len(np.diff(minimum_idx))
            #predict_cycle_num_array = list(np.arange(num_total_cycles-nval, num_total_cycles))
            predict_cycle_num_array = [predict_cycle_num]
            params = {"num_taps": list(np.arange(10, 50, 2))}  # For Dynamo
            #params = {"num_taps":list(np.arange(5, 50, 2))} # For Solar
            #TODO: Fix array nature of optimal_num_taps_all
            optimal_num_taps_all, training_errors_all, val_errors_all, test_errors_all = grid_search_AR_all_cycles(
                data=data,
                solar_indices=minimum_idx,
                model_type=model_type,
                options=options,
                params=params,
                predict_cycle_num_array=predict_cycle_num_array)

            Error_dict["validation_errors_with_taps"] = [
                (float(params["num_taps"][i]), *val_errors_all[:, i])
                for i in range(val_errors_all.shape[1])
            ]

            plt.figure()
            plt.plot(params["num_taps"],
                     val_errors_all[0],
                     label="Validation MSE")
            plt.plot(params["num_taps"],
                     training_errors_all[0],
                     label="Training MSE")
            plt.ylabel("MSE")
            plt.xlabel("Number of taps")
            plt.legend()
            plt.title("Error (MSE) vs number of taps")
            plt.show()

            if type(optimal_num_taps_all) != list:
                optimal_num_taps_all = [optimal_num_taps_all]

            Error_dict["optimal_num_taps"] = [
                float(*optimal_num_taps_all)
            ]  #NOTE: Object of int64 is not json serializable

            # Retrain the model again with the optimal value
            for i, optimal_num_taps in enumerate(optimal_num_taps_all):

                options[model_type]["num_taps"] = optimal_num_taps
                model = load_model_with_opts(options, model_type)
                X, Y = get_msah_training_dataset(data,
                                                 minimum_idx=minimum_idx,
                                                 tau=1,
                                                 p=optimal_num_taps)
                xtrain, ytrain, ytest = get_cycle(
                    X, Y, icycle=predict_cycle_num_array[i])
                # pred of q values
                predictions_ar, test_error, val_error, tr_error = train_and_predict_AR(
                    model,
                    xtrain,
                    ytrain,
                    ytest,
                    tr_to_val_split=0.75,
                    tr_verbose=True)
                test_predictions.append(predictions_ar.tolist())
                if len(ytest) > 0:

                    plot_predictions(
                        predictions=predictions_ar,
                        ytest=ytest,
                        title=
                        "AR model predictions with {} taps for cycle index {}".
                        format(optimal_num_taps, predict_cycle_num_array[i]))
                    test_error_optimal.append(test_error)

                else:

                    resolution = np.around(np.diff(data[:, 0]).mean(), 1)
                    plt.figure()
                    plt.plot(data[:minimum_idx[-1], 0], data[:minimum_idx[-1],
                                                             1], 'r+-')
                    plt.plot(
                        np.arange(ytrain[-1][-1][0] + resolution,
                                  ((len(predictions_ar)) * resolution) +
                                  ytrain[-1][-1][0], resolution),
                        predictions_ar, 'b*-')
                    plt.legend(['Original timeseries', 'Future prediction'])
                    plt.title(
                        'Plot of original timeseries and future predictions')
                    plt.show()

            Error_dict["Test_predictions"] = test_predictions
            if len(test_error_optimal) > 0:
                Error_dict["Test_error"] = [test_error_optimal]
            else:
                Error_dict["Test_error"] = []

            with open(
                    './log/grid_search_results_{}_cycle{}.json'.format(
                        dataset, predict_cycle_num_array[i]), 'w+') as fp:
                json.dump(Error_dict, fp, indent=2)

            #TODO: To fix saving result files properly
            save_pred_results(output_file=output_file,
                              predictions=predictions_ar,
                              te_data_signal=ytest[:, -1])

    elif model_type in ["rnn", "lstm", "gru"]:

        # In case parameter tuning is not carried out
        if use_grid_search == 0:

            # Load the model with the corresponding options
            model = load_model_with_opts(options, model_type)

            #NOTE: Obtain the data and targets by heuristically setting p
            num_taps_rnn = 22
            X, Y = get_msah_training_dataset(data,
                                             minimum_idx=minimum_idx,
                                             tau=1,
                                             p=num_taps_rnn)

            # Get xtrain, ytrain, ytest
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

            # Pred of q values
            predictions_rnn, test_error, val_error, tr_error = train_and_predict_RNN(
                model,
                xtrain,
                ytrain,
                ytest,
                tr_to_val_split=0.90,
                tr_verbose=True)
            if len(ytest) > 0:

                # Normalized predictions in [0, 1]
                plot_predictions(
                    predictions=predictions_rnn,
                    ytest=ytest,
                    title="{} model predictions with {} taps for cycle index {}"
                    .format(model_type, num_taps_rnn, predict_cycle_num))

                # Unnormalized predictions in original scale
                ytest_un = np.copy(ytest)
                ytest_un[:, -1] = unnormalize(ytest[:, -1], Xmax, Xmin)
                plot_predictions(
                    predictions=unnormalize(predictions_rnn, Xmax, Xmin),
                    ytest=ytest_un,
                    title=
                    "{} model predictions (unnormalized) with {} taps for cycle index {}"
                    .format(model_type, num_taps_rnn, predict_cycle_num))

                # Save prediction results in a txt file
                save_pred_results(output_file=output_file,
                                  predictions=predictions_rnn,
                                  te_data_signal=ytest[:, -1])
            else:

                plot_future_predictions(
                    data=data,
                    minimum_idx=minimum_idx,
                    ytrain=ytrain,
                    predictions=predictions_rnn,
                    title=
                    "Plot of original timeseries and future predictions for {} for cycle index {}"
                    .format(model_type, predict_cycle_num))

                plot_future_predictions(
                    data=unnormalized_data,
                    minimum_idx=minimum_idx,
                    ytrain=ytrain,
                    predictions=unnormalize(predictions_rnn, Xmax, Xmin),
                    title=
                    "Plot of original unnormalized timeseries and future predictions for {} for cycle index {}"
                    .format(model_type, predict_cycle_num))

                # Save prediction results in a txt file
                save_pred_results(output_file=output_file,
                                  predictions=predictions_rnn,
                                  te_data_signal=ytest)

        elif use_grid_search == 1:

            gs_params = {"n_hidden": [30, 40, 50]}

            gs_list_of_options = create_list_of_dicts(options=options,
                                                      model_type=model_type,
                                                      param_dict=gs_params)

            print("Grid Search to be carried over following {} configs:\n".
                  format(len(gs_list_of_options)))
            val_errors_list = []

            for i, gs_option in enumerate(gs_list_of_options):

                print("Config:{} is \n{}".format(i + 1, gs_option))
                # Load the model with the corresponding options
                model = RNN_model(
                    input_size=gs_option["input_size"],
                    output_size=gs_option["output_size"],
                    n_hidden=gs_option["n_hidden"],
                    n_layers=gs_option["n_layers"],
                    num_directions=gs_option["num_directions"],
                    model_type=gs_option["model_type"],
                    batch_first=gs_option["batch_first"],
                    lr=gs_option["lr"],
                    device=gs_option["device"],
                    num_epochs=gs_option["num_epochs"],
                )

                #NOTE: Obtain the data and targets by heuristically setting p
                num_taps_rnn = 22
                X, Y = get_msah_training_dataset(data,
                                                 minimum_idx=minimum_idx,
                                                 tau=1,
                                                 p=num_taps_rnn)

                # Get xtrain, ytrain, ytest
                xtrain, ytrain, ytest = get_cycle(X,
                                                  Y,
                                                  icycle=predict_cycle_num)

                # Pred of q values
                predictions_rnn, _, val_error, tr_error = train_and_predict_RNN(
                    model,
                    xtrain,
                    ytrain,
                    ytest,
                    tr_to_val_split=0.90,
                    tr_verbose=True)
                gs_option["Validation_Error"] = val_error
                gs_option["Training_Error"] = tr_error

                val_errors_list.append(gs_option)

            with open(
                    'gs_results_{}_cycle_{}.json'.format(
                        model_type, predict_cycle_num), 'w') as f:
                f.write(json.dumps(val_errors_list, indent=2))
Example #19
    def __init__(self, operator_type, operator_order, num_gridpoints, grid_spacing=1.0 ):
        """
        Constructor for Smoothness_operator class

        Args:
            operator_type (str): 
                The type of operator. Accepts one of the following values:
                    '1d_bilateral'
                    '1d_periodic'
                    '2d_bilateral'
                    '2d_periodic'

            operator_order (int): 
                The order of the operator.

            num_gridpoints: 
                The number of gridpoints in each dimension of the domain.
        """
        
        # Make sure grid_spacing is valid
        if not isinstance(grid_spacing, float):
            raise ControlledError('/Laplacian/ grid_spacing must be a float: grid_spacing = %s' % type(grid_spacing))
        if not (grid_spacing > 0):
            raise ControlledError('/Laplacian/ grid_spacing must be > 0: grid_spacing = %s' % grid_spacing)
        
        if '1d' in operator_type:
            self._coordinate_dim = 1

            # Make sure operator_type is valid
            if operator_type == '1d_bilateral':
                periodic = False
            elif operator_type == '1d_periodic':
                periodic = True
            else:
                raise ControlledError('/Laplacian/ Cannot identify operator_type: operator_type = %s' % operator_type)
                
            self._type = operator_type
            
            self._sparse_matrix, self._kernel_basis = \
                laplacian_1d(num_gridpoints, operator_order, grid_spacing, periodic)
            
            self._G = self._kernel_basis.shape[0]
            self._kernel_dim = self._kernel_basis.shape[1]
            self._alpha = operator_order

        elif '2d' in operator_type:
            self._coordinate_dim = 2

            assert( len(num_gridpoints)==2 )
            assert( all([isinstance(n,utils.NUMBER) for n in num_gridpoints]) )

            assert( len(grid_spacing)==2 )
            assert( all([isinstance(n,utils.NUMBER) for n in grid_spacing]) )

            if operator_type == '2d_bilateral':
                periodic = False
            elif operator_type == '2d_periodic':
                periodic = True
            else:
                raise ControlledError('ERROR: cannot identify operator_type.')

            
            self._type = operator_type
            
            self._sparse_matrix, self._kernel_basis = \
                laplacian_2d( num_gridpoints, 
                              operator_order, 
                              grid_spacing, 
                              periodic=periodic, 
                              sparse=True,
                              report_kernel=True)

            self._Gx = int(num_gridpoints[0])
            self._Gy = int(num_gridpoints[1])
            self._G = self._Gx * self._Gy
            self._alpha = operator_order
            assert( self._G == self._kernel_basis.shape[0] )
            self._kernel_dim = self._kernel_basis.shape[1]

        else:
            raise ControlledError('/Laplacian/ Cannot identify operator_type: operator_type = %s' % operator_type)

        # Compute spectrum, and set lowest rank eigenvectors as kernel
        self._dense_matrix = self._sparse_matrix.todense()
        eigenvalues, eigenvectors = eigh(self._dense_matrix)
        self._eigenvalues = eigenvalues
        self._eigenbasis = utils.normalize(eigenvectors)
        #self._kernel_basis = self._eigenbasis[:,:self._kernel_dim]

        # Set kernel eigenvalues and eigenvectors
        self._eigenvalues[:self._kernel_dim] = 0.0
        self._eigenbasis[:,:self._kernel_dim] = self._kernel_basis
Example #20
 def test_html_tags(self):
     x = "the parasite <i>Plasmodium</i> in Ca<sup>2+</sup> was"
     y = normalize(x, do_not_remove='+')
     z = "the parasite plasmodium in ca2+ was"
     self.assertEqual(y, z)
     self.assertNotEqual(x, z)