def run_features(args):
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --csv or "
            "'load_data' configuration must exist in config file")
    df = generate_features(df, **config["generate_features"])
def nextDayPrediction(typeBlockchain, stock):
    df = get_data.get_data_frame5minutes(typeBlockchain, stock)
    # keep the most recent 3/8 of the rows
    df = df[['open', 'low', 'high', 'close', 'volume', 'date_time']][-int(df.shape[0] / 8 * 3):]
    df.index = df.date_time
    df = df.sort_index()

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    all_df = df.copy()
    x = all_df[['low', 'high']].copy()
    y = all_df['close'].copy()
    x[['low', 'high']] = x_scaler.fit_transform(x)
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    # x['close'] = y
    shape = x.shape[1]

    X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)
    # X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=0.90, TrainTest=True)

    model = build_model(input_shape=(WINDOW, shape))
    print('START FIT MODEL...')
    start = time.time()
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=1, callbacks=[history])
def run_scoring(args):
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --csv or "
            "'load_data' configuration must exist in config file")
    y_predicted = score_model(df, **config["score_model"])
    if args.save is not None:
        pd.DataFrame(y_predicted).to_csv(args.save, index=False)
def test_load_data():
    iris = load_data()
    assert iris.shape == (150, 6)
    assert sorted(set(iris.species)) == ['setosa', 'versicolor', 'virginica']
    assert 'iris.npy' in os.listdir()
    os.remove('iris.npy')
def run_training(args):
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --csv or "
            "'load_data' configuration must exist in config file")
    tmo = train_model(df, **config["train_model"])
    if args.save is not None:
        with open(args.save, "wb") as f:
            pickle.dump(tmo, f)
        logger.info("Trained model object saved to %s", args.save)
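# A minimal sketch, not from the original code, of reloading the trained
# model object (tmo) that run_training pickles above. The path "model.pkl"
# is a hypothetical example standing in for whatever was passed as args.save.
import pickle

with open("model.pkl", "rb") as f:
    tmo = pickle.load(f)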
def sorted_spikes_1D_decoding(epoch_key):
    data = load_data(epoch_key)
    logging.info(data["neuron_info"].area.value_counts())
    for area in data["neuron_info"].area.unique():
        logging.info(f"Decoding {area}...")
        cv = KFold()
        results = []
        neuron_id = (data["neuron_info"]
                     .loc[data["neuron_info"].area == area]
                     .neuron_id)
        spikes = data["spikes"].loc[:, neuron_id]
        for fold_ind, (train, test) in enumerate(
                cv.split(data["position_info"].index)):
            logging.info(f"Fitting Fold #{fold_ind + 1}...")
            classifier = SortedSpikesClassifier(**classifier_parameters)
            classifier.fit(
                position=data["position_info"].iloc[train].linear_position,
                spikes=spikes.iloc[train],
                track_graph=data["track_graph"],
                edge_order=EDGE_ORDER,
                edge_spacing=EDGE_SPACING,
            )
            logging.info("Predicting posterior...")
            results.append(
                classifier.predict(
                    spikes.iloc[test],
                    time=data["position_info"].iloc[test].index /
                    np.timedelta64(1, "s"),
                    state_names=state_names,
                ))
            classifier.save_model(
                os.path.join(
                    PROCESSED_DATA_DIR,
                    f"{epoch_key[0]}_{epoch_key[1]:02d}_{epoch_key[2]:02d}_"
                    f"sortedspikes_{area}_model_fold{fold_ind}.pkl"))

        # concatenate cv classifier results
        results = xr.concat(results, dim="time")
        results.to_netcdf(
            os.path.join(
                PROCESSED_DATA_DIR,
                f"{epoch_key[0]}_{epoch_key[1]:02d}_{epoch_key[2]:02d}_"
                f"sortedspikes_{area}_results.nc"))
        logging.info("Done...\n\n")
def run_analysis(epoch_key, use_likelihoods):
    animal, day, epoch = epoch_key
    data_types = set(itertools.chain(*use_likelihoods.values()))
    data = load_data(epoch_key)
    assert np.allclose(data['is_ripple'].shape[0],
                       data['position_info'].shape[0],
                       data['spikes'].shape[0],
                       data['multiunit'].shape[0])

    plot_behavior(data['position_info'])
    figure_name = f'behavior_{animal}_{day:02d}_{epoch:02d}.png'
    plt.savefig(os.path.join(FIGURE_DIR, 'behavior', figure_name))

    replay_detector = ReplayDetector(**detector_parameters)
    logging.info(replay_detector)
    replay_detector.fit(
        is_ripple=data['is_ripple'],
        speed=data['position_info'].speed,
        position=data['position_info'].linear_position,
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        track_graph=data['track_graph'],
        center_well_id=0,
        edge_order=EDGE_ORDER,
        edge_spacing=EDGE_SPACING,
    )

    # Plot detector fits
    if 'spikes' in data_types:
        axes = replay_detector.plot_spikes(
            data['spikes'], data['position_info'].linear_position,
            data['is_ripple'], sampling_frequency=SAMPLING_FREQUENCY)
        replay_detector.plot_fitted_place_fields(
            sampling_frequency=SAMPLING_FREQUENCY, axes=axes)
        figure_name = f'spikes_{animal}_{day:02d}_{epoch:02d}.png'
        plt.savefig(os.path.join(FIGURE_DIR, 'detector', figure_name))

    decode(data, replay_detector, use_likelihoods, epoch_key,
           SAMPLING_FREQUENCY)
    logging.info('Done...\n\n')
def main():
    BATCH_SIZE = 32
    TEXT, LABEL, train_iterator, valid_iterator, test_iterator = load_data(BATCH_SIZE)
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 300
    HIDDEN_DIM = 512
    OUTPUT_DIM = 1
    NUM_LAYERS = 3
    DROPOUT = 0.4
    N_EPOCHS = 5
    PATH = './weight/weight_w_attention.pth'
    ATTN_FLAG = True
    print('data loading done')

    model = Model(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM,
                  NUM_LAYERS, DROPOUT)

    # check the embedding vector
    # pretrained_embeddings = TEXT.vocab.vectors
    # print(pretrained_embeddings.shape)

    # set an optimizer and a loss function
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()

    # for a GPU environment
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = criterion.to(device)

    for epoch in range(N_EPOCHS):
        train_loss, train_acc = train_run(model, train_iterator, optimizer,
                                          criterion, ATTN_FLAG)
        valid_loss, valid_acc = eval_run(model, valid_iterator, criterion,
                                         ATTN_FLAG)
        print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | '
              f'Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | '
              f'Val. Acc: {valid_acc*100:.2f}% |')

    test_loss, test_acc = eval_run(model, test_iterator, criterion, ATTN_FLAG)
    print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')

    if ATTN_FLAG is True:
        attn_visualization(model, test_iterator, TEXT, multiple_flag=True)

    torch.save(model.state_dict(), PATH)
def run_features(args):
    """Orchestrates the generation of features from command-line arguments."""
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.input is not None:
        df = pd.read_csv(args.input)
    elif "load_data" in config:
        df = load_data(config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --input or "
            "'load_data' configuration must exist in config file")
    df = generate_features(df, **config["generate_features"])
    if args.output is not None:
        df.to_csv(args.output, index=False)
        logger.info("Features saved to %s", args.output)
    return df
def main():
    epoch_key = ('bon', 3, 2)
    speed_metric = 'linear_speed'
    position_metric = 'linear_distance'
    data_types = set(itertools.chain(*USE_LIKELIHOODS.values()))
    data = load_data(epoch_key, ANIMALS, SAMPLING_FREQUENCY, data_types,
                     BRAIN_AREAS, speed_metric)

    n_components = np.linspace(15, 60, 4, dtype=int)
    weight_concentration_prior = np.logspace(-2, 2, 5)
    grid_parameters = [
        dict(n_components=n, tol=1E-6, max_iter=200,
             weight_concentration_prior=w)
        for n, w in itertools.product(n_components,
                                      weight_concentration_prior)
    ]

    results = []
    for multiunit_model_kwargs in grid_parameters:
        results.append(cross_validate_position_decoding(
            data,
            multiunit_density_model=BayesianGaussianMixture,
            multiunit_model_kwargs=multiunit_model_kwargs,
            position_metric=position_metric,
            speed_metric=speed_metric,
            use_smoother=True,
            data_types={'multiunit'},
            n_splits=5))

    keys = list(itertools.product(n_components, weight_concentration_prior))
    results = pd.concat(results, axis=1, keys=keys,
                        names=['n_components', 'weight_concentration_prior'])
    results = (results
               .unstack()
               .reset_index()
               .rename(columns={0: 'root_mean_squared_error'})
               .drop(columns='level_2'))
    average_rmse = (results
                    .groupby(['n_components', 'weight_concentration_prior'])
                    .root_mean_squared_error
                    .agg(['mean', 'std']))
    print(average_rmse)
import pandas as pd
# from src.load_data import rating_df, movie_df
from src.load_data import load_data
import logbook
import sys

rating_df, movie_df, genres, rated_movie_df = load_data()

# logbook
function_log = logbook.Logger('RECOMMENDER')
level = logbook.TRACE
logbook.StreamHandler(sys.stdout, level=level).push_application()

try:
    function_log.trace('Start parsing data for genres based function')
    # get n_reviewers and average rating
    movie_rating_df = movie_df.copy()
    # movie_rating_df['n_reviewers'] = rating_df.groupby('movieid')['rating'].count().reset_index()['rating']
    # movie_rating_df['avg_rating'] = rating_df.groupby('movieid')['rating'].mean().reset_index()['rating']

    # calculate total number of reviewers for each movie
    n_reviewers = rating_df.groupby('movieid')['rating'].count()
    n_reviewers.name = 'n_reviewers'
    movie_rating_df = movie_rating_df.join(n_reviewers, on='movieid',
                                           how='left')

    # calculate average rating for each movie
    avg_rating = rating_df.groupby('movieid')['rating'].mean()
    avg_rating.name = 'avg_rating'
    movie_rating_df = movie_rating_df.join(avg_rating, on='movieid',
                                           how='left')
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from src.load_data import load_data

X, y = load_data()
X_train, X_test, y_train, y_test = train_test_split(X, y)

model = LogisticRegression(solver="lbfgs").fit(X_train, y_train)
training_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print("training set score: %f" % training_score)
print("test set score: %f" % test_score)

"""
training set score: 0.846784
test set score: 0.708772
"""
from keras.models import load_model

from NetworkConfigurations import config
from src.load_data import load_data, get_files
from src.PredictSave import predict, save_images
from src.loss_functions import weighted_dice_coefficient_loss2d

model = load_model(config["path_to_model"], custom_objects={
    'weighted_dice_coefficient_loss2d': weighted_dice_coefficient_loss2d
})

paths, masks_paths = get_files(path_to_data=config["path_to_data"])
images, masks = load_data(paths=paths,
                          masks_paths=masks_paths,
                          train_modalities=config["train_modalities"],
                          image_shape=config["image_shape"],
                          train_validate_rate=config["train_validate_rate"])

predictions_0, predictions_1, constr_masks = predict(
    model=model,
    images=images,
    masks=masks,
    image_shape=config["image_shape"],
    input_shape=config["input_shape"])

save_images(predictions=predictions_1, constr_masks=constr_masks)
x = all_df[['open', 'low', 'high', 'volume']].copy()
y = all_df['close'].copy()
# pd.ewma() was removed from pandas; the .ewm() accessor is the modern equivalent
x = x.ewm(com=2).mean()
y = y.ewm(com=2).mean()
x[['open', 'low', 'high', 'volume']] = x_scaler.fit_transform(x)
y = y_scaler.fit_transform(y.values.reshape(-1, 1))
shape = x.shape[1]

# X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)  # do not delete: switches to collecting train data only
X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=0.96,
                                                  TrainTest=True)

model = build_model(input_shape=(WINDOW, shape))
print('START FIT MODEL...')
start = time.time()
history = History()
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    batch_size=128, epochs=200, verbose=1,
                    callbacks=[history])
# model.fit(X_train, y_train, batch_size=128, epochs=200, verbose=1)  # do not delete: switches to training on the train set only
end = time.time()
print('Learning time: ', end - start)
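# Aside (not part of the original snippet): a quick check of the pd.ewma
# replacement used above. The old pd.ewma(s, 2) passed 2 as the
# center-of-mass argument, so s.ewm(com=2).mean() reproduces it in modern
# pandas.
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.ewm(com=2).mean())  # same values the old pd.ewma(s, 2) returned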
def main(epoch_key, speed_metric='linear_speed',
         position_metric='linear_distance', use_smoother=False):
    data = load_data(epoch_key, ANIMALS, SAMPLING_FREQUENCY, BRAIN_AREAS,
                     speed_metric)

    replay_detector = ReplayDetector(**detector_parameters)
    replay_detector.fit(is_replay=data['is_ripple'],
                        speed=data['position_info'].linear_speed,
                        position=data['position_info'][position_metric],
                        lfp_power=data['power'],
                        spikes=data['spikes'],
                        multiunit=data['multiunit'])

    lfp_power_detector_results = replay_detector.predict(
        speed=data['position_info'].linear_speed,
        position=data['position_info'][position_metric],
        lfp_power=data['power'],
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        time=data['position_info'].index,
        use_likelihoods=['lfp_power'],
        use_smoother=use_smoother)

    spikes_detector_results = replay_detector.predict(
        speed=data['position_info'].linear_speed,
        position=data['position_info'][position_metric],
        lfp_power=data['power'],
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        time=data['position_info'].index,
        use_likelihoods=['spikes'],
        use_smoother=use_smoother)

    multiunit_detector_results = replay_detector.predict(
        speed=data['position_info'].linear_speed,
        position=data['position_info'][position_metric],
        lfp_power=data['power'],
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        time=data['position_info'].index,
        use_likelihoods=['multiunit'],
        use_smoother=use_smoother)

    spikes_replay_info, _ = get_replay_times(spikes_detector_results)
    lfp_power_replay_info, _ = get_replay_times(lfp_power_detector_results)
    multiunit_replay_info, _ = get_replay_times(multiunit_detector_results)

    replay_infos = OrderedDict(
        [('ad_hoc_ripple', data['ripple_times']),
         ('ad_hoc_multiunit', data['multiunit_high_synchrony_times']),
         ('lfp_power', lfp_power_replay_info),
         ('spikes', spikes_replay_info),
         ('clusterless', multiunit_replay_info)],
    )

    animal, day, epoch = epoch_key
    for data_source, replay_info in replay_infos.items():
        logging.info(f'{data_source}...')
        folder = join(FIGURE_DIR, f'replays_{data_source}')
        os.makedirs(folder, exist_ok=True)
        for replay_number in tqdm(replay_info.index):
            fig, _ = plot_replay_with_data(replay_number, data, replay_info,
                                           replay_detector,
                                           spikes_detector_results,
                                           lfp_power_detector_results,
                                           multiunit_detector_results,
                                           epoch_key)
            figure_name = (f'{animal}_{day:02d}_{epoch:02d}_{data_source}'
                           f'_{replay_number:03d}.png')
            figure_path = join(folder, figure_name)
            plt.savefig(figure_path, bbox_inches='tight')
            plt.close(fig)
def data_fixture():
    rating_df, movie_df, genres, rated_movie_df = load_data()
    return rating_df, movie_df, rated_movie_df
def nextDayPrediction(typeBlockchain, stock):
    df = get_data.get_data_frame(typeBlockchain, stock)
    df.index = df.date
    x = df[['close']].copy()
    y = df[['close']].copy()
    NUM_FEATURES = x.shape[1]
    # pd.ewma() was removed from pandas; the .ewm() accessor is the modern equivalent
    x = x.ewm(com=2).mean()
    y = y.ewm(com=2).mean()

    # scaling data
    scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    x[['close']] = scaler.fit_transform(x)
    y[['close']] = y_scaler.fit_transform(y)
    x['cl_2'] = y['close']  # append the scaled target as a second column

    # Load data. Split train/test
    # X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=0.96, TrainTest=True)
    X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)
    x = x.close  # keep only the 'close' series for the prediction batch below

    # the original used an undefined name 'N' here; NUM_FEATURES is the intended value
    model = build_model(input_shape=(WINDOW, NUM_FEATURES))

    # training our model
    print('START FIT MODEL...')
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=0, callbacks=[history])
    model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=1)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "../../models/model_1f_" + typeBlockchain + today + ".h5"
    save_model(model, pathModel)
    # model = load_model(pathModel)

    # One-day prediction: take the last known window of data
    # (the y value is not needed here; it is what we predict)
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # predicted value

    # build a one-row DataFrame indexed by the next calendar date
    lastDate = str(df.last_valid_index()).split('-')
    currentData = datetime.date(int(lastDate[0]), int(lastDate[1]),
                                int(lastDate[2])) + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["close"], index=predictionDate)
    print(prediction)

    del model
    K.clear_session()
    return prediction
def run_experiment(victim_model_name: str, reference_model_names: List[str],
                   dataset: str, loss: str, epsilon: float, tau: float,
                   delta: float, eta: float, eta_g: float, n_images: int,
                   image_limit: int, compare_gradients: bool,
                   show_images: bool, seed: int = 0,
                   check_success: bool = True) -> None:
    """
    Runs an experiment of the subspace attack on a batch of images.

    It outputs the results in the `outputs/` folder, in a file named
    `YYYY-MM-DD.HH-MM.npy`. The output file is a dictionary exported with
    `numpy.save`. The format of the dictionary is:

    ```python
    experiment_info = {
        'experiment_baseline': {
            'victim_model': victim_model_name,
            'reference_model_names': reference_model_names,
            'dataset': dataset
        },
        'hyperparameters': {
            'tau': tau,
            'epsilon': epsilon,
            'delta': delta,
            'eta': eta,
            'eta_g': eta_g
        },
        'settings': {
            'n_images': n_images,
            'image_limit': image_limit,
            'compare_gradients': compare_gradients,
            'gpu': ...,   # whether the GPU has been used for the experiment
            'seed': seed
        },
        'results': {
            'queries': ...,     # the number of queries run
            'total_time': ...,  # the time it took to run the experiment
            # The following are present only if compare_gradients == True
            'gradient_products': ...,         # the cosine similarities for each image
            'true_gradient_norms': ...,       # the norms of the true gradients for each image
            'estimated_gradient_norms': ...,  # the norms of the estimated gradients for each image
            'true_losses': ...,               # the true losses each iteration
            'common_signs': ...,       # the percentages of common signs between true and estimated gradients
            'subs_common_signs': ...,  # the percentages of common signs between subsequent gradients
        }
    }
    ```

    The names of the hyperparameters are the same as used in [1]. The
    equivalents in [2] are also explained for each parameter.

    Parameters
    ----------
    victim_model_name: str
        The name of the model to be attacked.
    reference_model_names: List[str]
        The list of names of the models to be used as references.
    dataset: str
        The dataset from which the examples should be generated.
    loss: str
        The name of the loss function to be used.
    epsilon: float
        The maximum perturbation allowed in $\ell_\infty$ norm. In [2] it
        has the same name.
    tau: float
        The Bandit exploration ($\delta$ in [2]).
    delta: float
        Finite difference probe (the lower $\eta$ in [2]).
    eta_g: float
        OCO learning rate (the upper $\eta$ in [2]).
    eta: float
        Image learning rate ($h$ in [2]).
    n_images: int
        The number of images on which the attack should be run.
    image_limit: int
        The maximum number of queries to be attempted.
    compare_gradients: bool
        Whether the real and the estimated gradients should be compared
        after each loop. **Warning**: the use of this feature slows down the
        attack. It should be used just to check experimentally the behavior
        of the gradients.
    show_images: bool
        Whether each image to be attacked, and its corresponding adversarial
        example, should be shown.
    seed: int
        The seed used to initialize the pseudo-random generators, for
        reproducibility purposes.
    check_success: bool
        Whether the attack should stop once it has been successful. Default
        is True. You might want to use False if you want to record some
        events (e.g. loss or gradient similarity) for all the iterations.

    References
    ----------
    [1] Guo, Yiwen, Ziang Yan, and Changshui Zhang. "Subspace Attack:
        Exploiting Promising Subspaces for Query-Efficient Black-box
        Attacks." Advances in Neural Information Processing Systems 2019.
    [2] Ilyas, Andrew, Logan Engstrom, and Aleksander Madry. "Prior
        convictions: Black-box adversarial attacks with bandits and priors."
        arXiv preprint arXiv:1807.07978 (2018).
""" # Fix the seeds for reproducibility purposes torch.manual_seed(seed) random.seed(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Print introductory message print('----- Running experiment with the following settings -----') print('\n----- Models information -----') print(f'Victim model: {victim_model_name}') print(f'Reference models names: {reference_model_names}') print(f'Dataset: {dataset.value}') print(f'Loss function: {loss.value}') print(f'\n------ Hyperparameters -----') print(f'tau: {tau}') print(f'epsilon: {epsilon}') print(f'delta: {delta}') print(f'eta: {eta}') print(f'eta_g: {eta_g}') print('\n----- General settings -----') print(f'Number of images: {n_images}') print(f'Limit of iterations per image: {image_limit}') print(f'Compare gradients: {compare_gradients}') print(f'Show images: {show_images}') print(f'Seed: {seed}') print(f'GPU in use: {torch.cuda.is_available()}') print(f'Check success: {check_success}') # Save experiment initial information experiment_info = { 'experiment_baseline': { 'victim_model': victim_model_name, 'reference_model_names': reference_model_names, 'dataset': dataset.value, 'loss': loss.value }, 'hyperparameters': { 'tau': tau, 'epsilon': epsilon, 'delta': delta, 'eta': eta, 'eta_g': eta_g }, 'settings': { 'n_images': n_images, 'image_limit': image_limit, 'compare_gradients': compare_gradients, 'gpu': torch.cuda.is_available(), 'seed': seed, 'check_success': check_success }, 'results': { # Initialize dict entry to save results later. } } # Load data using required dataset data_loader, classes = load_data(dataset, True) num_classes = len(classes) # Load reference models reference_models = [ load_model(model_name, num_classes) for model_name in reference_model_names ] # Load victim model victim_model = load_model(victim_model_name, num_classes) # Move models to CUDA, if available if torch.cuda.is_available(): reference_models = list( map(lambda model: model.to('cuda'), reference_models)) victim_model = victim_model.to('cuda') # Get loss function criterion = load_loss(loss) # Set victim model to `eval()` mode to avoid dropout and batch normalization victim_model.eval() # Initialize images counter counter = 0 # Initalize the arrays to save results queries = [] final_models = [] all_true_gradient_norms = [] all_estimated_gradient_norms = [] all_gradient_products = [] all_true_losses = [] all_common_signs = [] all_subs_common_signs = [] # Initialize timing information run_time = datetime.datetime.now().replace(microsecond=0) tic = time.time() print(f'\n----- Beginning at {run_time} -----') # Loop over the dataset for data, target in data_loader: print(f'\n--------------------------------------------\n') print(f'Target number {counter}\n') # Attack the image (queries_counter, gradient_products, true_gradient_norms, estimated_gradient_norms, true_losses, common_signs, subs_common_signs, final_model) = \ attack(data, criterion, target, epsilon, tau, delta, eta_g, eta, victim_model, reference_models, image_limit, compare_gradients, show_images, check_success=check_success) counter += 1 # Save the results of the attack queries.append(queries_counter) final_models.append(final_model) all_gradient_products.append(gradient_products) all_true_gradient_norms.append(true_gradient_norms) all_estimated_gradient_norms.append(estimated_gradient_norms) all_true_losses.append(true_losses) all_common_signs.append(common_signs) all_subs_common_signs.append(subs_common_signs) # Stop if all the required images have been attacked if 
counter == n_images: break # Save the total time total_time = time.time() - tic # Make an np.array aout of the queries array to print some stats queries_array = np.array(queries) failed = queries_array == -1 print(f'\n-------------\n') print(f'Experiment finished:\n') print(f'Mean number of queries: {queries_array[~failed].mean()}') print(f'Median number of queries: {np.median(queries_array[~failed])}') print(f'Number of failed queries: {len(queries_array[failed])}') print(f'Total time: {total_time} s') print(f'\n-------------\n') # Save experiment run information experiment_info['results']['queries'] = queries_array experiment_info['results']['total_time'] = total_time experiment_info['results']['final_model'] = final_models # Save gradients information, if required by experiment run if compare_gradients: experiment_info['results']['gradient_products'] = np.array( all_gradient_products) experiment_info['results']['true_gradient_norms'] = np.array( all_true_gradient_norms) experiment_info['results']['estimated_gradient_norms'] = np.array( all_estimated_gradient_norms) experiment_info['results']['true_losses'] = np.array(all_true_losses) experiment_info['results']['common_signs'] = np.array(all_common_signs) experiment_info['results']['subs_common_signs'] = np.array( all_subs_common_signs) # Take care of results output folder results_path = OUTPUT_DIR experiment_info_filename = run_time.strftime('%Y-%m-%d.%H-%M') if not os.path.exists(results_path): os.makedirs(results_path) # Save results np.save(results_path + experiment_info_filename, experiment_info, allow_pickle=True)
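# A minimal sketch, not part of the original experiment code, of reading the
# saved results back: np.save on a dict requires allow_pickle=True on load,
# and .item() unwraps the 0-d object array into the dict. The file name below
# is a hypothetical example of the 'YYYY-MM-DD.HH-MM.npy' pattern described
# in the docstring.
import numpy as np

experiment_info = np.load('outputs/2019-12-01.10-30.npy',
                          allow_pickle=True).item()
print(experiment_info['results']['queries'])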
def main():
    data_generator = load_data()
    _history = []
    device = None
    model = None
    criterion = None
    fold_index = 0

    for TEXT, LABEL, train_data, val_data in data_generator.get_fold_data(
            num_folds=args['num_folds']):
        logger.info("***** Running Training *****")
        logger.info(f"Now fold: {fold_index + 1} / {args['num_folds']}")

        TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.300d")
        logger.info(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABEL.build_vocab(train_data)

        model = Model(len(TEXT.vocab), args['embedding_dim'],
                      args['hidden_dim'], args['output_dim'],
                      args['num_layers'], args['dropout'])
        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        if args['gpu'] is True and args['gpu_number'] is not None:
            torch.cuda.set_device(args['gpu_number'])
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        model = model.to(device)
        criterion = criterion.to(device)

        train_iterator = data.Iterator(train_data,
                                       batch_size=args['batch_size'],
                                       sort_key=lambda x: len(x.text),
                                       device=device)
        val_iterator = data.Iterator(val_data,
                                     batch_size=args['batch_size'],
                                     sort_key=lambda x: len(x.text),
                                     device=device)

        for epoch in range(args['epochs']):
            train_loss, train_acc = train_run(model, train_iterator,
                                              optimizer, criterion)
            logger.info(
                f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | '
                f'Train Acc: {train_acc*100:.2f}%')

        val_loss, val_acc = eval_run(model, val_iterator, criterion)
        logger.info(
            f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')
        _history.append([val_loss, val_acc])
        fold_index += 1

    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])
    logger.info('***** Cross Validation Result *****')
    logger.info(f'LOSS: {loss}, ACC: {acc}')
def nextDayPrediction(typeBlockchain, stock):
    df = get_data.get_data_frame(typeBlockchain, stock)

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    all_df = df.copy()
    x = all_df[['open', 'low', 'high', 'volume']].copy()
    y = all_df['close'].copy()
    # pd.ewma() was removed from pandas; the .ewm() accessor is the modern equivalent
    x = x.ewm(com=2).mean()
    y = y.ewm(com=2).mean()
    x[['open', 'low', 'high', 'volume']] = x_scaler.fit_transform(x)
    NUM_FEATURES = x.shape[1]
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    x['close'] = y

    # X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)
    X_train, y_train, X_test, y_test = load.load_data(x, WINDOW,
                                                      train_size=0.96,
                                                      TrainTest=True)
    x = x[['open', 'low', 'high', 'volume']]

    model = build_model(input_shape=(WINDOW, NUM_FEATURES))
    print('START FIT MODEL...')
    start = time.time()
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=0, callbacks=[history])
    model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=0)
    end = time.time()
    print('Learning time: ', end - start)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "../../models/model_5f_" + typeBlockchain + today + ".h5"
    save_model(model, pathModel)
    # model = load_model(pathModel)

    # One-day prediction: take the last known window of data
    # (the y value is not needed here; it is what we predict)
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # predicted value

    # build a one-row DataFrame indexed by the next calendar date
    lastDate = str(df.date[df.last_valid_index()]).split('-')
    currentData = datetime.date(int(lastDate[0]), int(lastDate[1]),
                                int(lastDate[2])) + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["predictionPrice"],
                              index=predictionDate.values)
    print(prediction)

    del model
    K.clear_session()
    return prediction
# Model
from src.models.general_CNN import CNNModel
# Generator and pre-processing
from src.audio_data_generator import AudioDataGenerator

TRAIN = True
WRITE_RESULTS = True

# File paths to data
TRAIN_PATH = './input/train/audio/'
TEST_PATH = './input/test/audio'
VAL_FILE_PATH = './input/train/validation_list.txt'

# Which feature representation we use
FEATURE_REP = 'log_mel_spectrogram'

(x_train, y_train), (x_val, y_val), label_binarizer = load_data(
    path=TRAIN_PATH, val_path=VAL_FILE_PATH)

model_instance = CNNModel()
audio_preprocessor = AudioDataGenerator(generator_method=FEATURE_REP)

if TRAIN:
    model = model_instance.create_model(
        audio_preprocessor.get_data_shape(x_train[0]))
    tensorboard = TensorBoard(log_dir='./logs/{}'.format(time.time()),
                              batch_size=model_instance.BATCH_SIZE)
    checkpoint = ModelCheckpoint(model_instance.checkpoint_path,
                                 monitor='val_loss')
    early_stop = EarlyStopping(monitor='val_loss')
def nextDayPrediction(typeBlockchain, stock): """ Triggers for plotting """ DJA = IdexDataframe("DJA") GSPC = IdexDataframe("GSPC") NYA = IdexDataframe("NYA") loaded = get_data.get_data_frame(typeBlockchain, stock) loaded.index = loaded.date loaded = loaded[['open', 'close', 'low', 'high', 'volume']] df = pd.concat([DJA, GSPC, NYA, loaded], axis=1, ignore_index=False) df = df.fillna(method='ffill') df = df.dropna(axis=0, how='any') x_scaler = MinMaxScaler() y_scaler = MinMaxScaler() all_df = df.copy() feature = [ 'OpenDJA', 'HighDJA', 'LowDJA', 'CloseDJA', 'Adj CloseDJA', 'VolumeDJA', 'OpenGSPC', 'HighGSPC', 'LowGSPC', 'CloseGSPC', 'Adj CloseGSPC', 'VolumeGSPC', 'OpenNYA', 'HighNYA', 'LowNYA', 'CloseNYA', 'Adj CloseNYA', 'VolumeNYA', 'open', 'low', 'high', 'volume' ] x = all_df[feature].copy() y = all_df['close'].copy() #x = pd.ewma(x,2) #y = pd.ewma(y,2) x[feature] = x_scaler.fit_transform(x) y = y_scaler.fit_transform(y.values.reshape(-1, 1)) x['close'] = y num_features = x.shape[1] #X_train, y_train = load.load_data(x, WINDOW, TrainTest = False) X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=1.0, TrainTest=True) model = build_model(input_shape=(WINDOW, num_features)) print('START FIT MODEL...') start = time.time() #history = History() #history= model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=500,verbose=0, # callbacks=[history]) model.fit(X_train, y_train, batch_size=32, epochs=5, verbose=1) end = time.time() print('Learning time: ', end - start) today = time.strftime("_%d_%m_%Y") pathModel = "../../models/model_5f_" + typeBlockchain + today + ".h5" save_model(model, pathModel) del model K.clear_session() model = load_model(pathModel) #model = load_model(pathModel) # one day prediction. get last batch known data (now we didnt need in y value and can predict it) lastbatch = np.array(x[-WINDOW:]) pred = model.predict([lastbatch.reshape(1, 22, num_features)]) pred = np.array(y_scaler.inverse_transform(pred)) # predicted value prediction = pred.reshape(-1) print(prediction) return prediction
from src.load_data import load_data, prepare_data
from src.model import classifier_model
from src.train import train_model, save_model, evaluate_model
from src.utils import plot_learning_curves

# Main file to train a model with the name defined below
model_name = "2_blocks_20_epochs"

# Load and prepare dataset
x_train, y_train, x_test, y_test = load_data()
x_train, y_train, x_test, y_test = prepare_data(x_train, y_train,
                                                x_test, y_test)

# Train model on data
model = classifier_model()
trained_model, model_history = train_model(model, x_train, y_train,
                                           x_test, y_test)

# Save model and output results
save_model(trained_model, model_name)
evaluate_model(trained_model, x_test, y_test)
plot_learning_curves(model_history, model_name)
# Local
local_data_path = '../data/fer2013/fer2013.csv'
local_data_path2 = 'data/fer2013/fer2013.csv'

data_path_r = None
if os.path.isfile(local_data_path2):
    data_path_r = local_data_path2
elif os.path.isfile(local_data_path):
    data_path_r = local_data_path
else:
    data_path_r = drive_data_path

img_size = (48, 48)
x_train, y_train, x_val, y_val, x_test, y_test = load_data(data_path_r)

history = pickle.load(open("history.bin", "rb"))

save_path_2 = "save_model_2.ckpt"
model = load_model(save_path_2)

pred_y = model.predict(x_test)
print(pred_y[0], y_test[0])


def make_int_y(y):
    y = np.array(y)
    y__ = []
    for k in y:
        # The loop body is truncated in the source; collapsing each one-hot
        # row to its integer class label is the assumed continuation.
        y__.append(int(np.argmax(k)))
    return np.array(y__)
box_axs[8, 6].axis("off") box_axs[8, 5].axis("off") box_axs[8, 4].axis("off") for i, col_name in enumerate(column_names): r = int(i / 8) c = int(i % 8) ax = box_axs[r, c] sns.boxplot(train_df[col_name], ax=ax, palette=cmap) box_fig.savefig('images/boxplots.png') # # Barplots # train_df = load_data() column_names = train_df.columns.tolist()[:1] bar_fig = Figure(figsize=(50, 50)) bar_fig.tight_layout() bar_axs = bar_fig.subplots(9, 8) bar_axs[8, 7].axis("off") bar_axs[8, 6].axis("off") bar_axs[8, 5].axis("off") bar_axs[8, 4].axis("off") for i, col_name in enumerate(column_names): r = int(i / 8) c = int(i % 8) ax = bar_axs[r, c]
def nextDayPrediction(typeBlockchain, stock):
    plot = True
    plotHistory = False
    interactiveGraph = True
    plotForTrain = False

    df = get_data.get_data_frame(typeBlockchain, stock)
    df.index = df.date
    df = df[['open', 'close', 'low', 'high', 'volume']]

    # technical indicators
    df = ma_rel_diff(df)
    df = ema_rel_diff(df)
    df = mom(df)
    df = roc(df)
    df = bbands(df)
    df = normalized_bbands(df)
    df = rsi(df)
    df = stochastics(df)
    df = macd(df)
    df = atr(df)
    df = adx(df)
    df = df.dropna()

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    all_df = df.copy()
    features = ['macd_12_26_9', 'stoch_d_14_3', 'roc_14']
    # other feature sets tried:
    # ['moment_20', 'ema_rel_diff_10', 'ma_rel_diff_50'],
    # ['atr_14', 'moment_20'],
    # ['atr_14', 'moment_20', 'low', 'high'],
    # ['roc_14', 'moment_20', 'ema_rel_diff_10'],
    # ['roc_14', 'rsi_14'],
    # ['roc_14', 'rsi_14', 'macd_12_26_9']

    x = all_df[features].copy()
    y = all_df['close'].copy()
    NUM_FEATURES = x.shape[1]
    x[features] = x_scaler.fit_transform(x)
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    x['close'] = y

    X_train, y_train, X_test, y_test = load.load_data(x, WINDOW,
                                                      train_size=0.96,
                                                      TrainTest=True)
    # keep the scaled features, dropping the appended target column;
    # the original rebuilt x from the unscaled all_df here (flagged ####!!!! in the source)
    x = x[features]

    model = build_model(input_shape=(WINDOW, NUM_FEATURES))
    print('START FIT MODEL...')
    print(features)
    print()
    start = time.time()
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=0, callbacks=[history])
    model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=1)
    end = time.time()
    print('Learning time: ', end - start)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "./model_" + str(features) + typeBlockchain + today + ".h5"
    # pathModel = "../../models/model_low_high_USDT_BTC_03_08_2017.h5"
    save_model(model, pathModel)
    # model = load_model(pathModel)

    # One-day prediction: take the last known window of data
    # (the y value is not needed here; it is what we predict)
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # predicted value

    # build a one-row DataFrame indexed by the next calendar date
    lastDate = str(df.last_valid_index()).split('-')
    currentData = datetime.date(int(lastDate[0]), int(lastDate[1]),
                                int(lastDate[2])) + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["predictionPrice"],
                              index=predictionDate.values)
    print(prediction)
    return prediction
"weekofyear", 'summer', 'autumn', 'winter', "cos_weekday", "sin_weekday", 'udsprevisionempresa_shifted-1', 'udsprevisionempresa_shifted-6', 'udsstock_shifted7', 'roll4wd_udsstock_shifted7', 'roll4wd_udsstock' ]] _, _, predict, metrics = run(train_data, "udsstock", base_model, params, tags) predict["modelo"] = modelo return metrics, predict if __name__ == "__main__": # Load Stock data data = load_data() # # Initalize base model # from sklearn.linear_model import LinearRegression # base_model = LinearRegression() from sklearn.ensemble import RandomForestRegressor base_model = RandomForestRegressor(n_estimators=200) params = {"n_estimators": 200} with open("config/model_stock.json") as config_file: config = json.load(config_file) arg1 = "all" # Si especificamos todos, entrena todos los modelos configurados if arg1 == "all":