def run_features(args):
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --csv or "
            "'load_data' configuration must exist in config file")
    df = generate_features(df, **config["generate_features"])
def nextDayPrediction(typeBlockchain, stock):
    df = get_data.get_data_frame5minutes(typeBlockchain, stock)
    # keep the most recent 3/8 of the rows
    df = df[['open', 'low', 'high', 'close', 'volume', 'date_time']][-int(df.shape[0] / 8 * 3):]
    df.index = df.date_time
    df = df.sort_index()

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    all_df = df.copy()
    x = all_df[['low', 'high']].copy()
    y = all_df['close'].copy()
    x[['low', 'high']] = x_scaler.fit_transform(x)
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    # x['close'] = y
    shape = x.shape[1]

    X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)
    # X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=0.90, TrainTest=True)

    model = build_model(input_shape=(WINDOW, shape))
    print('START FIT MODEL...')
    start = time.time()
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=1, callbacks=[history])
def run_scoring(args):
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --csv or "
            "'load_data' configuration must exist in config file")
    y_predicted = score_model(df, **config["score_model"])
    if args.save is not None:
        pd.DataFrame(y_predicted).to_csv(args.save, index=False)
def test_load_data():
    iris = load_data()
    assert iris.shape == (150, 6)
    assert sorted(set(iris.species)) == ['setosa', 'versicolor', 'virginica']
    assert 'iris.npy' in os.listdir()
    os.remove('iris.npy')
def run_training(args):
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --csv or "
            "'load_data' configuration must exist in config file")
    tmo = train_model(df, **config["train_model"])
    if args.save is not None:
        with open(args.save, "wb") as f:
            pickle.dump(tmo, f)
        logger.info("Trained model object saved to %s", args.save)
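# A minimal sketch, not from the original code, of reloading the trained
# model object (tmo) that run_training pickles above. The path "model.pkl"
# is a hypothetical example standing in for whatever was passed as args.save.
import pickle

with open("model.pkl", "rb") as f:
    tmo = pickle.load(f)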
def sorted_spikes_1D_decoding(epoch_key):
    data = load_data(epoch_key)
    logging.info(data["neuron_info"].area.value_counts())
    for area in data["neuron_info"].area.unique():
        logging.info(f"Decoding {area}...")
        cv = KFold()
        results = []
        neuron_id = (data["neuron_info"]
                     .loc[data["neuron_info"].area == area]
                     .neuron_id)
        spikes = data["spikes"].loc[:, neuron_id]
        for fold_ind, (train, test) in enumerate(
                cv.split(data["position_info"].index)):
            logging.info(f"Fitting Fold #{fold_ind + 1}...")
            classifier = SortedSpikesClassifier(**classifier_parameters)
            classifier.fit(
                position=data["position_info"].iloc[train].linear_position,
                spikes=spikes.iloc[train],
                track_graph=data["track_graph"],
                edge_order=EDGE_ORDER,
                edge_spacing=EDGE_SPACING,
            )
            logging.info("Predicting posterior...")
            results.append(
                classifier.predict(
                    spikes.iloc[test],
                    time=data["position_info"].iloc[test].index /
                    np.timedelta64(1, "s"),
                    state_names=state_names,
                ))
            classifier.save_model(
                os.path.join(
                    PROCESSED_DATA_DIR,
                    f"{epoch_key[0]}_{epoch_key[1]:02d}_{epoch_key[2]:02d}_"
                    f"sortedspikes_{area}_model_fold{fold_ind}.pkl"))

        # concatenate cv classifier results
        results = xr.concat(results, dim="time")
        results.to_netcdf(
            os.path.join(
                PROCESSED_DATA_DIR,
                f"{epoch_key[0]}_{epoch_key[1]:02d}_{epoch_key[2]:02d}_"
                f"sortedspikes_{area}_results.nc"))
        logging.info("Done...\n\n")
def run_analysis(epoch_key, use_likelihoods):
    animal, day, epoch = epoch_key
    data_types = set(itertools.chain(*use_likelihoods.values()))
    data = load_data(epoch_key)
    assert np.allclose(data['is_ripple'].shape[0],
                       data['position_info'].shape[0],
                       data['spikes'].shape[0],
                       data['multiunit'].shape[0])

    plot_behavior(data['position_info'])
    figure_name = f'behavior_{animal}_{day:02d}_{epoch:02d}.png'
    plt.savefig(os.path.join(FIGURE_DIR, 'behavior', figure_name))

    replay_detector = ReplayDetector(**detector_parameters)
    logging.info(replay_detector)
    replay_detector.fit(
        is_ripple=data['is_ripple'],
        speed=data['position_info'].speed,
        position=data['position_info'].linear_position,
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        track_graph=data['track_graph'],
        center_well_id=0,
        edge_order=EDGE_ORDER,
        edge_spacing=EDGE_SPACING,
    )

    # Plot detector fits
    if 'spikes' in data_types:
        axes = replay_detector.plot_spikes(
            data['spikes'], data['position_info'].linear_position,
            data['is_ripple'], sampling_frequency=SAMPLING_FREQUENCY)
        replay_detector.plot_fitted_place_fields(
            sampling_frequency=SAMPLING_FREQUENCY, axes=axes)
        figure_name = f'spikes_{animal}_{day:02d}_{epoch:02d}.png'
        plt.savefig(os.path.join(FIGURE_DIR, 'detector', figure_name))

    decode(data, replay_detector, use_likelihoods, epoch_key,
           SAMPLING_FREQUENCY)
    logging.info('Done...\n\n')
def main():
    BATCH_SIZE = 32
    TEXT, LABEL, train_iterator, valid_iterator, test_iterator = load_data(BATCH_SIZE)
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 300
    HIDDEN_DIM = 512
    OUTPUT_DIM = 1
    NUM_LAYERS = 3
    DROPOUT = 0.4
    N_EPOCHS = 5
    PATH = './weight/weight_w_attention.pth'
    ATTN_FLAG = True
    print('data loading done')

    model = Model(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM,
                  NUM_LAYERS, DROPOUT)

    # check the embedding vector
    # pretrained_embeddings = TEXT.vocab.vectors
    # print(pretrained_embeddings.shape)

    # set an optimizer and a loss function
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()

    # for a GPU environment
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = criterion.to(device)

    for epoch in range(N_EPOCHS):
        train_loss, train_acc = train_run(model, train_iterator, optimizer,
                                          criterion, ATTN_FLAG)
        valid_loss, valid_acc = eval_run(model, valid_iterator, criterion,
                                         ATTN_FLAG)
        print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | '
              f'Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | '
              f'Val. Acc: {valid_acc*100:.2f}% |')

    test_loss, test_acc = eval_run(model, test_iterator, criterion, ATTN_FLAG)
    print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')

    if ATTN_FLAG is True:
        attn_visualization(model, test_iterator, TEXT, multiple_flag=True)

    torch.save(model.state_dict(), PATH)
def run_features(args):
    """Orchestrates the generation of features from command-line arguments."""
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)  # yaml.load() without a Loader is deprecated and unsafe
    if args.input is not None:
        df = pd.read_csv(args.input)
    elif "load_data" in config:
        df = load_data(config["load_data"])
    else:
        raise ValueError(
            "Path to CSV for input data must be provided through --input or "
            "'load_data' configuration must exist in config file")
    df = generate_features(df, **config["generate_features"])
    if args.output is not None:
        df.to_csv(args.output, index=False)
        logger.info("Features saved to %s", args.output)
    return df
def main():
    epoch_key = ('bon', 3, 2)
    speed_metric = 'linear_speed'
    position_metric = 'linear_distance'
    data_types = set(itertools.chain(*USE_LIKELIHOODS.values()))
    data = load_data(epoch_key, ANIMALS, SAMPLING_FREQUENCY, data_types,
                     BRAIN_AREAS, speed_metric)

    n_components = np.linspace(15, 60, 4, dtype=int)
    weight_concentration_prior = np.logspace(-2, 2, 5)
    grid_parameters = [
        dict(n_components=n, tol=1E-6, max_iter=200,
             weight_concentration_prior=w)
        for n, w in itertools.product(n_components,
                                      weight_concentration_prior)
    ]

    results = []
    for multiunit_model_kwargs in grid_parameters:
        results.append(cross_validate_position_decoding(
            data,
            multiunit_density_model=BayesianGaussianMixture,
            multiunit_model_kwargs=multiunit_model_kwargs,
            position_metric=position_metric,
            speed_metric=speed_metric,
            use_smoother=True,
            data_types={'multiunit'},
            n_splits=5))

    keys = list(itertools.product(n_components, weight_concentration_prior))
    results = pd.concat(results, axis=1, keys=keys,
                        names=['n_components', 'weight_concentration_prior'])
    results = (results
               .unstack()
               .reset_index()
               .rename(columns={0: 'root_mean_squared_error'})
               .drop(columns='level_2'))
    average_rmse = (results
                    .groupby(['n_components', 'weight_concentration_prior'])
                    .root_mean_squared_error
                    .agg(['mean', 'std']))
    print(average_rmse)
import pandas as pd
# from src.load_data import rating_df, movie_df
from src.load_data import load_data
import logbook
import sys

rating_df, movie_df, genres, rated_movie_df = load_data()

# logbook
function_log = logbook.Logger('RECOMMENDER')
level = logbook.TRACE
logbook.StreamHandler(sys.stdout, level=level).push_application()

try:
    function_log.trace('Start parsing data for genres based function')
    # get n_reviewers and average rating
    movie_rating_df = movie_df.copy()
    # movie_rating_df['n_reviewers'] = rating_df.groupby('movieid')['rating'].count().reset_index()['rating']
    # movie_rating_df['avg_rating'] = rating_df.groupby('movieid')['rating'].mean().reset_index()['rating']

    # calculate total number of reviewers for each movie
    n_reviewers = rating_df.groupby('movieid')['rating'].count()
    n_reviewers.name = 'n_reviewers'
    movie_rating_df = movie_rating_df.join(n_reviewers, on='movieid',
                                           how='left')

    # calculate average rating for each movie
    avg_rating = rating_df.groupby('movieid')['rating'].mean()
    avg_rating.name = 'avg_rating'
    movie_rating_df = movie_rating_df.join(avg_rating, on='movieid',
                                           how='left')
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from src.load_data import load_data

X, y = load_data()
X_train, X_test, y_train, y_test = train_test_split(X, y)

model = LogisticRegression(solver="lbfgs").fit(X_train, y_train)
training_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print("training set score: %f" % training_score)
print("test set score: %f" % test_score)

"""
training set score: 0.846784
test set score: 0.708772
"""
from keras.models import load_model

from NetworkConfigurations import config
from src.load_data import load_data, get_files
from src.PredictSave import predict, save_images
from src.loss_functions import weighted_dice_coefficient_loss2d

model = load_model(config["path_to_model"], custom_objects={
    'weighted_dice_coefficient_loss2d': weighted_dice_coefficient_loss2d
})

paths, masks_paths = get_files(path_to_data=config["path_to_data"])
images, masks = load_data(paths=paths,
                          masks_paths=masks_paths,
                          train_modalities=config["train_modalities"],
                          image_shape=config["image_shape"],
                          train_validate_rate=config["train_validate_rate"])

predictions_0, predictions_1, constr_masks = predict(
    model=model,
    images=images,
    masks=masks,
    image_shape=config["image_shape"],
    input_shape=config["input_shape"])

save_images(predictions=predictions_1, constr_masks=constr_masks)
x = all_df[['open', 'low', 'high', 'volume']].copy()
y = all_df['close'].copy()
# pd.ewma() was removed from pandas; the .ewm() accessor is the modern equivalent
x = x.ewm(com=2).mean()
y = y.ewm(com=2).mean()
x[['open', 'low', 'high', 'volume']] = x_scaler.fit_transform(x)
y = y_scaler.fit_transform(y.values.reshape(-1, 1))
shape = x.shape[1]

# X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)  # do not delete: switches to collecting train data only
X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=0.96,
                                                  TrainTest=True)

model = build_model(input_shape=(WINDOW, shape))
print('START FIT MODEL...')
start = time.time()
history = History()
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    batch_size=128, epochs=200, verbose=1,
                    callbacks=[history])
# model.fit(X_train, y_train, batch_size=128, epochs=200, verbose=1)  # do not delete: switches to training on the train set only
end = time.time()
print('Learning time: ', end - start)
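# Aside (not part of the original snippet): a quick check of the pd.ewma
# replacement used above. The old pd.ewma(s, 2) passed 2 as the
# center-of-mass argument, so s.ewm(com=2).mean() reproduces it in modern
# pandas.
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.ewm(com=2).mean())  # same values the old pd.ewma(s, 2) returned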
def main(epoch_key, speed_metric='linear_speed',
         position_metric='linear_distance', use_smoother=False):
    data = load_data(epoch_key, ANIMALS, SAMPLING_FREQUENCY, BRAIN_AREAS,
                     speed_metric)

    replay_detector = ReplayDetector(**detector_parameters)
    replay_detector.fit(is_replay=data['is_ripple'],
                        speed=data['position_info'].linear_speed,
                        position=data['position_info'][position_metric],
                        lfp_power=data['power'],
                        spikes=data['spikes'],
                        multiunit=data['multiunit'])

    lfp_power_detector_results = replay_detector.predict(
        speed=data['position_info'].linear_speed,
        position=data['position_info'][position_metric],
        lfp_power=data['power'],
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        time=data['position_info'].index,
        use_likelihoods=['lfp_power'],
        use_smoother=use_smoother)

    spikes_detector_results = replay_detector.predict(
        speed=data['position_info'].linear_speed,
        position=data['position_info'][position_metric],
        lfp_power=data['power'],
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        time=data['position_info'].index,
        use_likelihoods=['spikes'],
        use_smoother=use_smoother)

    multiunit_detector_results = replay_detector.predict(
        speed=data['position_info'].linear_speed,
        position=data['position_info'][position_metric],
        lfp_power=data['power'],
        spikes=data['spikes'],
        multiunit=data['multiunit'],
        time=data['position_info'].index,
        use_likelihoods=['multiunit'],
        use_smoother=use_smoother)

    spikes_replay_info, _ = get_replay_times(spikes_detector_results)
    lfp_power_replay_info, _ = get_replay_times(lfp_power_detector_results)
    multiunit_replay_info, _ = get_replay_times(multiunit_detector_results)

    replay_infos = OrderedDict(
        [('ad_hoc_ripple', data['ripple_times']),
         ('ad_hoc_multiunit', data['multiunit_high_synchrony_times']),
         ('lfp_power', lfp_power_replay_info),
         ('spikes', spikes_replay_info),
         ('clusterless', multiunit_replay_info)],
    )

    animal, day, epoch = epoch_key
    for data_source, replay_info in replay_infos.items():
        logging.info(f'{data_source}...')
        folder = join(FIGURE_DIR, f'replays_{data_source}')
        os.makedirs(folder, exist_ok=True)
        for replay_number in tqdm(replay_info.index):
            fig, _ = plot_replay_with_data(replay_number, data, replay_info,
                                           replay_detector,
                                           spikes_detector_results,
                                           lfp_power_detector_results,
                                           multiunit_detector_results,
                                           epoch_key)
            figure_name = (f'{animal}_{day:02d}_{epoch:02d}_{data_source}'
                           f'_{replay_number:03d}.png')
            figure_path = join(folder, figure_name)
            plt.savefig(figure_path, bbox_inches='tight')
            plt.close(fig)
def data_fixture():
    rating_df, movie_df, genres, rated_movie_df = load_data()
    return rating_df, movie_df, rated_movie_df
def nextDayPrediction(typeBlockchain, stock):
    df = get_data.get_data_frame(typeBlockchain, stock)
    df.index = df.date
    x = df[['close']].copy()
    y = df[['close']].copy()
    NUM_FEATURES = x.shape[1]
    # pd.ewma() was removed from pandas; the .ewm() accessor is the modern equivalent
    x = x.ewm(com=2).mean()
    y = y.ewm(com=2).mean()

    # scaling data
    scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    x[['close']] = scaler.fit_transform(x)
    y[['close']] = y_scaler.fit_transform(y)
    x['cl_2'] = y['close']  # append the scaled target as a second column

    # Load data. Split train/test
    # X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=0.96, TrainTest=True)
    X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)
    x = x.close  # keep only the 'close' series for the prediction batch below

    # the original used an undefined name 'N' here; NUM_FEATURES is the intended value
    model = build_model(input_shape=(WINDOW, NUM_FEATURES))

    # training our model
    print('START FIT MODEL...')
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=0, callbacks=[history])
    model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=1)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "../../models/model_1f_" + typeBlockchain + today + ".h5"
    save_model(model, pathModel)
    # model = load_model(pathModel)

    # One-day prediction: take the last known window of data
    # (the y value is not needed here; it is what we predict)
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # predicted value

    # build a one-row DataFrame indexed by the next calendar date
    lastDate = str(df.last_valid_index()).split('-')
    currentData = datetime.date(int(lastDate[0]), int(lastDate[1]),
                                int(lastDate[2])) + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["close"], index=predictionDate)
    print(prediction)

    del model
    K.clear_session()
    return prediction
def run_experiment(victim_model_name: str, reference_model_names: List[str],
                   dataset: str, loss: str, epsilon: float, tau: float,
                   delta: float, eta: float, eta_g: float, n_images: int,
                   image_limit: int, compare_gradients: bool,
                   show_images: bool, seed: int = 0,
                   check_success: bool = True) -> None:
    """
    Runs an experiment of the subspace attack on a batch of images.

    It outputs the results in the `outputs/` folder, in a file named
    `YYYY-MM-DD.HH-MM.npy`. The output file is a dictionary exported with
    `numpy.save`. The format of the dictionary is:

    ```python
    experiment_info = {
        'experiment_baseline': {
            'victim_model': victim_model_name,
            'reference_model_names': reference_model_names,
            'dataset': dataset
        },
        'hyperparameters': {
            'tau': tau,
            'epsilon': epsilon,
            'delta': delta,
            'eta': eta,
            'eta_g': eta_g
        },
        'settings': {
            'n_images': n_images,
            'image_limit': image_limit,
            'compare_gradients': compare_gradients,
            'gpu': ...,   # whether the GPU has been used for the experiment
            'seed': seed
        },
        'results': {
            'queries': ...,     # the number of queries run
            'total_time': ...,  # the time it took to run the experiment
            # The following are present only if compare_gradients == True
            'gradient_products': ...,         # the cosine similarities for each image
            'true_gradient_norms': ...,       # the norms of the true gradients for each image
            'estimated_gradient_norms': ...,  # the norms of the estimated gradients for each image
            'true_losses': ...,               # the true losses each iteration
            'common_signs': ...,       # the percentages of common signs between true and estimated gradients
            'subs_common_signs': ...,  # the percentages of common signs between subsequent gradients
        }
    }
    ```

    The names of the hyperparameters are the same as used in [1]. The
    equivalents in [2] are also explained for each parameter.

    Parameters
    ----------
    victim_model_name: str
        The name of the model to be attacked.
    reference_model_names: List[str]
        The list of names of the models to be used as references.
    dataset: str
        The dataset from which the examples should be generated.
    loss: str
        The name of the loss function to be used.
    epsilon: float
        The maximum perturbation allowed in $\ell_\infty$ norm. In [2] it
        has the same name.
    tau: float
        The Bandit exploration ($\delta$ in [2]).
    delta: float
        Finite difference probe (the lower $\eta$ in [2]).
    eta_g: float
        OCO learning rate (the upper $\eta$ in [2]).
    eta: float
        Image learning rate ($h$ in [2]).
    n_images: int
        The number of images on which the attack should be run.
    image_limit: int
        The maximum number of queries to be attempted.
    compare_gradients: bool
        Whether the real and the estimated gradients should be compared
        after each loop. **Warning**: the use of this feature slows down the
        attack. It should be used just to check experimentally the behavior
        of the gradients.
    show_images: bool
        Whether each image to be attacked, and its corresponding adversarial
        example, should be shown.
    seed: int
        The seed used to initialize the pseudo-random generators, for
        reproducibility purposes.
    check_success: bool
        Whether the attack should stop once it has been successful. Default
        is True. You might want to use False if you want to record some
        events (e.g. loss or gradient similarity) for all the iterations.

    References
    ----------
    [1] Guo, Yiwen, Ziang Yan, and Changshui Zhang. "Subspace Attack:
        Exploiting Promising Subspaces for Query-Efficient Black-box
        Attacks." Advances in Neural Information Processing Systems 2019.
    [2] Ilyas, Andrew, Logan Engstrom, and Aleksander Madry. "Prior
        convictions: Black-box adversarial attacks with bandits and priors."
        arXiv preprint arXiv:1807.07978 (2018).
""" # Fix the seeds for reproducibility purposes torch.manual_seed(seed) random.seed(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Print introductory message print('----- Running experiment with the following settings -----') print('\n----- Models information -----') print(f'Victim model: {victim_model_name}') print(f'Reference models names: {reference_model_names}') print(f'Dataset: {dataset.value}') print(f'Loss function: {loss.value}') print(f'\n------ Hyperparameters -----') print(f'tau: {tau}') print(f'epsilon: {epsilon}') print(f'delta: {delta}') print(f'eta: {eta}') print(f'eta_g: {eta_g}') print('\n----- General settings -----') print(f'Number of images: {n_images}') print(f'Limit of iterations per image: {image_limit}') print(f'Compare gradients: {compare_gradients}') print(f'Show images: {show_images}') print(f'Seed: {seed}') print(f'GPU in use: {torch.cuda.is_available()}') print(f'Check success: {check_success}') # Save experiment initial information experiment_info = { 'experiment_baseline': { 'victim_model': victim_model_name, 'reference_model_names': reference_model_names, 'dataset': dataset.value, 'loss': loss.value }, 'hyperparameters': { 'tau': tau, 'epsilon': epsilon, 'delta': delta, 'eta': eta, 'eta_g': eta_g }, 'settings': { 'n_images': n_images, 'image_limit': image_limit, 'compare_gradients': compare_gradients, 'gpu': torch.cuda.is_available(), 'seed': seed, 'check_success': check_success }, 'results': { # Initialize dict entry to save results later. } } # Load data using required dataset data_loader, classes = load_data(dataset, True) num_classes = len(classes) # Load reference models reference_models = [ load_model(model_name, num_classes) for model_name in reference_model_names ] # Load victim model victim_model = load_model(victim_model_name, num_classes) # Move models to CUDA, if available if torch.cuda.is_available(): reference_models = list( map(lambda model: model.to('cuda'), reference_models)) victim_model = victim_model.to('cuda') # Get loss function criterion = load_loss(loss) # Set victim model to `eval()` mode to avoid dropout and batch normalization victim_model.eval() # Initialize images counter counter = 0 # Initalize the arrays to save results queries = [] final_models = [] all_true_gradient_norms = [] all_estimated_gradient_norms = [] all_gradient_products = [] all_true_losses = [] all_common_signs = [] all_subs_common_signs = [] # Initialize timing information run_time = datetime.datetime.now().replace(microsecond=0) tic = time.time() print(f'\n----- Beginning at {run_time} -----') # Loop over the dataset for data, target in data_loader: print(f'\n--------------------------------------------\n') print(f'Target number {counter}\n') # Attack the image (queries_counter, gradient_products, true_gradient_norms, estimated_gradient_norms, true_losses, common_signs, subs_common_signs, final_model) = \ attack(data, criterion, target, epsilon, tau, delta, eta_g, eta, victim_model, reference_models, image_limit, compare_gradients, show_images, check_success=check_success) counter += 1 # Save the results of the attack queries.append(queries_counter) final_models.append(final_model) all_gradient_products.append(gradient_products) all_true_gradient_norms.append(true_gradient_norms) all_estimated_gradient_norms.append(estimated_gradient_norms) all_true_losses.append(true_losses) all_common_signs.append(common_signs) all_subs_common_signs.append(subs_common_signs) # Stop if all the required images have been attacked if 
counter == n_images: break # Save the total time total_time = time.time() - tic # Make an np.array aout of the queries array to print some stats queries_array = np.array(queries) failed = queries_array == -1 print(f'\n-------------\n') print(f'Experiment finished:\n') print(f'Mean number of queries: {queries_array[~failed].mean()}') print(f'Median number of queries: {np.median(queries_array[~failed])}') print(f'Number of failed queries: {len(queries_array[failed])}') print(f'Total time: {total_time} s') print(f'\n-------------\n') # Save experiment run information experiment_info['results']['queries'] = queries_array experiment_info['results']['total_time'] = total_time experiment_info['results']['final_model'] = final_models # Save gradients information, if required by experiment run if compare_gradients: experiment_info['results']['gradient_products'] = np.array( all_gradient_products) experiment_info['results']['true_gradient_norms'] = np.array( all_true_gradient_norms) experiment_info['results']['estimated_gradient_norms'] = np.array( all_estimated_gradient_norms) experiment_info['results']['true_losses'] = np.array(all_true_losses) experiment_info['results']['common_signs'] = np.array(all_common_signs) experiment_info['results']['subs_common_signs'] = np.array( all_subs_common_signs) # Take care of results output folder results_path = OUTPUT_DIR experiment_info_filename = run_time.strftime('%Y-%m-%d.%H-%M') if not os.path.exists(results_path): os.makedirs(results_path) # Save results np.save(results_path + experiment_info_filename, experiment_info, allow_pickle=True)
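# A minimal sketch, not part of the original experiment code, of reading the
# saved results back: np.save on a dict requires allow_pickle=True on load,
# and .item() unwraps the 0-d object array into the dict. The file name below
# is a hypothetical example of the 'YYYY-MM-DD.HH-MM.npy' pattern described
# in the docstring.
import numpy as np

experiment_info = np.load('outputs/2019-12-01.10-30.npy',
                          allow_pickle=True).item()
print(experiment_info['results']['queries'])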
def main():
    data_generator = load_data()
    _history = []
    device = None
    model = None
    criterion = None
    fold_index = 0

    for TEXT, LABEL, train_data, val_data in data_generator.get_fold_data(
            num_folds=args['num_folds']):
        logger.info("***** Running Training *****")
        logger.info(f"Now fold: {fold_index + 1} / {args['num_folds']}")

        TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.300d")
        logger.info(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABEL.build_vocab(train_data)

        model = Model(len(TEXT.vocab), args['embedding_dim'],
                      args['hidden_dim'], args['output_dim'],
                      args['num_layers'], args['dropout'])
        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        if args['gpu'] is True and args['gpu_number'] is not None:
            torch.cuda.set_device(args['gpu_number'])
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        model = model.to(device)
        criterion = criterion.to(device)

        train_iterator = data.Iterator(train_data,
                                       batch_size=args['batch_size'],
                                       sort_key=lambda x: len(x.text),
                                       device=device)
        val_iterator = data.Iterator(val_data,
                                     batch_size=args['batch_size'],
                                     sort_key=lambda x: len(x.text),
                                     device=device)

        for epoch in range(args['epochs']):
            train_loss, train_acc = train_run(model, train_iterator,
                                              optimizer, criterion)
            logger.info(
                f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | '
                f'Train Acc: {train_acc*100:.2f}%')

        val_loss, val_acc = eval_run(model, val_iterator, criterion)
        logger.info(
            f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')
        _history.append([val_loss, val_acc])
        fold_index += 1

    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])
    logger.info('***** Cross Validation Result *****')
    logger.info(f'LOSS: {loss}, ACC: {acc}')
def nextDayPrediction(typeBlockchain, stock):
    df = get_data.get_data_frame(typeBlockchain, stock)

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    all_df = df.copy()
    x = all_df[['open', 'low', 'high', 'volume']].copy()
    y = all_df['close'].copy()
    # pd.ewma() was removed from pandas; the .ewm() accessor is the modern equivalent
    x = x.ewm(com=2).mean()
    y = y.ewm(com=2).mean()
    x[['open', 'low', 'high', 'volume']] = x_scaler.fit_transform(x)
    NUM_FEATURES = x.shape[1]
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    x['close'] = y

    # X_train, y_train = load.load_data(x, WINDOW, TrainTest=False)
    X_train, y_train, X_test, y_test = load.load_data(x, WINDOW,
                                                      train_size=0.96,
                                                      TrainTest=True)
    x = x[['open', 'low', 'high', 'volume']]

    model = build_model(input_shape=(WINDOW, NUM_FEATURES))
    print('START FIT MODEL...')
    start = time.time()
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=0, callbacks=[history])
    model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=0)
    end = time.time()
    print('Learning time: ', end - start)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "../../models/model_5f_" + typeBlockchain + today + ".h5"
    save_model(model, pathModel)
    # model = load_model(pathModel)

    # One-day prediction: take the last known window of data
    # (the y value is not needed here; it is what we predict)
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # predicted value

    # build a one-row DataFrame indexed by the next calendar date
    lastDate = str(df.date[df.last_valid_index()]).split('-')
    currentData = datetime.date(int(lastDate[0]), int(lastDate[1]),
                                int(lastDate[2])) + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["predictionPrice"],
                              index=predictionDate.values)
    print(prediction)

    del model
    K.clear_session()
    return prediction
# Model
from src.models.general_CNN import CNNModel
# Generator and pre-processing
from src.audio_data_generator import AudioDataGenerator

TRAIN = True
WRITE_RESULTS = True

# File paths to data
TRAIN_PATH = './input/train/audio/'
TEST_PATH = './input/test/audio'
VAL_FILE_PATH = './input/train/validation_list.txt'

# Which feature representation we use
FEATURE_REP = 'log_mel_spectrogram'

(x_train, y_train), (x_val, y_val), label_binarizer = load_data(
    path=TRAIN_PATH, val_path=VAL_FILE_PATH)

model_instance = CNNModel()
audio_preprocessor = AudioDataGenerator(generator_method=FEATURE_REP)

if TRAIN:
    model = model_instance.create_model(
        audio_preprocessor.get_data_shape(x_train[0]))
    tensorboard = TensorBoard(log_dir='./logs/{}'.format(time.time()),
                              batch_size=model_instance.BATCH_SIZE)
    checkpoint = ModelCheckpoint(model_instance.checkpoint_path,
                                 monitor='val_loss')
    early_stop = EarlyStopping(monitor='val_loss')
def nextDayPrediction(typeBlockchain, stock): """ Triggers for plotting """ DJA = IdexDataframe("DJA") GSPC = IdexDataframe("GSPC") NYA = IdexDataframe("NYA") loaded = get_data.get_data_frame(typeBlockchain, stock) loaded.index = loaded.date loaded = loaded[['open', 'close', 'low', 'high', 'volume']] df = pd.concat([DJA, GSPC, NYA, loaded], axis=1, ignore_index=False) df = df.fillna(method='ffill') df = df.dropna(axis=0, how='any') x_scaler = MinMaxScaler() y_scaler = MinMaxScaler() all_df = df.copy() feature = [ 'OpenDJA', 'HighDJA', 'LowDJA', 'CloseDJA', 'Adj CloseDJA', 'VolumeDJA', 'OpenGSPC', 'HighGSPC', 'LowGSPC', 'CloseGSPC', 'Adj CloseGSPC', 'VolumeGSPC', 'OpenNYA', 'HighNYA', 'LowNYA', 'CloseNYA', 'Adj CloseNYA', 'VolumeNYA', 'open', 'low', 'high', 'volume' ] x = all_df[feature].copy() y = all_df['close'].copy() #x = pd.ewma(x,2) #y = pd.ewma(y,2) x[feature] = x_scaler.fit_transform(x) y = y_scaler.fit_transform(y.values.reshape(-1, 1)) x['close'] = y num_features = x.shape[1] #X_train, y_train = load.load_data(x, WINDOW, TrainTest = False) X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size=1.0, TrainTest=True) model = build_model(input_shape=(WINDOW, num_features)) print('START FIT MODEL...') start = time.time() #history = History() #history= model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=500,verbose=0, # callbacks=[history]) model.fit(X_train, y_train, batch_size=32, epochs=5, verbose=1) end = time.time() print('Learning time: ', end - start) today = time.strftime("_%d_%m_%Y") pathModel = "../../models/model_5f_" + typeBlockchain + today + ".h5" save_model(model, pathModel) del model K.clear_session() model = load_model(pathModel) #model = load_model(pathModel) # one day prediction. get last batch known data (now we didnt need in y value and can predict it) lastbatch = np.array(x[-WINDOW:]) pred = model.predict([lastbatch.reshape(1, 22, num_features)]) pred = np.array(y_scaler.inverse_transform(pred)) # predicted value prediction = pred.reshape(-1) print(prediction) return prediction
from src.load_data import load_data, prepare_data
from src.model import classifier_model
from src.train import train_model, save_model, evaluate_model
from src.utils import plot_learning_curves

# Main file to train a model with the name defined below
model_name = "2_blocks_20_epochs"

# Load and prepare dataset
x_train, y_train, x_test, y_test = load_data()
x_train, y_train, x_test, y_test = prepare_data(x_train, y_train,
                                                x_test, y_test)

# Train model on data
model = classifier_model()
trained_model, model_history = train_model(model, x_train, y_train,
                                           x_test, y_test)

# Save model and output results
save_model(trained_model, model_name)
evaluate_model(trained_model, x_test, y_test)
plot_learning_curves(model_history, model_name)
# Local
local_data_path = '../data/fer2013/fer2013.csv'
local_data_path2 = 'data/fer2013/fer2013.csv'

data_path_r = None
if os.path.isfile(local_data_path2):
    data_path_r = local_data_path2
elif os.path.isfile(local_data_path):
    data_path_r = local_data_path
else:
    data_path_r = drive_data_path

img_size = (48, 48)
x_train, y_train, x_val, y_val, x_test, y_test = load_data(data_path_r)

history = pickle.load(open("history.bin", "rb"))

save_path_2 = "save_model_2.ckpt"
model = load_model(save_path_2)

pred_y = model.predict(x_test)
print(pred_y[0], y_test[0])


def make_int_y(y):
    y = np.array(y)
    y__ = []
    for k in y:
        # The loop body is truncated in the source; collapsing each one-hot
        # row to its integer class label is the assumed continuation.
        y__.append(int(np.argmax(k)))
    return np.array(y__)
box_axs[8, 6].axis("off") box_axs[8, 5].axis("off") box_axs[8, 4].axis("off") for i, col_name in enumerate(column_names): r = int(i / 8) c = int(i % 8) ax = box_axs[r, c] sns.boxplot(train_df[col_name], ax=ax, palette=cmap) box_fig.savefig('images/boxplots.png') # # Barplots # train_df = load_data() column_names = train_df.columns.tolist()[:1] bar_fig = Figure(figsize=(50, 50)) bar_fig.tight_layout() bar_axs = bar_fig.subplots(9, 8) bar_axs[8, 7].axis("off") bar_axs[8, 6].axis("off") bar_axs[8, 5].axis("off") bar_axs[8, 4].axis("off") for i, col_name in enumerate(column_names): r = int(i / 8) c = int(i % 8) ax = bar_axs[r, c]
def nextDayPrediction(typeBlockchain, stock):
    plot = True
    plotHistory = False
    interactiveGraph = True
    plotForTrain = False

    df = get_data.get_data_frame(typeBlockchain, stock)
    df.index = df.date
    df = df[['open', 'close', 'low', 'high', 'volume']]

    # technical indicators
    df = ma_rel_diff(df)
    df = ema_rel_diff(df)
    df = mom(df)
    df = roc(df)
    df = bbands(df)
    df = normalized_bbands(df)
    df = rsi(df)
    df = stochastics(df)
    df = macd(df)
    df = atr(df)
    df = adx(df)
    df = df.dropna()

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    all_df = df.copy()
    features = ['macd_12_26_9', 'stoch_d_14_3', 'roc_14']
    # other feature sets tried:
    # ['moment_20', 'ema_rel_diff_10', 'ma_rel_diff_50'],
    # ['atr_14', 'moment_20'],
    # ['atr_14', 'moment_20', 'low', 'high'],
    # ['roc_14', 'moment_20', 'ema_rel_diff_10'],
    # ['roc_14', 'rsi_14'],
    # ['roc_14', 'rsi_14', 'macd_12_26_9']

    x = all_df[features].copy()
    y = all_df['close'].copy()
    NUM_FEATURES = x.shape[1]
    x[features] = x_scaler.fit_transform(x)
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    x['close'] = y

    X_train, y_train, X_test, y_test = load.load_data(x, WINDOW,
                                                      train_size=0.96,
                                                      TrainTest=True)
    # keep the scaled features, dropping the appended target column;
    # the original rebuilt x from the unscaled all_df here (flagged ####!!!! in the source)
    x = x[features]

    model = build_model(input_shape=(WINDOW, NUM_FEATURES))
    print('START FIT MODEL...')
    print(features)
    print()
    start = time.time()
    # history = History()
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     batch_size=32, epochs=500, verbose=0, callbacks=[history])
    model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=1)
    end = time.time()
    print('Learning time: ', end - start)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "./model_" + str(features) + typeBlockchain + today + ".h5"
    # pathModel = "../../models/model_low_high_USDT_BTC_03_08_2017.h5"
    save_model(model, pathModel)
    # model = load_model(pathModel)

    # One-day prediction: take the last known window of data
    # (the y value is not needed here; it is what we predict)
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # predicted value

    # build a one-row DataFrame indexed by the next calendar date
    lastDate = str(df.last_valid_index()).split('-')
    currentData = datetime.date(int(lastDate[0]), int(lastDate[1]),
                                int(lastDate[2])) + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["predictionPrice"],
                              index=predictionDate.values)
    print(prediction)
    return prediction
"weekofyear", 'summer', 'autumn', 'winter', "cos_weekday", "sin_weekday", 'udsprevisionempresa_shifted-1', 'udsprevisionempresa_shifted-6', 'udsstock_shifted7', 'roll4wd_udsstock_shifted7', 'roll4wd_udsstock' ]] _, _, predict, metrics = run(train_data, "udsstock", base_model, params, tags) predict["modelo"] = modelo return metrics, predict if __name__ == "__main__": # Load Stock data data = load_data() # # Initalize base model # from sklearn.linear_model import LinearRegression # base_model = LinearRegression() from sklearn.ensemble import RandomForestRegressor base_model = RandomForestRegressor(n_estimators=200) params = {"n_estimators": 200} with open("config/model_stock.json") as config_file: config = json.load(config_file) arg1 = "all" # Si especificamos todos, entrena todos los modelos configurados if arg1 == "all":