def load_dataset(params, eval_split, train_remove_invalid, eval_remove_invalid,
                 load_cnn_predictions=False, load_cnn_features=False,
                 load_cnn_features_train=False):

    if params['dataset'] == 'inat_2017':
        data_dir = get_paths('inat_2017_data_dir')
        num_classes = 5089
        class_of_interest = 3731

        # load observations
        train_locs, train_classes, train_users, train_dates, train_inds = \
            load_inat_data(data_dir, 'train2017_locations.json',
                           'train2017.json', train_remove_invalid)
        if eval_split == 'val':
            val_locs, val_classes, val_users, val_dates, val_inds = \
                load_inat_data(data_dir, eval_split+'2017_locations.json',
                               eval_split+'2017.json', eval_remove_invalid)
        elif eval_split == 'test':
            val_locs, val_classes, val_users, val_dates, val_inds = \
                load_inat_data(data_dir, eval_split+'2017_locations.json',
                               eval_split+'2017_DO_NOT_SHARE.json', eval_remove_invalid)
            val_split = pd.read_csv(data_dir + 'kaggle_solution_2017_DO_NOT_SHARE.csv')['usage'].values == 'Private'

        # load class names
        with open(data_dir + 'categories2017.json') as da:
            cls_data = json.load(da)
        class_names = [cc['name'] for cc in cls_data]
        class_ids = [cc['id'] for cc in cls_data]
        classes = dict(zip(class_ids, class_names))

        if load_cnn_predictions:
            val_preds = load_sparse_feats(data_dir + 'features_inception/inat2017_' + eval_split + '_preds_sparse.npz')
        if load_cnn_features:
            val_feats = np.load(data_dir + 'features_inception/inat2017_' + eval_split + '_net_feats.npy')
        if load_cnn_features_train:
            train_feats = np.load(data_dir + 'features_inception/inat2017_train_net_feats.npy')

    elif params['dataset'] == 'inat_2018':
        data_dir = get_paths('inat_2018_data_dir')
        num_classes = 8142
        class_of_interest = 3731  # wood thrush

        # load observations
        train_locs, train_classes, train_users, train_dates, train_inds = \
            load_inat_data(data_dir, 'train2018_locations.json',
                           'train2018.json', train_remove_invalid)
        if eval_split == 'val':
            val_locs, val_classes, val_users, val_dates, val_inds = \
                load_inat_data(data_dir, eval_split+'2018_locations.json',
                               eval_split+'2018.json', eval_remove_invalid)
        elif eval_split == 'test':
            val_locs, val_classes, val_users, val_dates, val_inds = \
                load_inat_data(data_dir, eval_split+'2018_locations.json',
                               eval_split+'2018_DO_NOT_SHARE.json', eval_remove_invalid)
            val_split = pd.read_csv(data_dir + 'kaggle_solution_2018_DO_NOT_SHARE.csv')['usage'].values == 'Private'

        # load class names
        with open(data_dir + 'categories2018.json') as da:
            cls_data = json.load(da)
        class_names = [cc['name'] for cc in cls_data]
        class_ids = [cc['id'] for cc in cls_data]
        classes = dict(zip(class_ids, class_names))

        if load_cnn_predictions:
            if params['inat2018_resolution'] == 'high_res':
                val_preds = load_sparse_feats(data_dir + 'features_inception_hr/inat2018_' + eval_split + '_preds_sparse.npz')
            else:
                val_preds = load_sparse_feats(data_dir + 'features_inception/inat2018_' + eval_split + '_preds_sparse.npz')
        if load_cnn_features:
            if params['inat2018_resolution'] == 'high_res':
                val_feats = np.load(data_dir + 'features_inception_hr/inat2018_' + eval_split + '_net_feats.npy')
            else:
                val_feats = np.load(data_dir + 'features_inception/inat2018_' + eval_split + '_net_feats.npy')
        if load_cnn_features_train:
            if params['inat2018_resolution'] == 'high_res':
                train_feats = np.load(data_dir + 'features_inception_hr/inat2018_train_net_feats.npy')
            else:
                train_feats = np.load(data_dir + 'features_inception/inat2018_train_net_feats.npy')

    elif params['dataset'] == 'birdsnap':
        data_dir = get_paths('birdsnap_data_dir')
        ann_file_name = 'birdsnap_with_loc_2019.json'
        num_classes = 500
        class_of_interest = 0

        # load observations
        train_locs, train_classes, train_users, train_dates, train_inds = \
            load_bird_data(data_dir, ann_file_name, 'train', train_remove_invalid, params['meta_type'])
        val_locs, val_classes, val_users, val_dates, val_inds = \
            load_bird_data(data_dir, ann_file_name, eval_split, eval_remove_invalid, params['meta_type'])

        # load class names
        with open(data_dir + ann_file_name) as da:
            class_names = json.load(da)['classes']
        classes = dict(zip(range(len(class_names)), class_names))

        if load_cnn_predictions:
            val_preds = load_sparse_feats(data_dir + 'features_inception/birdsnap_' + eval_split + '_preds_sparse.npz')
        if load_cnn_features:
            val_feats = np.load(data_dir + 'features_inception/birdsnap_' + eval_split + '_net_feats.npy')
        if load_cnn_features_train:
            train_feats = np.load(data_dir + 'features_inception/birdsnap_train_net_feats.npy')

    elif params['dataset'] == 'nabirds':
        data_dir = get_paths('nabirds_data_dir')
        ann_file_name = 'nabirds_with_loc_2019.json'
        num_classes = 555
        class_of_interest = 0

        # load observations
        train_locs, train_classes, train_users, train_dates, train_inds = \
            load_bird_data(data_dir, ann_file_name, 'train', train_remove_invalid, params['meta_type'])
        val_locs, val_classes, val_users, val_dates, val_inds = \
            load_bird_data(data_dir, ann_file_name, eval_split, eval_remove_invalid, params['meta_type'])

        # load class names
        with open(data_dir + ann_file_name) as da:
            class_names = json.load(da)['classes']
        classes = dict(zip(range(len(class_names)), class_names))

        if load_cnn_predictions:
            val_preds = load_sparse_feats(data_dir + 'features_inception/nabirds_' + eval_split + '_preds_sparse.npz')
        if load_cnn_features:
            val_feats = np.load(data_dir + 'features_inception/nabirds_' + eval_split + '_net_feats.npy')
        if load_cnn_features_train:
            train_feats = np.load(data_dir + 'features_inception/nabirds_train_net_feats.npy')

    elif params['dataset'] == 'yfcc':
        data_dir = get_paths('yfcc_data_dir')
        print(' No user or date features for yfcc.')
        params['use_date_feats'] = False
        params['balanced_train_loader'] = False
        num_classes = 100
        class_of_interest = 9  # beach

        # load observations
        train_locs, train_classes, train_users, train_dates = load_yfcc_data(data_dir, 'train_test_split.csv', 'train')
        val_locs, val_classes, val_users, val_dates = load_yfcc_data(data_dir, 'train_test_split.csv', eval_split)
        train_inds = np.arange(train_locs.shape[0])
        val_inds = np.arange(val_locs.shape[0])

        # load class names
        da = pd.read_csv(data_dir + 'class_names.csv')
        classes = dict(zip(da['id'].values, da['name'].values))

        if load_cnn_predictions:
            val_preds = np.load(data_dir + 'features_inception/YFCC_' + eval_split + '_preds.npy')
        if load_cnn_features:
            val_feats = np.load(data_dir + 'features_inception/YFCC_' + eval_split + '_net_feats.npy')
        if load_cnn_features_train:
            train_feats = np.load(data_dir + 'features_inception/YFCC_train_net_feats.npy')

    if load_cnn_features_train and train_remove_invalid:
        train_feats = train_feats[train_inds, :]

    if load_cnn_features and eval_remove_invalid:
        val_feats = val_feats[val_inds, :]
        val_preds = val_preds[val_inds, :]

    # return data in dictionary
    op = {}
    op['train_locs'] = train_locs
    op['train_classes'] = train_classes
    op['train_users'] = train_users
    op['train_dates'] = train_dates
    op['val_locs'] = val_locs
    op['val_classes'] = val_classes
    op['val_users'] = val_users
    op['val_dates'] = val_dates
    op['class_of_interest'] = class_of_interest
    op['classes'] = classes
    op['num_classes'] = num_classes

    if load_cnn_predictions:
        op['val_preds'] = val_preds  # class predictions from trained image classifier
    if load_cnn_features:
        op['val_feats'] = val_feats  # features from trained image classifier
        assert val_feats.shape[0] == val_locs.shape[0]
    if load_cnn_features_train:
        op['train_feats'] = train_feats  # features from trained image classifier
        assert train_feats.shape[0] == train_locs.shape[0]

    # if it exists, add the data split
    try:
        op['val_split'] = val_split
    except NameError:
        op['val_split'] = np.ones(val_locs.shape[0], dtype=int)

    return op
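
if __name__ == '__main__':
    # Minimal usage sketch (illustrative only): the params values below are
    # assumptions for demonstration, not the repository's training configuration,
    # and the call requires the corresponding dataset files to be on disk.
    params = {'dataset': 'inat_2018',             # selects the iNat 2018 branch above
              'inat2018_resolution': 'standard',  # anything other than 'high_res' uses the standard features
              'meta_type': ''}                    # only read by the birdsnap/nabirds branches
    op = load_dataset(params, eval_split='val',
                      train_remove_invalid=True, eval_remove_invalid=True,
                      load_cnn_predictions=True)
    print(op['num_classes'], op['train_locs'].shape, op['val_preds'].shape)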
import numpy as np
import torch
import json
import os
import sys
from sklearn.manifold import TSNE
from sklearn import decomposition

sys.path.append('../')
from geo_prior import models
from geo_prior.paths import get_paths
import geo_prior.datasets as dt
import geo_prior.grid_predictor as grid


users_of_interest = [344, 345, 42]
users_of_interest_cols = ['r', 'y', 'k']
model_path = '../models/model_inat_2018_full_final.pth.tar'
data_dir = get_paths('inat_2018_data_dir')
min_num_exs = 100
seed = 2001
dpi = 150.0
num_time_steps = 12
num_ds_dims = 3  # number of ICA components used to downsample features (assumed value; not given in this excerpt)
op_dir = 'images/user_ims/'
if not os.path.isdir(op_dir):
    os.makedirs(op_dir)

# this has been precomputed by plot_class_embedding.py
if os.path.isfile('images/class_ims/all_classes.npz'):
    class_embedding = np.load('images/class_ims/all_classes.npz')
else:
    print('Error: Need to run plot_class_embedding.py first.')
    sys.exit()
# load model
net_params = torch.load(model_path, map_location='cpu')
params = net_params['params']
model = models.FCNet(num_inputs=params['num_feats'], num_classes=params['num_classes'],
                     num_filts=params['num_filts'], num_users=params['num_users']).to(params['device'])
model.load_state_dict(net_params['state_dict'])
model.eval()

# load ocean mask
mask = np.load(get_paths('mask_dir') + 'ocean_mask.npy')

# grid predictor - for making dense predictions for each lon/lat location
gp = grid.GridPredictor(mask, params, mask_only_pred=True)

# compute intermediate network features
print('Computing features.')
feats = []
for time_step in np.linspace(0, 1, num_time_steps + 1)[:-1]:
    net_feats = gp.dense_prediction_masked_feats(model, time_step).data.cpu().numpy()
    feats.append(net_feats)

# downsample features - choose middle time step
dsf = decomposition.FastICA(n_components=num_ds_dims, random_state=seed)
dsf.fit(feats[len(feats) // 2])
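
# Illustrative continuation (an assumption, not code from the original script):
# project each time step's masked features with the fitted ICA and rescale the
# result to [0, 1], e.g. for saving as false-colour maps.
feats_ds = [dsf.transform(f) for f in feats]
f_min = np.vstack(feats_ds).min(axis=0)
f_max = np.vstack(feats_ds).max(axis=0)
feats_ds = [(f - f_min) / (f_max - f_min + 1e-8) for f in feats_ds]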
import torch
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from sklearn.manifold import TSNE
import sys

sys.path.append('../')
from geo_prior import models
from geo_prior.paths import get_paths


seed = 2001
model_path = '../models/model_inat_2018_full_final.pth.tar'
data_dir = get_paths('inat_2018_data_dir')
op_dir = 'images/class_ims/'
if not os.path.isdir(op_dir):
    os.makedirs(op_dir)

with open(data_dir + 'categories2018.json') as da:
    cls_data = json.load(da)
class_names = [cc['name'] for cc in cls_data]
class_ids = [cc['id'] for cc in cls_data]
supercat_names = [cc['supercategory'] for cc in cls_data]
supercat_un, supercat_ids = np.unique(supercat_names, return_inverse=True)

# load model
net_params = torch.load(model_path, map_location='cpu')
params = net_params['params']
model = models.FCNet(num_inputs=params['num_feats'], num_classes=params['num_classes'],
                     num_filts=params['num_filts'], num_users=params['num_users']).to(params['device'])
import torch
import numpy as np
import matplotlib.pyplot as plt
import json
import os
import sys

sys.path.append('../')
from geo_prior import models
from geo_prior.paths import get_paths
import geo_prior.datasets as dt


inat_year = '2018'  # '2017' or '2018'
model_path = '../models/model_inat_' + inat_year + '_full_final.pth.tar'
data_dir = get_paths('inat_' + inat_year + '_data_dir')
min_num_exs = 100
seed = 2001
dpi = 150.0
op_dir = 'images/metadata_stats/'
if not os.path.isdir(op_dir):
    os.makedirs(op_dir)

# load class info
with open(data_dir + 'categories' + inat_year + '.json') as da:
    cls_data = json.load(da)
class_names = [cc['name'] for cc in cls_data]
class_ids = [cc['id'] for cc in cls_data]
supercat_names = [cc['supercategory'] for cc in cls_data]
supercat_un, supercat_ids = np.unique(supercat_names, return_inverse=True)
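
# Illustrative summary (not necessarily part of the original script): count how
# many classes belong to each supercategory, using only the category file
# loaded above.
classes_per_supercat = np.bincount(supercat_ids)
for sc_name, sc_cnt in zip(supercat_un, classes_per_supercat):
    print('{:<20s} {}'.format(sc_name, sc_cnt))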