def main(argv):
    del argv  # unused

    filter_class_ids = list(map(int, FLAGS.class_ids))
    logging.info('calculating scores for classes %s from %s'
                 % (', '.join(map(str, filter_class_ids)), FLAGS.dataset))

    dataset_class = DATASET_MAP[FLAGS.dataset]
    data_dir = DATA_DIR_MAP[FLAGS.dataset]

    logging.info('loading data...')
    input_image_size = 224
    dataset = dataset_class.load_all_data(
        data_dir, input_image_size, filter_class_ids)
    logging.info('loaded %d images' % len(dataset))

    logging.info('loading model...')
    model = _make_cuda(Vgg16())

    context = Context(
        model=model,
        dataset=dataset,
        layer_idx=FLAGS.layer_idx)

    out_filename = '-'.join([
        f'vgg16_layer{FLAGS.layer_idx}',
        FLAGS.dataset,
        '_'.join(map(str, filter_class_ids)),
        'scores.npz'])
    out_dirpath = os.path.join(SCRATCH_DIR, 'scores')
    os.makedirs(out_dirpath, exist_ok=True)
    out_filepath = os.path.join(out_dirpath, out_filename)
    logging.info('saving output to %s' % out_filepath)

    all_scores_matrix = None
    all_column_ids = list()
    for image_idx in trange(len(context.dataset)):
        scores, cols = _get_score_matrix_for_image(
            image_idx, FLAGS.num_max_proposals, context)
        if all_scores_matrix is None:
            all_scores_matrix = scores
        else:
            all_scores_matrix = np.concatenate(
                (all_scores_matrix, scores), axis=1)
        all_column_ids += cols

        np.savez(out_filepath,
                 scores=all_scores_matrix,
                 cols=all_column_ids)

    notify(f'Finished: {FLAGS.dataset} - {FLAGS.class_ids}',
           namespace='scores')
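# For reference, a minimal sketch of reading back the .npz written by the
# score-computation main above. The key names ('scores', 'cols') match the
# np.savez call; the example filename and the shape comments are assumptions.
import numpy as np

data = np.load('vgg16_layer30-cub-14_90-scores.npz', allow_pickle=True)
scores = data['scores']  # affinity matrix; columns accumulate across images
cols = data['cols']      # one column identifier per column of `scores`
print(scores.shape, len(cols))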
def main(argv):
    del argv  # unused

    """Labels the requested classes with a Snorkel generative model
    trained on GOGGLES affinity scores."""

    preds_out_dirpath = os.path.join(SCRATCH_DIR, 'preds_snorkel')
    os.makedirs(preds_out_dirpath, exist_ok=True)

    class_ids = list(map(int, FLAGS.class_ids))
    preds_out_filename = '-'.join([
        FLAGS.dataset,
        '_'.join(map(str, class_ids)),
        'run_%02d' % FLAGS.run,
        'preds_snorkel.npz'])
    preds_out_filepath = os.path.join(preds_out_dirpath, preds_out_filename)
    assert not os.path.exists(preds_out_filepath), \
        'Predictions for this run already exist at %s' % preds_out_filepath

    input_image_size = 224
    if FLAGS.dataset == 'cub':
        dataset = CUBDataset.load_all_data(
            CUB_DATA_DIR, input_image_size, class_ids)
    elif FLAGS.dataset == 'awa2':
        dataset = AwA2Dataset.load_all_data(
            AWA2_DATA_DIR, input_image_size, class_ids)
    y_true = [v[1] for v in dataset]

    # Derive a deterministic seed by packing the class ids and the run
    # number into separate decimal slots of one integer.
    seed = sum(v * (10 ** (3 * i))
               for i, v in enumerate(class_ids + [FLAGS.run]))
    random.seed(seed)
    np.random.seed(seed)

    scores, col_ids = load_scores(
        os.path.join(
            SCRATCH_DIR, 'scores',
            f'vgg16_layer30-{FLAGS.dataset}-%d_%d-scores.npz'
            % tuple(class_ids)))

    # Train the Snorkel generative model on the labeling matrix derived
    # from the affinity scores (the same matrix is used for train and test).
    new_scores_np = get_labeling_matrix_for_GOOGGLES(scores)
    L_tr, L_te = new_scores_np, new_scores_np
    _, y_snorkel, _ = train_snorkel_gen_model(L_tr, L_te)

    np.savez(preds_out_filepath, y_true=y_true, y_snorkel=y_snorkel)
    logging.info(f'saved predictions at {preds_out_filepath}')

    snorkel_acc = best_acc(y_true, y_snorkel)
    notify(f'`{FLAGS.dataset}` - `%s` - `run {FLAGS.run}`: {snorkel_acc}'
           % ', '.join(map(str, class_ids)),
           namespace='goggles-snorkel')
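# A quick worked example of the seed packing used above: each class id and
# the run number occupy their own decimal slot of a single integer, so
# distinct (class_ids, run) combinations yield distinct seeds as long as
# every value stays below 1000. The values here are assumed for illustration.
class_ids, run = [14, 90], 3
seed = sum(v * (10 ** (3 * i)) for i, v in enumerate(class_ids + [run]))
print(seed)  # 14*1 + 90*1000 + 3*1000000 = 3090014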
def main(argv):
    del argv  # unused

    """Runs the multi-class labeling pipeline: k-means on the affinity
    scores, followed by EM refinement initialized from the k-means labels."""

    preds_out_dirpath = os.path.join(SCRATCH_DIR, 'preds-multi')
    models_out_dirpath = os.path.join(SCRATCH_DIR, 'models-multi')
    os.makedirs(preds_out_dirpath, exist_ok=True)
    os.makedirs(models_out_dirpath, exist_ok=True)

    class_ids = list(map(int, FLAGS.class_ids))
    preds_out_filename = '-'.join([
        FLAGS.dataset,
        '_'.join(map(str, class_ids)),
        'run_%02d' % FLAGS.run,
        'preds.npz'])
    kmeans_init_model_out_filename = '-'.join([
        FLAGS.dataset,
        '_'.join(map(str, class_ids)),
        'run_%02d' % FLAGS.run,
        'kmeans_init',
        'model.pkl'])
    rand_init_model_out_filename = '-'.join([
        FLAGS.dataset,
        '_'.join(map(str, class_ids)),
        'run_%02d' % FLAGS.run,
        'rand_init',
        'model.pkl'])

    preds_out_filepath = os.path.join(preds_out_dirpath, preds_out_filename)
    kmeans_init_model_out_filepath = os.path.join(
        models_out_dirpath, kmeans_init_model_out_filename)
    rand_init_model_out_filepath = os.path.join(
        models_out_dirpath, rand_init_model_out_filename)

    assert not os.path.exists(preds_out_filepath), \
        'Predictions for this run already exist at %s' % preds_out_filepath
    assert not os.path.exists(kmeans_init_model_out_filepath), \
        'Model (k-means init) for this run already exists at %s' \
        % kmeans_init_model_out_filepath
    assert not os.path.exists(rand_init_model_out_filepath), \
        'Model (random init) for this run already exists at %s' \
        % rand_init_model_out_filepath

    logging.info(f'calculating run {FLAGS.run} accuracies '
                 f'for classes %s from %s'
                 % (', '.join(map(str, class_ids)), FLAGS.dataset))

    input_image_size = 224
    if FLAGS.dataset == 'cub':
        dataset = CUBDataset.load_all_data(
            CUB_DATA_DIR, input_image_size, class_ids)
    elif FLAGS.dataset == 'awa2':
        dataset = AwA2Dataset.load_all_data(
            AWA2_DATA_DIR, input_image_size, class_ids)
    y_true = [v[1] for v in dataset]

    scores, col_ids = load_scores(
        os.path.join(
            SCRATCH_DIR, 'scores',
            f'vgg16_layer30-{FLAGS.dataset}-%s-scores.npz'
            % '_'.join(map(str, class_ids))),
        pick_one_prototype=True)

    seed = sum(v * (3 ** (3 * i))
               for i, v in enumerate(class_ids + [FLAGS.run]))
    random.seed(seed)
    np.random.seed(seed)

    num_classes = len(class_ids)
    logging.info(f'performing {num_classes}-class labeling...')
    y_kmeans = KMeans(n_clusters=num_classes).fit_predict(scores)
    kmeans_acc = best_acc(y_true, y_kmeans)

    try:
        kmeans_init_model, y_kmeans_em = \
            GogglesProbabilisticModel.run_em(scores, col_ids, y_kmeans,
                                             p1=None, update_prior=True)
        kmeans_init_model.save_model(kmeans_init_model_out_filepath)
        logging.info(f'saved k-means init model at '
                     f'{kmeans_init_model_out_filepath}')
        kmeans_em_acc = best_acc(y_true, y_kmeans_em)
    except Exception as e:
        logging.exception(e)
        kmeans_em_acc = 0.
        # Sentinel predictions so the np.savez call below does not fail
        # with a NameError when EM raises.
        y_kmeans_em = np.full(len(y_true), -1)

    logging.info('image counts: %s' % str(Counter(y_true)))
    logging.info('only kmeans accuracy for classes %s: %0.9f'
                 % (', '.join(map(str, class_ids)), kmeans_acc))
    logging.info('kmeans + em accuracy for classes %s: %0.9f'
                 % (', '.join(map(str, class_ids)), kmeans_em_acc))

    np.savez(preds_out_filepath,
             y_true=y_true,
             y_kmeans=y_kmeans,
             y_kmeans_em=y_kmeans_em)
    logging.info(f'saved predictions at {preds_out_filepath}')

    notify(f'`{FLAGS.dataset}` - `%s` - `run {FLAGS.run}`: '
           f'{kmeans_acc}, {kmeans_em_acc}'
           % ', '.join(map(str, class_ids)),
           namespace='inference-multi')
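# best_acc is not defined in this section. Since k-means cluster ids are
# arbitrary, a helper of this kind typically reports accuracy under the best
# assignment of cluster ids to class ids. A minimal sketch of one plausible
# implementation follows (an assumption, not the repo's actual code).
from itertools import permutations

import numpy as np


def best_acc_sketch(y_true, y_pred):
    """Accuracy maximized over assignments of cluster ids to class ids."""
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    clusters, classes = np.unique(y_pred), np.unique(y_true)
    best = 0.0
    for perm in permutations(classes, len(clusters)):
        mapping = dict(zip(clusters, perm))
        remapped = np.array([mapping[c] for c in y_pred])
        best = max(best, float(np.mean(remapped == y_true)))
    return best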
def main(argv):
    del argv  # unused

    """Runs k-means-only labeling on the affinity scores; the EM
    refinement variants are disabled in this script."""

    preds_out_dirpath = os.path.join(SCRATCH_DIR, 'preds_multi_class')
    os.makedirs(preds_out_dirpath, exist_ok=True)

    class_ids = list(map(int, FLAGS.class_ids))
    preds_out_filename = '-'.join([
        FLAGS.dataset,
        '_'.join(map(str, class_ids)),
        'run_%02d' % FLAGS.run,
        'preds.npz'])
    preds_out_filepath = os.path.join(preds_out_dirpath, preds_out_filename)
    assert not os.path.exists(preds_out_filepath), \
        'Predictions for this run already exist at %s' % preds_out_filepath

    logging.info(f'calculating run {FLAGS.run} accuracies '
                 f'for classes %s from %s'
                 % (', '.join(map(str, class_ids)), FLAGS.dataset))

    input_image_size = 224
    if FLAGS.dataset == 'cub':
        dataset = CUBDataset.load_all_data(
            CUB_DATA_DIR, input_image_size, class_ids)
    elif FLAGS.dataset == 'awa2':
        dataset = AwA2Dataset.load_all_data(
            AWA2_DATA_DIR, input_image_size, class_ids)
    y_true = [v[1] for v in dataset]

    class_ids_filename = '_'.join(map(str, class_ids))
    npz_filename = os.path.join(
        SCRATCH_DIR, 'scores',
        'vgg16_layer30-{}-{}-scores.npz'.format(FLAGS.dataset,
                                                class_ids_filename))
    scores, col_ids = load_scores(npz_filename)

    seed = sum(v * (10 ** (3 * i))
               for i, v in enumerate(class_ids + [FLAGS.run]))
    random.seed(seed)
    np.random.seed(seed)

    # NOTE: the number of clusters is hard-coded to 2 here, unlike the
    # preds-multi pipeline above, which uses len(class_ids).
    y_kmeans = KMeans(n_clusters=2).fit_predict(scores)
    kmeans_acc = best_acc(y_true, y_kmeans)

    # The EM refinement steps (k-means init and random init) present in the
    # preds-multi pipeline above are disabled in this script; only the raw
    # k-means labeling is evaluated and saved.

    logging.info('image counts: %s' % str(Counter(y_true)))
    logging.info('only kmeans accuracy for classes %s: %0.9f'
                 % (', '.join(map(str, class_ids)), kmeans_acc))

    np.savez(preds_out_filepath, y_true=y_true, y_kmeans=y_kmeans)
    logging.info(f'saved predictions at {preds_out_filepath}')

    notify(f'`{FLAGS.dataset}` - `%s` - `run {FLAGS.run}`: {kmeans_acc}'
           % ', '.join(map(str, class_ids)),
           namespace='inference')
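# Minimal sketch of reloading the predictions saved by the k-means-only main
# above and re-checking the two-cluster accuracy. The filename is an assumed
# example, and y_true is assumed to be encoded as 0/1 for the two classes.
import numpy as np

preds = np.load('cub-14_90-run_01-preds.npz')
y_true = np.asarray(preds['y_true'])
y_kmeans = np.asarray(preds['y_kmeans'])

# Cluster ids are arbitrary, so try both assignments and keep the better one.
acc = max(np.mean(y_kmeans == y_true), np.mean((1 - y_kmeans) == y_true))
print('kmeans accuracy:', acc)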