def main(argv):
    """Entry point: train the model or run single-image UNet prediction.

    Dispatches on FLAGS.mode ('train' or 'predict'); any other value raises.
    """
    del argv  # unused
    tf.logging.set_verbosity(tf.logging.ERROR)
    hparams = _default_hparams()

    if FLAGS.mode == 'train':
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
        )
    elif FLAGS.mode == 'predict':
        # Heavy deps are only needed for prediction, so import lazily.
        import cv2
        from matplotlib import pyplot as plt
        from gen_tfrecords import central_crop

        with tf.Session() as sess:
            unet = utils.UNet(FLAGS.model_dir, sess)
            # NOTE(review): vw/vh are assumed to be module-level crop
            # dimensions — confirm they are defined at file scope.
            img = central_crop(cv2.imread(FLAGS.image), vw, vh) / 255.0
            start = time()
            mask = unet.run(img)
            print("Inference took %f ms" % (1000 * (time() - start)))
            # Overlay: 30% input image, 70% predicted mask (broadcast over
            # channels via the trailing new axis).
            overlay = .3 * img + .7 * np.squeeze(mask)[..., np.newaxis]
            plt.imshow(overlay)
            plt.show()
    else:
        raise ValueError("Unknown mode: %s" % FLAGS.mode)
def main(argv):
    """Dispatch on FLAGS: predict, test, eval (degree histogram), or train.

    Test/eval report the median and mean angular error from utils.eval;
    eval additionally converts radians to degrees and shows a histogram.
    """
    del argv  # unused
    # BUG FIX: the original did `f = open("eval_output.txt", "a+")` and later
    # `f.close` (attribute access, not a call), so the handle always leaked.
    # The handle is never written to, so only the side effect of creating the
    # file is preserved; the context manager closes it immediately.
    with open("eval_output.txt", "a+"):
        pass
    hparams = _default_hparams()
    if FLAGS.predict:
        predict(FLAGS.input, hparams, path=FLAGS.path)
    elif FLAGS.test:
        print("Testing")
        output = utils.eval(model_dir=FLAGS.model_dir,
                            model_fn=model_fn,
                            input_fn=create_test_input_fn,
                            hparams=hparams,
                            steps=FLAGS.steps,
                            batch_size=FLAGS.batch_size,
                            save_checkpoints_secs=600,
                            eval_throttle_secs=1800,
                            eval_steps=5,
                            sync_replicas=FLAGS.sync_replicas,
                            task="test",
                            path=FLAGS.path)
        print("Angular median: ", np.median(output['angular_array']))
        print("Angular loss: ", output['angular_loss'])
    elif FLAGS.eval:
        print("Evaluating")
        output = utils.eval(model_dir=FLAGS.model_dir,
                            model_fn=model_fn,
                            input_fn=create_test_input_fn,
                            hparams=hparams,
                            steps=FLAGS.steps,
                            batch_size=FLAGS.batch_size,
                            save_checkpoints_secs=600,
                            eval_throttle_secs=1800,
                            eval_steps=5,
                            sync_replicas=FLAGS.sync_replicas,
                            task="eval",
                            path=FLAGS.path)
        # Convert radians -> degrees for reporting and plotting.
        array_degree = output['angular_array'] * 180 / math.pi
        print("Angular median: ", np.median(array_degree))
        print("Angular loss: ", output['angular_loss'] * 180 / math.pi)
        plt.hist(array_degree, bins=50, range=(0, 180))
        plt.show()
    else:
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
            save_checkpoints_secs=600,
            eval_throttle_secs=1800,
            eval_steps=5,
            sync_replicas=FLAGS.sync_replicas,
        )
def main():
    """Compare vectorizers (binary, count, TF-IDF, bigram TF-IDF) on 5000
    Amazon JP reviews after HTML cleaning and tokenization."""
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv', n=5000)

    print('Tokenization')
    x = [clean_html(text, strip=True) for text in x]
    x = [' '.join(tokenize(text)) for text in x]
    # BUG FIX: the original unpacked into `y_train.y_test` (attribute access on
    # an undefined name) instead of the tuple target `y_train, y_test`, which
    # raised at runtime.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    print('Binary')
    vectorizer = CountVectorizer(binary=True)
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    print('Count')  # typo fix: was 'COunt'
    vectorizer = CountVectorizer(binary=False)
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    print('TF-IDF')
    vectorizer = TfidfVectorizer()
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    print('Bigram')
    vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)
def main(argv):
    """Dispatch on FLAGS.mode: 'train', 'save_desc', 'pr', 'best', or 'ex'."""
    del argv  # unused
    tf.logging.set_verbosity(tf.logging.ERROR)

    # NOTE(review): `sess` is never used below; presumably it exists only to
    # pin the per-process GPU memory fraction before anything else runs —
    # confirm before removing.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    if FLAGS.mode == 'train':
        hparams = _default_hparams()
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
        )
        return

    if FLAGS.mode == 'save_desc':
        import test_net_save_desc
        print('### model save_desc')
        test_net_save_desc.save_desc(FLAGS.model_dir, FLAGS.data_dir,
                                     FLAGS.n_include, FLAGS.title)
        return

    if FLAGS.mode == 'pr':
        import test_net
        test_net.plot(FLAGS.model_dir, FLAGS.data_dir, FLAGS.n_include,
                      FLAGS.title,
                      netvlad_feat=FLAGS.netvlad_feat,
                      include_calc=FLAGS.include_calc)
        return

    if FLAGS.mode == 'best':
        import test_net
        test_net.find_best_checkpoint(FLAGS.model_dir, FLAGS.data_dir,
                                      FLAGS.n_include)
        return

    if FLAGS.mode == 'ex':
        utils.show_example(FLAGS.image_fl, FLAGS.model_dir)
        return

    raise ValueError("Unrecognized mode: %s" % FLAGS.mode)
def main():
    """Compare text-preprocessing variants on 1000 Amazon JP reviews."""
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv', n=1000)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    print('Tokenization only.')
    train_and_eval(x_train, y_train, x_test, y_test, tokenize=tokenize)

    print('Clean html.')
    train_and_eval(x_train, y_train, x_test, y_test,
                   tokenize=tokenize, preprocessor=clean_html)

    print('Normalize number.')
    # BUG FIX: the original passed `namaline_number`, an undefined name; the
    # label and the sibling scripts in this project use `normalize_number`.
    train_and_eval(x_train, y_train, x_test, y_test,
                   tokenize=tokenize, preprocessor=normalize_number)

    print('Base form.')
    train_and_eval(x_train, y_train, x_test, y_test,
                   tokenize=tokenize_base_form)

    print('Lower text.')
    train_and_eval(x_train, y_train, x_test, y_test,
                   tokenize=tokenize, lowercase=True)
def main(argv):
    """Run prediction when FLAGS.predict is set; otherwise train-and-eval."""
    del argv  # unused
    hparams = _default_hparams()

    if FLAGS.predict:
        predict(FLAGS.input, hparams)
        return

    utils.train_and_eval(
        model_dir=FLAGS.model_dir,
        model_fn=model_fn,
        input_fn=create_input_fn,
        hparams=hparams,
        steps=FLAGS.steps,
        batch_size=FLAGS.batch_size,
        save_checkpoints_secs=600,
        eval_throttle_secs=1800,
        eval_steps=5,
        sync_replicas=FLAGS.sync_replicas,
    )
def main(argv):
    """Either run the test routine directly, or do staged training then test.

    Training runs four stages: the step budget grows by 100000 each stage
    while args.lr is halved before every stage.
    """
    del argv  # unused

    if args.test:
        test()
        return

    for stage in range(1, 5):
        args.steps = stage * 100000
        args.lr /= 2  # halve the learning rate for each successive stage
        hparams = _default_hparams()
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
              " steps:", args.steps, " lr=", args.lr)
        utils.train_and_eval(model_dir=args.model_dir,
                             model_fn=model_fn,
                             create_input_fn=create_input_fn,
                             create_test_input_fn=create_test_input_fn,
                             hparams=hparams,
                             steps=args.steps,
                             batch_size=args.batch_size,
                             sync_replicas=args.sync_replicas)
    # NOTE(review): assumed test() runs once after all stages complete —
    # confirm against the original indentation.
    test()
def main(argv):
    """Dispatch on FLAGS.mode: 'train', 'pr', 'best', or 'ex'."""
    del argv  # unused
    tf.logging.set_verbosity(tf.logging.ERROR)

    if FLAGS.mode == 'train':
        hparams = _default_hparams()
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
        )
        return

    if FLAGS.mode == 'pr':
        import test_net
        test_net.plot(FLAGS.model_dir, FLAGS.data_dir, FLAGS.n_include,
                      FLAGS.title,
                      netvlad_feat=FLAGS.netvlad_feat,
                      include_calc=FLAGS.include_calc)
        return

    if FLAGS.mode == 'best':
        import test_net
        test_net.find_best_checkpoint(FLAGS.model_dir, FLAGS.data_dir,
                                      FLAGS.n_include)
        return

    if FLAGS.mode == 'ex':
        utils.show_example(FLAGS.image_fl, FLAGS.model_dir)
        return

    raise ValueError("Unrecognized mode: %s" % FLAGS.mode)
def main(argv):
    """Predict, evaluate (appending the result to eval_output.txt), or train.

    Dispatches on FLAGS.predict / FLAGS.eval; the default branch trains.
    """
    del argv  # unused
    hparams = _default_hparams()

    if FLAGS.predict:
        predict(FLAGS.input, hparams)
    elif FLAGS.eval:
        print("Running Inference")
        output = utils.eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
            save_checkpoints_secs=600,
            eval_throttle_secs=1800,
            eval_steps=5,
            sync_replicas=FLAGS.sync_replicas,
        )
        # BUG FIX: the original opened the file up-front on every branch and
        # ended with `f.close` (attribute access, not a call), leaking the
        # handle and risking an unflushed write. Open only where it is used,
        # inside a context manager that guarantees flush + close.
        with open("eval_output.txt", "a+") as f:
            f.write(str(output))
    else:
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
            save_checkpoints_secs=600,
            eval_throttle_secs=1800,
            eval_steps=5,
            sync_replicas=FLAGS.sync_replicas,
        )
## -------------
## Train:3largebio; Test: anatomy (retrain 0,1)
print("----------------\nTrain:3largebio; Test: anatomy (retrain 0,1)")
# One cross-validation tuple: train on the three largebio splits, test on
# anatomy.
cross_tuples = [
    ([Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_an], "1"),
]
results_path = os.path.join("data", "largebio_anatomy_paper.pkl")
if not os.path.isfile(results_path):
    df_results = u.train_and_eval(cross_tuples,
                                  classifiers,
                                  classifier_kwargs,
                                  undersample=True,
                                  save='data/largebio_anatomy_paper.pkl')
else:
    # BUG FIX: the original opened the pickle file without ever closing it;
    # the context manager guarantees the handle is released. Also reuse
    # results_path instead of duplicating the literal path.
    with open(results_path, "rb") as pickle_off:
        df_results = pickle.load(pickle_off)

## --------------
## Train LB with lb/conference features
print('----------------\nTrain LB with lb/conference features')
conf_lb_features = [
    'measure_aml', 'measure_dome', 'measure_logmap', 'measure_logmaplt',
    'measure_wiktionary'
]
optimizer = optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=config.base_lr[0], momentum=0.9) scheduler = CyclicLR(optimizer=optimizer, base_lr=config.base_lr[0], max_lr=config.max_lr[0], step_size=config.step_size, mode='triangular2') elif config.model_name == 'resnet50': model = Resnet50(config.num_classes) optimizer = optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=config.base_lr[0], momentum=0.9) scheduler = CyclicLR(optimizer=optimizer, base_lr=config.base_lr[0], max_lr=config.max_lr[0], step_size=config.step_size, mode='triangular2') else: raise RuntimeError( 'model name must be one of [resnet50, se_resnext50, densenet201]' ) model = nn.DataParallel(model, device_ids=config.gpu_ids) model = model.cuda() train_and_eval(model, scheduler, optimizer, criterion, loader_train, loader_valid, config)
} }, { "param_grid": { 'C': [0.1, 0.5, 1, 10], 'tol': [1e-2, 1e-3, 1e-4] } }, { "param_grid": { 'base_estimator': [LogisticRegression()], 'n_estimators': [50, 100, 150, 200] } }] df_results = u.train_and_eval(cross_tuples, classifiers, classifier_kwargs, undersample=True, save='data/conference_paper.pkl') print(df_results.loc[:, df_results.columns != 'training_df']) lb_measures = [ 'measure_agm', 'measure_aml', 'measure_dome', 'measure_fcamap', 'measure_logmap', 'measure_logmapbio', 'measure_logmaplt', 'measure_pomap++', 'measure_wiktionary' ] conf_lb_columns = np.array(lb_measures)[np.isin( np.array(lb_measures), df_data_bins.columns)].tolist() print(conf_lb_columns)
"param_grid": { 'C': [0.1, 0.5, 1, 10], 'tol': [1e-2, 1e-3, 1e-4] } }, { "param_grid": { 'base_estimator': [LogisticRegression()], 'n_estimators': [50, 100, 150, 200] } }] cross_tuples = [ ([Xy_bins_an, Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_an], "i1"), ([Xy_bins_an, Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], "i2"), ([Xy_bins_an], [Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], "ii"), ([Xy_bins_lb1], [Xy_bins_lb2, Xy_bins_lb3], "iii1"), ([Xy_bins_lb2], [Xy_bins_lb1, Xy_bins_lb3], "iii2"), ([Xy_bins_lb3], [Xy_bins_lb1, Xy_bins_lb2], "iii3"), ([Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_lb1], "iv1"), ([Xy_bins_lb1, Xy_bins_lb3], [Xy_bins_lb2], "iv2"), ([Xy_bins_lb1, Xy_bins_lb2], [Xy_bins_lb3], "iv1"), ] df_results = u.train_and_eval(cross_tuples, classifiers, classifier_kwargs, undersample=True, save='data/largebio_paper.pkl') print(df_results.loc[:, df_results.columns != 'training_df'])
def main():
    """Compare preprocessing/tokenization variants on 1000 Amazon JP reviews,
    logging each experiment label at debug level."""
    logger = logging.getLogger('__name__')
    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv",
                        n=1000)
    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=0.2,
                                                        random_state=44)

    # Table of (label, train_and_eval keyword overrides), run in order.
    variants = [
        ("●○ Tokenization only. ○●", dict(tokenize=tokenize)),
        ("●○ Clean html. ○●", dict(tokenize=tokenize,
                                   preprocessor=clean_html)),
        ("●○ Normalize number. ○●", dict(tokenize=tokenize,
                                         preprocessor=normalize_number)),
        ("●○ Base form. ○●", dict(tokenize=tokenize_base_form)),
        ("●○ Lower text. ○●", dict(tokenize=tokenize_base_form,
                                   lowercase=True)),
    ]
    for label, overrides in variants:
        logger.debug(label)
        train_and_eval(x_train, y_train, x_test, y_test, **overrides)

    logger.debug("●○ Use MaCab; tokenize only. ○●")  # not written in text
    import MeCab
    path_neologd = '/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd'
    # NOTE(review): the "&&" inside the Tagger option string looks suspicious
    # (usually just "-Owakati -d <dic>"); kept byte-for-byte — confirm.
    t_mecab = MeCab.Tagger("-Owakati && -d {}".format(path_neologd))

    def tokenize_by_mecab(text):
        # Wakati-gana output: whitespace-separated surface forms.
        return list(t_mecab.parse(text).strip().split(" "))

    train_and_eval(x_train, y_train, x_test, y_test,
                   tokenize=tokenize_by_mecab)
def main():
    """Benchmark vectorizer choices (binary/count/TF-IDF with several n-gram
    ranges), then repeat the TF-IDF runs with MeCab tokenization."""
    logger = logging.getLogger('__name__')
    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv",
                        n=5000)

    logger.debug("●○ Tokenization ○●")
    x = [clean_html(text, strip=True) for text in x]
    x = [" ".join(tokenize(text)) for text in x]
    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=0.2,
                                                        random_state=44)

    # (label, vectorizer factory) pairs; factories keep construction lazy so
    # each run gets a fresh vectorizer, exactly as the per-run originals did.
    trials = [
        ("●○ Binary ○●",
         lambda: CountVectorizer(binary=True)),
        ("●○ Count ○●",
         lambda: CountVectorizer(binary=False)),
        ("●○ TF-IDF; Uni-gram ○●",
         lambda: TfidfVectorizer(ngram_range=(1, 1))),
        ("●○ TF-IDF; Bi-gram ○●",
         lambda: TfidfVectorizer(ngram_range=(2, 2))),
        ("●○ TF-IDF; Uni- and Bi-grams ○●",
         lambda: TfidfVectorizer(ngram_range=(1, 2))),
        ("●○ TF-IDF; Uni-, Bi-, and Tri-grams ○●",
         lambda: TfidfVectorizer(ngram_range=(1, 3))),
    ]
    for label, make_vectorizer in trials:
        logger.debug(label)
        train_and_eval(x_train, y_train, x_test, y_test, make_vectorizer())

    logger.debug("●○ Use MaCab; TF-IDF; Uni-gram ○●")  # not written in text
    import MeCab
    path_neologd = '/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd'
    # NOTE(review): the "&&" inside the Tagger option string looks suspicious
    # (usually just "-Owakati -d <dic>"); kept byte-for-byte — confirm.
    t_mecab = MeCab.Tagger("-Owakati && -d {}".format(path_neologd))

    def tokenize_by_mecab(text):
        # Wakati output: whitespace-separated surface forms.
        return list(t_mecab.parse(text).strip().split(" "))

    # Reload and re-tokenize the corpus with MeCab, then rerun TF-IDF.
    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv",
                        n=5000)
    x = [clean_html(text, strip=True) for text in x]
    x = [" ".join(tokenize_by_mecab(text)) for text in x]
    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=0.2,
                                                        random_state=44)
    train_and_eval(x_train, y_train, x_test, y_test,
                   TfidfVectorizer(ngram_range=(1, 1)))

    logger.debug(
        "●○ Use MaCab; TF-IDF; Uni- and Bi-grams ○●")  # not written in text
    train_and_eval(x_train, y_train, x_test, y_test,
                   TfidfVectorizer(ngram_range=(1, 2)))

    logger.debug("●○ Use MaCab; TF-IDF; Uni-, Bi-, and Tri-grams ○●"
                 )  # not written in text
    train_and_eval(x_train, y_train, x_test, y_test,
                   TfidfVectorizer(ngram_range=(1, 3)))