def do_testing(gpu=0):
    """Evaluate the fine-tuned BERT model (prediction-only run).

    @param gpu: the index of the GPU you wish to use (default 0)
    """
    # Locations of the pretrained model, the dataset, and the output directory.
    # (The original also asserted these keys were non-None immediately after
    # setting them — those checks could never fail and have been removed.)
    os.environ["BERT_BASE_DIR"] = "pretrained/cased_L-12_H-768_A-12"
    os.environ["DATA_DIR"] = "dataset"
    os.environ["OUTPUT_DIR"] = "output"

    # Select a single GPU; PCI_BUS_ID makes the index order deterministic.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    # Configure run_classifier for prediction only.
    FLAGS.task_name = "topic"
    FLAGS.do_predict = True
    FLAGS.data_dir = os.environ["DATA_DIR"]
    FLAGS.vocab_file = os.path.join(os.environ["BERT_BASE_DIR"], "vocab.txt")
    FLAGS.bert_config_file = os.path.join(os.environ["BERT_BASE_DIR"], "bert_config.json")
    FLAGS.init_checkpoint = os.path.join(os.environ["BERT_BASE_DIR"], "bert_model.ckpt")
    FLAGS.do_lower_case = False  # cased_L-12 checkpoint -> keep casing
    FLAGS.max_seq_length = 128
    FLAGS.output_dir = os.environ["OUTPUT_DIR"]

    run_classifier.main(0)
def exec_phase1_ev(top, cp='ev_sent'):
    """Run phase-1 evidence-sentence prediction over the data rooted at *top*.

    @param top: root directory holding the ev/ input and results folders
    @param cp: checkpoint/model subdirectory name (default 'ev_sent')
    """
    clear_flags()
    import run_classifier as model
    clear_flags()
    reload(model)  # re-import so the module starts from a clean flag state

    # Prediction-only run of the 'ico' task.
    model.FLAGS.task_name = 'ico'
    model.FLAGS.do_train = False
    model.FLAGS.do_predict = True

    # Input, output, and fine-tuned model locations.
    model.FLAGS.data_dir = '{}/ev/'.format(top)
    model.FLAGS.output_dir = '{}/ev/results/'.format(top)
    model.FLAGS.model_dir = '{}/data/{}/model/'.format(bert_model_top, cp)

    # Vocabulary, config, and initial checkpoint taken from BIO_BERT_DIR.
    model.FLAGS.vocab_file = '{}/vocab.txt'.format(BIO_BERT_DIR)
    model.FLAGS.bert_config_file = '{}/bert_config.json'.format(BIO_BERT_DIR)
    model.FLAGS.init_checkpoint = '{}'.format(BIO_BERT_DIR)

    model.main('')
def exec_phase2_ic_ev(top, cp='i_c_abst'):
    """Run phase-2 I/C-frame prediction over the data rooted at *top*.

    @param top: root directory holding the ic_frame/ input and results folders
    @param cp: checkpoint/model subdirectory name (default 'i_c_abst')
    """
    clear_flags()
    import run_classifier as model
    clear_flags()
    reload(model)  # re-import so the module starts from a clean flag state

    # NOTE: important! We want to fit the whole abstract in memory.
    model.FLAGS.max_seq_length = 512

    # Prediction-only run of the 'ico_ab' (abstract-level) task.
    model.FLAGS.do_train = False
    model.FLAGS.do_predict = True
    model.FLAGS.task_name = 'ico_ab'

    # Input, output, and fine-tuned model locations.
    model.FLAGS.data_dir = '{}/ic_frame/'.format(top)
    model.FLAGS.output_dir = '{}/ic_frame/results/'.format(top)
    model.FLAGS.model_dir = '{}/data/{}/model/'.format(bert_model_top, cp)

    # Vocabulary, config, and initial checkpoint taken from SCI_BERT_DIR.
    model.FLAGS.vocab_file = '{}/vocab.txt'.format(SCI_BERT_DIR)
    model.FLAGS.bert_config_file = '{}/bert_config.json'.format(SCI_BERT_DIR)
    model.FLAGS.init_checkpoint = '{}'.format(SCI_BERT_DIR)
    model.FLAGS.do_lower_case = DO_LOWER_CASE

    model.main('')
def do_training(gpu=0, prediction_mode="regression", restart=True):
    """Fine-tune the pretrained BERT model on the dataset.

    @param gpu: the index of the GPU you wish to use (default 0)
    @param prediction_mode: "regression" or "classification"
        (accepted for API compatibility; not read by this function's body —
        TODO confirm whether run_classifier consumes it elsewhere)
    @param restart: should we delete everything in ./output and start from scratch
    """
    # Locations of the pretrained model, the dataset, and the output directory.
    # (The original's assert-not-None checks on these keys were dead code —
    # the keys are unconditionally set — and have been removed.)
    os.environ["BERT_BASE_DIR"] = "pretrained/cased_L-12_H-768_A-12"
    os.environ["DATA_DIR"] = "dataset"
    os.environ["OUTPUT_DIR"] = "output"

    # Select a single GPU; PCI_BUS_ID makes the index order deterministic.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    # Configure run_classifier for training + evaluation.
    FLAGS.task_name = "topic"
    FLAGS.do_train = True
    FLAGS.do_eval = True
    FLAGS.data_dir = os.environ["DATA_DIR"]
    FLAGS.vocab_file = os.path.join(os.environ["BERT_BASE_DIR"], "vocab.txt")
    FLAGS.bert_config_file = os.path.join(os.environ["BERT_BASE_DIR"], "bert_config.json")
    FLAGS.init_checkpoint = os.path.join(os.environ["BERT_BASE_DIR"], "bert_model.ckpt")
    FLAGS.do_lower_case = False  # cased_L-12 checkpoint -> keep casing
    FLAGS.max_seq_length = 128
    FLAGS.train_batch_size = 32
    FLAGS.learning_rate = 2e-5
    FLAGS.num_train_epochs = 1
    FLAGS.output_dir = os.environ["OUTPUT_DIR"]

    if restart:
        delete_folder_contents(FLAGS.output_dir)
        # BUGFIX: recreate .gitkeep INSIDE the emptied output directory so it
        # stays tracked by git. The original wrote ".gitkeep" to the current
        # working directory, leaving the wiped output folder without one.
        open(os.path.join(FLAGS.output_dir, ".gitkeep"), "w").close()

    run_classifier.main(0)
'-lponlybeam %s -outlatbeam %s -pbeam %s -pl_beam %s -pl_pbeam %s -wbeam %s' %('data/acousticModel/en-us/', 'data/dictionary/en-us/cmudict-en-us.dict', 'data/languageModel/en-us/en-us.lm', path, controlFile, 'outputText.hyp', beam, fwdflatbeam, fwdflatwbeam, lpbeam, lponlybeam, outlatbeam, pbeam, pl_beam, pl_pbeam, wbeam), shell=True) extWordProcess.communicate()[0] for root,subDirs,files in os.walk(outputTextPATH): for file in files: with open(outputTextPATH+file, 'r') as myfile: data=myfile.read().replace('\n', '') ################ Applying Tagging Algorithm ######################### print "Applying NaiveBayes Classifier ..." print "Extracting Tags..." print 'DONE !!!' print "Extracted Tags Are: " cls.main()
def build_validation_spec(self, hparams):
    """Delegate to main() to construct and return the evaluation spec.

    @param hparams: accepted for interface symmetry; not read here
    """
    return main(self.flags, "build_eval_spec")
def build_train_spec(self, hparams):
    """Delegate to main() to construct and return the training spec.

    @param hparams: accepted for interface symmetry; not read here
    """
    return main(self.flags, "build_train_spec")
def build_estimator(self, hparams):
    """Delegate to main() to construct and return the estimator.

    @param hparams: accepted for interface symmetry; not read here
    """
    return main(self.flags, "build_estimator")