Example #1
0
def do_testing(gpu=0):
    """
    Evaluate our fine-tuned BERT model by running run_classifier in
    prediction-only mode (FLAGS.do_predict = True).

    @param gpu: the index of the gpu you wish to use (default 0)
    """
    # expected environment variables
    os.environ["BERT_BASE_DIR"] = "pretrained/cased_L-12_H-768_A-12"
    os.environ["DATA_DIR"] = "dataset"
    os.environ["OUTPUT_DIR"] = "output"
    # NOTE(review): these asserts can never fire — the variables were just
    # assigned above; they only guard against a future refactor removing them.
    assert os.environ.get("BERT_BASE_DIR") is not None
    assert os.environ.get("DATA_DIR") is not None
    assert os.environ.get("OUTPUT_DIR") is not None

    # set the gpu index
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    # set the required flags
    FLAGS.task_name = "topic"
    FLAGS.do_predict = True
    FLAGS.data_dir = os.environ.get("DATA_DIR")
    FLAGS.vocab_file = os.path.join(os.environ.get("BERT_BASE_DIR"), "vocab.txt")
    FLAGS.bert_config_file = os.path.join(os.environ.get("BERT_BASE_DIR"), "bert_config.json")
    FLAGS.init_checkpoint = os.path.join(os.environ.get("BERT_BASE_DIR"), "bert_model.ckpt")
    FLAGS.do_lower_case = False
    FLAGS.max_seq_length = 128
    FLAGS.output_dir = os.environ.get("OUTPUT_DIR")

    # run_classifier.main ignores its argv argument here; 0 is a placeholder.
    run_classifier.main(0)
Example #2
0
def exec_phase1_ev(top, cp='ev_sent'):
    """Run phase-1 evidence-sentence prediction for the dataset rooted at
    ``top`` using the model checkpoint named ``cp``.

    Resets and reloads the run_classifier module so its flags start clean,
    configures prediction-only mode, then invokes its main entry point.
    """
    clear_flags()
    import run_classifier as model
    clear_flags()
    reload(model)
    # Assign every flag in one pass; insertion order matches the
    # original assignment order.
    flag_settings = {
        'task_name': 'ico',
        'do_train': False,
        'do_predict': True,
        'data_dir': '{}/ev/'.format(top),
        'output_dir': '{}/ev/results/'.format(top),
        'model_dir': '{}/data/{}/model/'.format(bert_model_top, cp),
        'vocab_file': '{}/vocab.txt'.format(BIO_BERT_DIR),
        'bert_config_file': '{}/bert_config.json'.format(BIO_BERT_DIR),
        'init_checkpoint': '{}'.format(BIO_BERT_DIR),
    }
    for flag_name, flag_value in flag_settings.items():
        setattr(model.FLAGS, flag_name, flag_value)
    model.main('')
Example #3
0
def exec_phase2_ic_ev(top, cp='i_c_abst'):
    """Run phase-2 intervention/comparator-vs-evidence prediction for the
    dataset rooted at ``top`` using the model checkpoint named ``cp``.

    Resets and reloads the run_classifier module so its flags start clean,
    configures prediction-only mode, then invokes its main entry point.
    """
    clear_flags()
    import run_classifier as model
    clear_flags()
    reload(model)
    # Assign every flag in one pass; insertion order matches the
    # original assignment order.
    flag_settings = {
        # NOTE: important! We want to fit the whole abstract in memory
        'max_seq_length': 512,
        'do_train': False,
        'do_predict': True,
        'task_name': 'ico_ab',
        'data_dir': '{}/ic_frame/'.format(top),
        'output_dir': '{}/ic_frame/results/'.format(top),
        'model_dir': '{}/data/{}/model/'.format(bert_model_top, cp),
        'vocab_file': '{}/vocab.txt'.format(SCI_BERT_DIR),
        'bert_config_file': '{}/bert_config.json'.format(SCI_BERT_DIR),
        'init_checkpoint': '{}'.format(SCI_BERT_DIR),
        'do_lower_case': DO_LOWER_CASE,
    }
    for flag_name, flag_value in flag_settings.items():
        setattr(model.FLAGS, flag_name, flag_value)
    model.main('')
Example #4
0
def do_training(gpu=0, prediction_mode="regression", restart=True):
    """
    This function performs fine-tuning on the existing BERT model

    @param gpu: the index of the gpu you wish to use (default 0)
    @param prediction_mode: "regression" or "classification"
    @param restart: should we delete everything in ./output and start from scratch
    """
    os.environ["BERT_BASE_DIR"] = "pretrained/cased_L-12_H-768_A-12"
    os.environ["DATA_DIR"] = "dataset"
    os.environ["OUTPUT_DIR"] = "output"
    # expected environment variables
    assert os.environ.get("BERT_BASE_DIR") is not None
    assert os.environ.get("DATA_DIR") is not None
    assert os.environ.get("OUTPUT_DIR") is not None

    # set the gpu index
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    # set the required flags
    FLAGS.task_name = "topic"
    FLAGS.do_train = True
    FLAGS.do_eval = True
    FLAGS.data_dir = os.environ.get("DATA_DIR")
    FLAGS.vocab_file = os.path.join(os.environ.get("BERT_BASE_DIR"),
                                    "vocab.txt")
    FLAGS.bert_config_file = os.path.join(os.environ.get("BERT_BASE_DIR"),
                                          "bert_config.json")
    FLAGS.init_checkpoint = os.path.join(os.environ.get("BERT_BASE_DIR"),
                                         "bert_model.ckpt")
    FLAGS.do_lower_case = False
    FLAGS.max_seq_length = 128
    FLAGS.train_batch_size = 32
    FLAGS.learning_rate = 2e-5
    FLAGS.num_train_epochs = 1
    FLAGS.output_dir = os.environ.get("OUTPUT_DIR")

    if restart:
        delete_folder_contents(FLAGS.output_dir)
        open(".gitkeep", "w").close()
    run_classifier.main(0)
																			'-lponlybeam %s -outlatbeam %s -pbeam %s -pl_beam	%s -pl_pbeam %s -wbeam %s' 
																		%('data/acousticModel/en-us/',
																			'data/dictionary/en-us/cmudict-en-us.dict',
																			'data/languageModel/en-us/en-us.lm',
																			path,
																			controlFile,
																			'outputText.hyp',
																			beam,
																			fwdflatbeam,
																			fwdflatwbeam,
																			lpbeam,
																			lponlybeam,
																			outlatbeam,
																			pbeam,
																			pl_beam,
																			pl_pbeam,
																			wbeam),
																		shell=True)
extWordProcess.communicate()[0]
for root,subDirs,files in os.walk(outputTextPATH):
	for file in files:
		with open(outputTextPATH+file, 'r') as myfile:
			data=myfile.read().replace('\n', '')
################ Applying Tagging Algorithm #########################
print "Applying NaiveBayes Classifier ..."
print "Extracting Tags..."
print 'DONE !!!'
print "Extracted Tags Are: "
cls.main()

Example #6
0
 def build_validation_spec(self, hparams):
     """Build and return the evaluation spec via the shared ``main`` entry point.

     NOTE(review): ``hparams`` is unused here; ``main`` appears to read its
     configuration from ``self.flags`` — confirm against the caller.
     """
     return main(self.flags, "build_eval_spec")
Example #7
0
 def build_train_spec(self, hparams):
     """Build and return the training spec via the shared ``main`` entry point.

     NOTE(review): ``hparams`` is unused here; ``main`` appears to read its
     configuration from ``self.flags`` — confirm against the caller.
     """
     return main(self.flags, "build_train_spec")
Example #8
0
 def build_estimator(self, hparams):
     """Build and return the estimator via the shared ``main`` entry point.

     NOTE(review): ``hparams`` is unused here; ``main`` appears to read its
     configuration from ``self.flags`` — confirm against the caller.
     """
     return main(self.flags, "build_estimator")