Beispiel #1
0
 def validate_model(self):
     """Validate the GuidAbs checkpoints in ``self.model_path`` and archive the results.

     The method is a no-op when the marker file
     ``finished.validate_guidabs_model`` already exists in
     ``self.model_path``. Otherwise it:

     1. builds the argument namespace via ``self._build_abs_args()`` and
        switches it to ``validate`` mode,
     2. calls ``train_abs.validate_abs``, which yields ``(acc, xent, cp)``
        triples where ``cp`` is a checkpoint path,
     3. moves every returned checkpoint to ``top<i>.<basename>`` inside
        ``self.model_path``,
     4. deletes the remaining ``model_step_*.pt`` files, and
     5. creates the marker file.

     File operations use the standard library instead of shell commands, so
     paths with spaces or shell metacharacters are handled correctly. A
     failed move raises before any checkpoint is deleted or the marker is
     written.

     Raises:
         OSError: if a checkpoint cannot be moved or removed, or the marker
             file cannot be created.
     """
     # Imported locally so this method does not depend on the module's
     # top-level import block.
     import glob
     import shutil

     logger.info('Validate GuidAbs model %s' % self.model_path)
     fn_touch = path.join(self.model_path,
                          'finished.validate_guidabs_model')
     if path.exists(fn_touch):
         # Validation already completed on a previous run.
         return

     args = self._build_abs_args()
     args.mode = 'validate'
     args.bert_data_path = path.join(self.data_path, 'cnndm')
     args.model_path = self.model_path
     args.log_file = path.join(self.model_path, 'val_abs_bert_cnndm.log')
     # Validation runs with the test batch size.
     args.batch_size = args.test_batch_size
     init_logger(args.log_file)
     acc_top3 = train_abs.validate_abs(args, args.device_id)

     # Rename the returned checkpoints (presumably the best three, judging by
     # the variable name) so they survive the cleanup below.
     for i, (acc, xent, cp) in enumerate(acc_top3):
         fn = path.basename(cp)
         tgt_path = path.join(self.model_path, 'top%s.%s' % (i, fn))
         shutil.move(cp, tgt_path)
         logger.info('Archive validated GuidAbs model %s' % tgt_path)

     # Remove every checkpoint that was not archived. glob.escape keeps
     # special characters in the directory name from acting as wildcards.
     leftover_pattern = path.join(glob.escape(self.model_path),
                                  'model_step_*.pt')
     for leftover in glob.glob(leftover_pattern):
         if path.isfile(leftover):
             os.remove(leftover)

     # Create the marker file so later calls return early.
     with open(fn_touch, 'a'):
         pass
Beispiel #2
0
    parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True)

    args = parser.parse_args()
    args.gpu_ranks = [int(i) for i in range(len(args.visible_gpus.split(',')))]
    args.world_size = len(args.gpu_ranks)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.visible_gpus

    init_logger(args.log_file)
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    device_id = 0 if device == "cuda" else -1

    if (args.task == 'abs'):
        if (args.mode == 'train'):
            train_abs(args, device_id)
        elif (args.mode == 'validate'):
            validate_abs(args, device_id)
        elif (args.mode == 'lead'):
            baseline(args, cal_lead=True)
        elif (args.mode == 'oracle'):
            baseline(args, cal_oracle=True)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_abs(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
Beispiel #3
0
    init_logger(args.log_file)
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    device_id = 0 if device == "cuda" else -1

    if (args.task == 'abs'):
        if args.uncased:
            tokenizer = BertTokenizer.from_pretrained(
                'bert-base-multilingual-uncased', do_lower_case=True)
        else:
            tokenizer = BertTokenizer.from_pretrained(
                'bert-base-multilingual-cased', do_lower_case=False)

        if (args.mode == 'train'):
            train_abs(args, device_id, tokenizer)
        elif (args.mode == 'validate'):
            validate_abs(args, device_id, tokenizer)
        elif (args.mode == 'lead'):
            baseline(args, cal_lead=True)
        elif (args.mode == 'oracle'):
            baseline(args, cal_oracle=True)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_abs(args, device_id, cp, step, tokenizer)
        elif (args.mode == 'dev'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
Beispiel #4
0
 def _checkpoint_step(cp):
     """Return the step number encoded in a checkpoint name, or 0 if absent.

     The step is taken from the last ``_``-separated token of the dotted
     component just before the extension, e.g. ``model_step_1200.pt`` -> 1200.
     """
     try:
         return int(cp.split('.')[-2].split('_')[-1])
     except (AttributeError, IndexError, ValueError):
         # Not a string, no extension, or a non-numeric suffix.
         return 0


 def run(args, hpspace):
     """Dispatch one train/validate/test action selected by ``args``.

     Args:
         args: namespace providing at least ``task`` and ``mode``. Test modes
             also read ``test_from``; ``abs`` training reads ``hyperopt``.
         hpspace: mapping of sampled hyper-parameters. It is only read when
             ``args.task == 'abs'``, ``args.mode == 'train'`` and
             ``args.hyperopt`` is truthy, using the keys ``lr_bert``,
             ``lr_dec``, ``accum_count``, ``beta1`` and ``beta2``.

     Returns:
         For ``abs`` training, a dict with the keys ``loss``, ``eval_time``,
         ``status`` and ``other_stuff`` built from the statistics returned by
         ``train_abs``. ``None`` for every other task/mode combination.

     Note:
         ``device_id`` is not a parameter; it is resolved from the enclosing
         scope at call time.
     """
     if args.task == 'abs':
         if args.mode == 'train':
             if args.hyperopt:
                 # Give each trial its own model/log location, keyed by the
                 # current Unix time in whole seconds.
                 run_id = str(int(time.time()))
                 args.model_path = "../models/" + run_id
                 args.log_file = "../logs/abs_bert_abs_" + run_id
                 args.lr_bert = hpspace['lr_bert']
                 args.lr_dec = hpspace['lr_dec']
                 args.accum_count = int(hpspace['accum_count'])
                 args.beta1 = hpspace['beta1']
                 args.beta2 = hpspace['beta2']
                 args.visible_gpus = '0'
                 # NOTE(review): '..temp/' has no separator after '..';
                 # possibly meant '../temp/'. Left unchanged, please confirm.
                 args.bert_model = '..temp/bert-base-danish-uncased-v2'
                 args.vocab = '..temp/bert-base-danish-uncased-v2'
             train_stats = train_abs(args, device_id)
             x = train_stats.x
             ppl = train_stats.perplexity
             acc = train_stats.acc
             print(x)
             return {
                 'loss': x,
                 'eval_time': time.time(),
                 'status': STATUS_OK,
                 'other_stuff': {
                     'ppl': ppl,
                     'acc': acc
                 }
             }
         elif args.mode == 'validate':
             validate_abs(args, device_id)
         elif args.mode == 'lead':
             baseline(args, cal_lead=True)
         elif args.mode == 'oracle':
             baseline(args, cal_oracle=True)
         elif args.mode == 'test':
             cp = args.test_from
             test_abs(args, device_id, cp, _checkpoint_step(cp))
         elif args.mode == 'test_text':
             cp = args.test_from
             # Run the test whether or not the step could be parsed; it used
             # to run only when parsing failed.
             test_text_abs(args, device_id, cp, _checkpoint_step(cp))
     elif args.task == 'ext':
         if args.mode == 'train':
             train_ext(args, device_id)
         elif args.mode == 'validate':
             validate_ext(args, device_id)
         elif args.mode == 'test':
             cp = args.test_from
             test_ext(args, device_id, cp, _checkpoint_step(cp))
         elif args.mode == 'test_text':
             cp = args.test_from
             # NOTE(review): the 'ext' task calls the *abs* text tester here;
             # kept as in the original, confirm this is intended.
             test_text_abs(args, device_id, cp, _checkpoint_step(cp))
     # 'sent_label' is handled for every task, so it sits outside the chain.
     if args.mode == "sent_label":
         sent_label_ext(args, device_id)
Beispiel #5
0
    DEVICE = "cpu" if args.visible_gpus == '-1' else "cuda"
    DEVICE_ID = 0 if DEVICE == "cuda" else -1

    # Create directories
    os.makedirs(args.model_path, exist_ok=True)
    os.makedirs(args.log_path, exist_ok=True)
    os.makedirs(args.result_path, exist_ok=True)

    # Train/valid/test
    if args.task == 'abs':
        if args.mode == 'train':
            init_logger(os.path.join(args.log_path, 'train.log'))
            train_abs(args, DEVICE_ID)
        elif args.mode == 'validate':
            init_logger(os.path.join(args.log_path, 'valid.log'))
            validate_abs(args, DEVICE_ID)
        elif args.mode == 'test':
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                print("Not correct model name (EX: model_step_1200.pt)")
            init_logger(
                os.path.join(args.log_path, 'test.' + str(step) + '.log'))
            test_abs(args, DEVICE_ID, cp, step)
        elif args.mode == 'test_text':
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                print("Not correct model name (EX: model_step_1200.pt)")