Example #1
def build_examples():
    '''build examples'''
    for target in 'px4-v2', 'navio':
        print("Running build.examples for %s" % target)
        try:
            util.build_examples(target)
        except Exception as e:
            print("Failed build_examples on board=%s" % target)
            print(str(e))
            return False

    return True
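A quick usage sketch (a hypothetical driver, not part of the original example; `util` is the surrounding project's build helper module): since build_examples() returns a boolean, a caller can map the result straight onto a process exit code.

import sys

if __name__ == '__main__':
    # exit non-zero if any example target failed to build
    sys.exit(0 if build_examples() else 1)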
def run(self):
    for target in self.targets:
        util.build_examples(target, clean=self.clean)
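run() above reads self.targets and self.clean, so it is meant to live on a class; a minimal enclosing class (a reconstruction for context only; the class name and constructor are assumptions) would be:

class BuildExamplesStep:  # hypothetical name
    def __init__(self, targets, clean=False):
        self.targets = targets  # board names to build, e.g. ['px4-v2', 'navio']
        self.clean = clean      # when True, request a clean rebuild

    def run(self):
        for target in self.targets:
            util.build_examples(target, clean=self.clean)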
# assumed imports for this snippet: AdamW and WarmupLinearSchedule ship with the
# pre-rename pytorch_transformers package; load_model, build_examples,
# transfer_to_query_bert_format, Dataset and model_evaluation are project-local helpers
import os

import torch
from pytorch_transformers import AdamW, WarmupLinearSchedule

def train_portion(ace_ontology, data_ace, ratio):
    # parameters
    n_epoch = 10
    batch_size = 12

    learning_rate = 5e-5
    adam_epsilon = 1e-8
    warmup_steps = 0
    max_grad_norm = 1.0

    # load model
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    device = 'cuda'
    tokenizer, model = load_model('../data/my-bert-large-cased-squad/', device)
    # tokenizer, model = load_model('/home/jliu/data/BertModel/bert-large-cased', device)

    cut_idx = int(len(data_ace['train']) * ratio)
    print('Training examples', cut_idx)

    max_seq_len = 120
    training_set = build_examples(ace_ontology, data_ace['train'][:cut_idx], training=True)
    training_set = transfer_to_query_bert_format(training_set, tokenizer, max_seq_len, training=True)
    train_dataset = Dataset(batch_size, max_seq_len, training_set)

    # development set
    dev_set = build_examples(ace_ontology, data_ace['dev'], training=False)
    dev_set = transfer_to_query_bert_format(dev_set, tokenizer, max_seq_len, training=False)
    dev_dataset = Dataset(batch_size, max_seq_len, dev_set)

    t_total = int(n_epoch * len(training_set) / batch_size)
    no_decay = ['bias', 'LayerNorm.weight']
    # NOTE: both groups use weight_decay=0.0 here, so this split has no effect;
    # the usual pattern gives the first (non-no_decay) group a non-zero decay.
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, eps=adam_epsilon)
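    # one optimizer step is taken per batch, so t_total above is the total step
    # count; WarmupLinearSchedule warms the LR up over warmup_steps (0 here, i.e.
    # no warmup), then decays it linearly to zero by step t_total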
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps, t_total=t_total)
    
    torch.cuda.empty_cache()

    global_step = 0
    for _ in range(n_epoch):
        for batch in train_dataset.get_tqdm(device, shuffle=True):
            global_step += 1
            model.train()
            input_ids, input_mask, segment_ids, start_positions, end_positions, token_to_orig_map, example = batch

            inputs = {'input_ids':       input_ids,
                      'attention_mask':  input_mask,
                      'token_type_ids':  segment_ids,
                      'start_positions': start_positions,
                      'end_positions':   end_positions}
            outputs = model(**inputs)
            loss = outputs[0]
            loss = loss.mean()
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

            optimizer.step()
            scheduler.step() 
            model.zero_grad()
        
        model.eval()
        with torch.no_grad():
            model_evaluation(model, dev_dataset, device)
            print()
    #model.destroy() 
    del model
    torch.cuda.empty_cache()
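As the comment inside train_portion() notes, the parameter grouping there is a no-op, since both groups set weight_decay=0.0. For comparison, a sketch of the conventional grouping (the 0.01 decay is a common default, not a value taken from the original), which decays everything except biases and LayerNorm weights:

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    # decayed group: parameters whose names match none of the no_decay keys
    {'params': [p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    # undecayed group: biases and LayerNorm weights
    {'params': [p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5, eps=1e-8)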
Example #7
    # load model
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    device = 'cuda'
    tokenizer, model = load_model('../large_cased_finetuned', device)

    # load ACE data
    data_ace = pickle.load(open('../data/data_ace.pickle', 'rb'))
    for f in ['train', 'test', 'val']:
        data_ace[f] = transfer_data_format(data_ace[f])
    ace_ontology = _build_event_ontology(data_ace['train'] + data_ace['test'] +
                                         data_ace['val'])
    print(ace_ontology)
    max_seq_len = 120
    testing_set = build_examples(ace_ontology,
                                 data_ace['test'],
                                 training=False)
    testing_set = transfer_to_query_bert_format(testing_set,
                                                tokenizer,
                                                max_seq_len,
                                                training=False)
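    # batch_size is defined in the elided part of this excerpt, which begins mid-function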
    test_dataset = Dataset(batch_size, max_seq_len, testing_set)

    data_framenet = pickle.load(open('../data/data_framenet.pickle', 'rb'))
    frame_ontology = _build_event_ontology(data_framenet)
    data_framenet = modify_framenet(data_framenet)

    max_seq_len = 120
    testing_set = build_examples(frame_ontology,
                                 data_framenet[:500],
                                 training=False)