Example #1
    print()
    print('Best val loss: {:.4f}'.format(best_loss))
    print('Best val metrics: {}'.format(best_metrics))
    print('At epoch: {:d}'.format(best_epoch))

    # Release the last batch's tensors before clearing the CUDA cache;
    # a NameError here means the loop body never ran.
    try:
        del inputs, labels
    except NameError:
        pass
    torch.cuda.empty_cache()
    return model, optimizer, epoch, loss
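
The snippet above is the tail of a training routine whose signature is cut off. A minimal, hypothetical usage sketch (the name train_model and its arguments are assumptions, not from the original):

# Hypothetical caller: train_model stands in for the truncated
# function above, which returns (model, optimizer, epoch, loss).
model, optimizer, epoch, loss = train_model(model, optimizer, num_epochs=10)
torch.save({'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'epoch': epoch,
            'loss': loss}, 'checkpoint.pt')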


# In[9]:

tokenizer = MyTokenizer(cfg=cfg)

# In[11]:

ds = MNSAllDataset('../data/Kdd/train_all_processed_label.h5',
                   neg_k=cfg['num_negative_sampling'],
                   single_thread=False)
val_ds = BasicAllDataset('../data/Kdd/valid_all_processed_label.h5',
                         single_thread=True)

# In[12]:

# Draw the first train_fraction of the indices in random order;
# the tail of the index list is left unused here.
train_size = len(ds)
train_split = int(train_size * cfg['train_fraction'])
train_indices = list(range(train_size))
train_sampler = data.sampler.SubsetRandomSampler(train_indices[:train_split])
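
The sampler is meant to be handed to a DataLoader; a minimal sketch, assuming data is torch.utils.data and that the batch size and worker count (not given in cfg above) are placeholder values:

# Sketch: feed the subset sampler to a DataLoader. batch_size and
# num_workers are assumed placeholders, not taken from cfg.
train_loader = data.DataLoader(ds,
                               batch_size=32,
                               sampler=train_sampler,
                               num_workers=4)
val_loader = data.DataLoader(val_ds, batch_size=32, shuffle=False)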
Example #2
#def LabelPassBert():


def read_label_info(path='../data/Kdd/multimodal_labels.txt'):
    """Parse the label file into a list mapping class number to label text."""
    num_to_label = []
    with open(path, 'r') as f:
        f.readline()  # skip the header line
        for line in f:
            words = line.strip().split('\t')
            num_to_label.append(words[1])
    return num_to_label
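
A quick usage sketch, assuming the file holds one header line followed by tab-separated "num<TAB>label" rows, as the parser above implies:

# Assumed layout: header line, then "num\tlabel" per line.
num_to_label = read_label_info()
print(num_to_label[0])    # label text for class 0
print(len(num_to_label))  # total number of classes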


myTokenizer = MyTokenizer(cfg)


def convert_label_to_token_id(class_num_to_label, mytokenizer):
    # lens = []
    label_to_token_id = []
    for v in class_num_to_label:
        # lens.append(len(tokenizer.tokenizer.tokenize(v)))
        # Class labels start at 0; label_to_token_id[k] holds the token ids for label k.
        label_to_token_id.append(
            mytokenizer.convert_str_to_ids(v,
                                           tensor=True,
                                           max_len=512,
                                           pad=False).view(1, -1))
    # print(np.percentile(lens, [0, 25, 50, 75, 95, 99, 100]))
    return label_to_token_id
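
Chaining the two helpers; a sketch assuming convert_str_to_ids returns a 1-D id tensor, as the .view(1, -1) call above implies:

# Sketch: map each class label string to a (1, seq_len) id tensor.
num_to_label = read_label_info()
label_to_token_id = convert_label_to_token_id(num_to_label, myTokenizer)
print(label_to_token_id[0].shape)  # e.g. torch.Size([1, n_tokens])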