コード例 #1
0
def evaluate(args, model):
    """Encode all queries with the model and persist embeddings to disk.

    Iterates the query dataset in inference mode and writes each 768-dim
    embedding into a memory-mapped float32 array at the row(s) given by
    the per-batch offsets.

    Args:
        args: namespace providing preprocess_dir, mode, max_seq_length,
            pergpu_eval_batch_size, n_gpu, model_device and qmemmap_path.
        model: encoder called with input_ids/attention_mask/is_query=True;
            assumed to emit 768-wide query embeddings -- TODO confirm.

    Returns:
        np.memmap of shape (len(dataset), 768) holding all embeddings.
    """
    dev_dataset = SequenceDataset(
        TextTokenIdsCache(args.preprocess_dir, f"{args.mode}-query"),
        args.max_seq_length)
    collate_fn = get_collate_function(args.max_seq_length)
    batch_size = args.pergpu_eval_batch_size
    if args.n_gpu > 1:
        batch_size *= args.n_gpu
    dev_dataloader = DataLoader(dev_dataset,
                                batch_size=batch_size,
                                collate_fn=collate_fn)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    qembedding_memmap = np.memmap(args.qmemmap_path,
                                  dtype="float32",
                                  shape=(len(dev_dataset), 768),
                                  mode="w+")
    # Switch to eval mode once, outside the loop; the original toggled it
    # redundantly on every batch.
    model.eval()
    with torch.no_grad():
        for batch, qoffsets in tqdm(dev_dataloader):
            batch = {k: v.to(args.model_device) for k, v in batch.items()}
            embeddings = model(input_ids=batch["input_ids"],
                               attention_mask=batch["attention_mask"],
                               is_query=True)
            # qoffsets selects the destination rows for this batch;
            # presumably produced by collate_fn -- verify against it.
            qembedding_memmap[qoffsets] = embeddings.detach().cpu().numpy()
    # Make sure everything written through the memmap reaches the file.
    qembedding_memmap.flush()
    return qembedding_memmap
コード例 #2
0
ファイル: inference.py プロジェクト: jingtaozhan/DRhard
def query_inference(model, args, embedding_size):
    """Encode all queries into on-disk memmaps, cleaning up on failure.

    Skips the whole run if the output memmap already exists. If
    prediction() fails, the partially written output files are deleted
    so a later run does not mistake them for complete results.

    Args:
        model: query encoder forwarded to prediction().
        args: namespace with query_memmap_path, queryids_memmap_path,
            max_query_length, preprocess_dir and mode.
        embedding_size: width (columns) of each query embedding row.
    """
    if os.path.exists(args.query_memmap_path):
        print(f"{args.query_memmap_path} exists, skip inference")
        return
    query_collator = single_get_collate_function(args.max_query_length)
    query_dataset = SequenceDataset(ids_cache=TextTokenIdsCache(
        data_dir=args.preprocess_dir, prefix=f"{args.mode}-query"),
                                    max_seq_length=args.max_query_length)
    query_memmap = np.memmap(args.query_memmap_path,
                             dtype=np.float32,
                             mode="w+",
                             shape=(len(query_dataset), embedding_size))
    queryids_memmap = np.memmap(args.queryids_memmap_path,
                                dtype=np.int32,
                                mode="w+",
                                shape=(len(query_dataset), ))
    try:
        prediction(model,
                   query_collator,
                   args,
                   query_dataset,
                   query_memmap,
                   queryids_memmap,
                   is_query=True)
    except BaseException:
        # os.remove is portable and avoids spawning an external "rm"
        # process; BaseException (not bare except) keeps the original
        # semantics of also cleaning up on KeyboardInterrupt/SystemExit
        # before re-raising.
        os.remove(args.query_memmap_path)
        os.remove(args.queryids_memmap_path)
        raise
コード例 #3
0
ファイル: main.py プロジェクト: guglielmocamporese/rulstm
def get_loader(mode, override_modality=None):
    """Build a DataLoader over the SequenceDataset for the given split.

    Args:
        mode: split name; also selects "./data/<mode>.csv" and enables
            shuffling when it equals 'training'.
        override_modality: optional modality directory name that takes
            precedence over args.modality.

    Returns:
        torch DataLoader wrapping the configured SequenceDataset.
    """
    if override_modality:
        path_to_lmdb = join(args.path_to_data, override_modality)
    elif args.modality == 'fusion':
        # Fusion uses one lmdb path per input stream.
        path_to_lmdb = [join(args.path_to_data, m)
                        for m in ['rgb', 'flow', 'obj']]
    else:
        path_to_lmdb = join(args.path_to_data, args.modality)

    dataset = SequenceDataset(
        path_to_lmdb=path_to_lmdb,
        path_to_csv=join('./data', f"{mode}.csv"),
        time_step=args.alpha,
        img_tmpl=args.img_tmpl,
        # Early recognition samples the action; anticipation does not.
        action_samples=(args.S_ant if args.task == 'early_recognition'
                        else None),
        past_features=(args.task == 'anticipation'),
        sequence_length=args.S_enc + args.S_ant,
        # Training optimizes actions only; evaluation reports all three.
        label_type=('action' if args.mode == 'train'
                    else ['verb', 'noun', 'action']),
        challenge=('test' in mode),
    )

    return DataLoader(dataset,
                      batch_size=args.batch_size,
                      num_workers=args.num_workers,
                      pin_memory=True,
                      shuffle=(mode == 'training'))
コード例 #4
0
def query_inference(model, args, embedding_size):
    """Run the query encoder over the whole query set.

    Creates two fresh memory-mapped output files -- float32 embeddings
    and int32 query ids -- then delegates the actual forward passes to
    prediction().

    Args:
        model: query encoder forwarded to prediction().
        args: namespace with query_memmap_path, queryids_memmap_path,
            max_query_length, preprocess_dir and mode.
        embedding_size: width (columns) of each query embedding row.
    """
    ids_cache = TextTokenIdsCache(data_dir=args.preprocess_dir,
                                  prefix=f"{args.mode}-query")
    query_dataset = SequenceDataset(ids_cache=ids_cache,
                                    max_seq_length=args.max_query_length)
    query_collator = single_get_collate_function(args.max_query_length)

    num_queries = len(query_dataset)
    query_memmap = np.memmap(args.query_memmap_path, dtype=np.float32,
                             mode="w+",
                             shape=(num_queries, embedding_size))
    queryids_memmap = np.memmap(args.queryids_memmap_path, dtype=np.int32,
                                mode="w+", shape=(num_queries,))

    prediction(model, query_collator, args, query_dataset,
               query_memmap, queryids_memmap, is_query=True)
コード例 #5
0
def get_loader(mode, override_modality=None):
    """Create a single-step DataLoader for the given split.

    Args:
        mode: split name; selects "<path_to_data>/<mode>.csv" and
            enables shuffling when it equals 'training'.
        override_modality: optional modality directory that takes
            precedence over args.modality.

    Returns:
        torch DataLoader wrapping the configured SequenceDataset.
    """
    if override_modality:
        lmdb_path = join(args.path_to_data, override_modality)
    elif args.modality == 'fusion':
        # Fusion reads one lmdb path per modality in args.fusion_list.
        lmdb_path = [join(args.path_to_lmdb_data, m)
                     for m in args.fusion_list]
    else:
        lmdb_path = join(args.path_to_data, args.modality)

    dataset = SequenceDataset(
        path_to_lmdb=lmdb_path,
        path_to_csv=join(args.path_to_data, "{}.csv".format(mode)),
        time_step=args.alpha,
        img_tmpl=args.img_tmpl,
        past_features=(args.task == 'anticipation'),
        sequence_length=1,
        label_type=['verb', 'noun', 'action'],
        challenge=('test' in mode),
        args=args,
    )

    return DataLoader(dataset,
                      batch_size=args.batch_size,
                      num_workers=args.num_workers,
                      pin_memory=True,
                      shuffle=(mode == 'training'))
コード例 #6
0
 def __init__(self, queryids_cache, rel_file, max_query_length):
     """Initialize a query dataset that also carries relevance labels.

     Delegates sequence setup to SequenceDataset.__init__ (explicit base
     call rather than super(); class bases are defined elsewhere), then
     loads the query->doc relevance mapping used for supervision.

     Args:
         queryids_cache: token-id cache of the queries, passed to the base.
         rel_file: path to the relevance (qrels-style) file -- presumably;
             verify against load_rel's expectations.
         max_query_length: maximum query sequence length for truncation.
     """
     SequenceDataset.__init__(self, queryids_cache, max_query_length)
     self.reldict = load_rel(rel_file, direction="query2doc")
コード例 #7
0
ファイル: test.py プロジェクト: sinlin0908/emotion_analyse
if __name__ == "__main__":
    # Load the labeled test sheet; columns after the third are the
    # emotion label columns.
    data = pd.read_excel('./test.xlsx')
    label_names = list(data.columns[3:])
    infos = data.to_dict(orient='records')

    # NOTE(review): pickle.load executes arbitrary code from the file --
    # acceptable for locally produced artifacts, never for downloads.
    with open('./emb_matrix.pickle', 'rb') as f:
        # Loaded but not referenced below -- TODO confirm it is needed here.
        emb_matrix = pickle.load(f)

    with open('./w2id_dict.pickle', 'rb') as f:
        w2id_dict = pickle.load(f)

    test_dataset = SequenceDataset(
        label_names=label_names,
        max_len=30,
        w2id_dict=w2id_dict,
        data_info=infos,
        is_train=False
    )

    test_data_loader = DataLoader(
        dataset=test_dataset,
        batch_size=10,
        shuffle=False,
        num_workers=2,
    )

    # map_location makes the load succeed even when the checkpoint was
    # saved on a different device (e.g. a GPU checkpoint opened on a
    # CPU-only machine); without it torch.load raises instead of falling
    # back, despite the explicit .to(device) that follows.
    model = torch.load("./best.model", map_location=device)
    model.to(device)

    print(model)
コード例 #8
0
    data = pd.read_excel('./data.xlsx')
    label_names = list(data.columns[2:])

    data = data[(data[label_names] != 0).any(1)]  # 砍掉通0

    print("label names\n", label_names)
    infos = data.to_dict(orient='records')

    with open('./emb_matrix.pickle', 'rb') as f:
        emb_matrix = pickle.load(f)

    with open('./w2id_dict.pickle', 'rb') as f:
        w2id_dict = pickle.load(f)

    train_dataset = SequenceDataset(label_names=label_names,
                                    max_len=30,
                                    w2id_dict=w2id_dict,
                                    data_info=infos)

    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=2,
    )

    print("Data loader size: ", len(train_data_loader))

    if args.pretrained == 'T':
        print("Pretrained...")
        model = torch.load("../pretrain/best.model")
        model.output_layer = nn.Linear(in_features=128 * 2,