Code Example #1
def train(opt, train_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    progbar = Progbar(train_loader.dataset.length)
    end = time.time()
    for i, train_data in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        b_size, loss = model.train_emb(*train_data)
        # update the progress bar with the current loss
        progbar.add(b_size, values=[("loss", loss)])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
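
All of these examples drive a Keras-style Progbar: construct it with the total
number of samples, then call add(n, values=[(name, value)]) after each batch.
A minimal usage sketch, assuming keras.utils.Progbar (any compatible
implementation works the same way):

from keras.utils import Progbar

progbar = Progbar(target=100)      # total number of samples expected
for step in range(10):
    loss = 1.0 / (step + 1)        # hypothetical per-batch loss
    # advance by 10 samples; Progbar displays a running average of "loss"
    progbar.add(10, values=[("loss", loss)])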
Code Example #2
def process(options, collection):
    rootpath = options.rootpath
    feature = options.feature
    pooling = options.pooling
    overwrite = options.overwrite

    pooling_func = get_pooling_func(pooling)
    feat_dir = os.path.join(rootpath, collection, 'FeatureData', feature)
    res_dir = os.path.join(rootpath, collection, 'FeatureData',
                           '%s_%s' % (pooling, feature))

    if os.path.exists(res_dir):
        if overwrite:
            logger.info("%s exists. overwrite", res_dir)
        else:
            logger.info("%s exists. quit", res_dir)
            return 0

    feat_file = BigFile(feat_dir)
    video2frames = {}
    for frame_id in feat_file.names:
        video_id, frame_index = frame_id.rsplit('_', 1)
        frame_index = int(frame_index)
        video2frames.setdefault(video_id, []).append(frame_id)

    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    res_binary_file = os.path.join(res_dir, 'feature.bin')
    fw = open(res_binary_file, 'wb')
    videoset = []

    pbar = Progbar(len(video2frames))
    for video_id, frame_id_list in video2frames.items():
        renamed, vectors = feat_file.read(frame_id_list)
        name2vec = dict(zip(renamed, vectors))
        frame_id_list.sort(key=lambda v: int(v.rsplit('_', 1)[-1]))

        feat_matrix = np.zeros((len(renamed), len(vectors[0])))
        for i, frame_id in enumerate(frame_id_list):
            feat_matrix[i, :] = name2vec[frame_id]

        video_vec = pooling_func(feat_matrix)
        video_vec.astype(np.float32).tofile(fw)
        videoset.append(video_id)
        pbar.add(1)
    fw.close()

    fw = open(os.path.join(res_dir, 'id.txt'), 'w')
    fw.write(' '.join(videoset))
    fw.close()

    fw = open(os.path.join(res_dir, 'shape.txt'), 'w')
    fw.write('%d %d' % (len(videoset), len(video_vec)))
    fw.close()

    logger.info("%s pooling -> %dx%d video feature file", pooling,
                len(videoset), len(video_vec))
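
Since the pooled vectors are written as raw float32, the resulting FeatureData
directory can be read back with plain NumPy. A minimal sketch, assuming
res_dir points at the output directory produced above (the directory name is
hypothetical):

import os
import numpy as np

res_dir = 'FeatureData/mean_resnet152'  # hypothetical output directory
with open(os.path.join(res_dir, 'shape.txt')) as f:
    n_videos, dim = map(int, f.read().split())
with open(os.path.join(res_dir, 'id.txt')) as f:
    video_ids = f.read().split()

# feature.bin holds n_videos * dim float32 values, one pooled vector per video
feats = np.fromfile(os.path.join(res_dir, 'feature.bin'),
                    dtype=np.float32).reshape(n_videos, dim)
assert len(video_ids) == n_videos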
Code Example #3
def fit_epoch(self, train_data, batch_size=None, incl_progbar=True):
    '''Fit on training data for an epoch'''
    if incl_progbar:
        progbar = Progbar(target=len(train_data)*batch_size if batch_size else len(train_data))
    for (word_id_batch, tag_id_batch, deprel_id_batch), class_batch in \
            train_data:
        loss = self.fit_batch(
            word_id_batch, tag_id_batch, deprel_id_batch, class_batch)
        if incl_progbar:
            progbar.add(word_id_batch.shape[0], [("Cross-entropy", loss)])
Code Example #4
def process(options, collection, featnames):
    rootpath = options.rootpath
    target_featname = featnames
    featnames = featnames.split('+')
    target_feat_dir = os.path.join(rootpath, collection, 'FeatureData',
                                   target_featname)

    if os.path.exists(target_feat_dir):
        if options.overwrite:
            logger.info('%s exists! overwrite.', target_feat_dir)
        else:
            logger.info('%s exists! quit.', target_feat_dir)
            sys.exit(0)
    else:
        os.makedirs(target_feat_dir)

    target_binary_file = os.path.join(target_feat_dir, 'feature.bin')
    target_id_file = os.path.join(target_feat_dir, 'id.txt')

    feat_dim = 0
    img_ids = []
    featfiles = []

    for i, feat in enumerate(featnames):
        feat_dir = os.path.join(rootpath, collection, 'FeatureData', feat)
        featfile = BigFile(feat_dir)
        feat_dim += featfile.ndims
        if i == 0:
            img_ids = featfile.names
        else:
            assert len(img_ids) == len(featfile.names) and set(img_ids) == set(
                featfile.names), '%s not match target feature' % feat
        featfiles.append(featfile)

    with open(target_binary_file, 'wb') as fw:
        progbar = Progbar(len(img_ids))
        for im in img_ids:
            target_feat_vec = []
            for feat in featfiles:
                vec = feat.read_one(im)
                target_feat_vec.extend(vec)
            vec = np.array(target_feat_vec, dtype=np.float32)
            vec.tofile(fw)
            progbar.add(1)

    with open(target_id_file, 'w') as fw:
        fw.write(' '.join(img_ids))

    with open(os.path.join(target_feat_dir, 'shape.txt'), 'w') as fw:
        fw.write('%d %d' % (len(img_ids), feat_dim))

    logger.info('%s: (%d, %d)', target_featname, len(img_ids), feat_dim)
Code Example #5
def process(options, collection, feat_name):
    overwrite = options.overwrite
    rootpath = options.rootpath

    feature_dir = os.path.join(rootpath, collection, 'feature')
    resdir = os.path.join(rootpath, collection, 'FeatureData', feat_name)

    train_csv = os.path.join(rootpath, collection, 'split', 'train.csv')
    val_csv = os.path.join(rootpath, collection, 'split', 'val.csv')
    test_csv = os.path.join(rootpath, collection, 'split', 'test.csv')

    train_val_test_set = []
    train_val_test_set.extend(map(str.strip, open(train_csv).readlines()))
    train_val_test_set.extend(map(str.strip, open(val_csv).readlines()))
    train_val_test_set.extend(map(str.strip, open(test_csv).readlines()))

    target_feat_file = os.path.join(resdir, 'id.feature.txt')
    if checkToSkip(os.path.join(resdir, 'feature.bin'), overwrite):
        sys.exit(0)
    makedirsforfile(target_feat_file)

    frame_count = []
    print('Processing %s - %s' % (collection, feat_name))
    with open(target_feat_file, 'w') as fw_feat:
        progbar = Progbar(len(train_val_test_set))
        for d in train_val_test_set:
            feat_file = os.path.join(feature_dir, d,
                                     '%s-%s.npy' % (d, feat_name))
            feats = np.load(feat_file)
            if len(feats.shape) == 1:  # video level feature
                dim = feats.shape[0]
                fw_feat.write('%s %s\n' %
                              (d, ' '.join(['%.6f' % x for x in feats])))
            elif len(feats.shape) == 2:  # frame level feature
                frames, dim = feats.shape
                frame_count.append(frames)
                for i in range(frames):
                    frame_id = d + '_' + str(i)
                    fw_feat.write(
                        '%s %s\n' %
                        (frame_id, ' '.join(['%.6f' % x for x in feats[i]])))
            progbar.add(1)

    text2bin(dim, [target_feat_file], resdir, 1)
    os.remove(target_feat_file)
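
Before text2bin converts it, the intermediate id.feature.txt holds one
whitespace-separated line per item: an id followed by its vector values. A
minimal sketch of parsing that format back into a matrix (the file path is
hypothetical):

import numpy as np

ids, vecs = [], []
with open('id.feature.txt') as f:
    for line in f:
        parts = line.split()
        ids.append(parts[0])
        vecs.append([float(x) for x in parts[1:]])
feat_matrix = np.array(vecs, dtype=np.float32)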
Code Example #6
def process(options, collection):
    rootpath = options.rootpath
    oversample = options.oversample
    model_prefix = os.path.join(rootpath, options.model_prefix)
    sub_mean = model_prefix.find('resnext-101_rbps13k') >= 0
    logger.info('subtract mean? %d', sub_mean)
    layer = 'flatten0_output'
    batch_size = 1  # changing the batch size yields slightly different feature vectors, so stick to a batch size of 1
    feat_name = get_feat_name(model_prefix, layer, oversample)
    feat_dir = os.path.join(rootpath, collection, 'FeatureData', feat_name)
    id_file = os.path.join(feat_dir, 'id.txt')
    feat_file = os.path.join(feat_dir, 'id.feature.txt')

    for x in [id_file, feat_file]:
        if os.path.exists(x):
            if not options.overwrite:
                logger.info('%s exists. skip', x)
                return 0
            else:
                logger.info('%s exists. overwrite', x)

    id_path_file = os.path.join(rootpath, collection, 'id.imagepath.txt')
    data = [line.strip() for line in open(id_path_file)]
    img_ids = [x.split()[0] for x in data]
    filenames = [x.split()[1] for x in data]

    fe_mod = get_feat_extractor(model_prefix=model_prefix,
                                gpuid=options.gpu,
                                oversample=oversample)
    if fe_mod is None:
        return 0

    if not os.path.exists(feat_dir):
        os.makedirs(feat_dir)

    fails_id_path = []
    fw = open(feat_file, 'w')

    im2path = list(zip(img_ids, filenames))
    success = 0
    fail = 0

    start_time = time.time()
    logger.info('%d images, %d done, %d to do', len(img_ids), 0, len(img_ids))
    progbar = Progbar(len(im2path))

    for i, (imgid, impath) in enumerate(im2path):
        try:
            imid, features = extract_mxnet_feat(fe_mod, imgid, impath,
                                                sub_mean, oversample)
            fw.write('%s %s\n' % (imid, ' '.join(['%g' % x
                                                  for x in features])))
            success += 1
        except Exception as e:
            fail += 1
            logger.error('failed to process %s: %s', impath, e)
            logger.info('%d success, %d fail', success, fail)
            fails_id_path.append((imgid, impath))
        finally:
            progbar.add(1)

    logger.info('%d success, %d fail', success, fail)
    elapsed_time = time.time() - start_time
    logger.info('total running time %s',
                time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    fw.close()
    if len(fails_id_path) > 0:
        fail_fw = open(os.path.join(rootpath, collection, 'feature.fails.txt'),
                       'w')
        for (imgid, impath) in fails_id_path:
            fail_fw.write('%s %s\n' % (imgid, impath))
        fail_fw.close()
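
The failed (id, path) pairs recorded in feature.fails.txt use the same
whitespace-separated layout as id.imagepath.txt, so they can be fed straight
back into the extraction loop for a retry pass. A minimal sketch (the paths
are hypothetical):

import os

fails_file = os.path.join('rootpath', 'collection', 'feature.fails.txt')
if os.path.exists(fails_file):
    retry_pairs = [line.split() for line in open(fails_file)]
    print('%d images to retry' % len(retry_pairs))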
Code Example #7
File: gene_aug_feat.py  Project: kiminh/cbvr
def process(opt):

    rootpath = opt.rootpath
    collection = opt.collection
    feature = opt.feature
    stride = opt.stride
    overwrite = opt.overwrite
    pooling_style = opt.pooling_style

    feat_path = os.path.join(rootpath, collection, "FeatureData", feature)

    output_dir = os.path.join(
        rootpath, collection, "FeatureData",
        '%s-%s-stride%s' % (pooling_style, feature, stride))
    feat_combined_file = os.path.join(output_dir, "id_feat.txt")
    if checkToSkip(os.path.join(output_dir, "feature.bin"), overwrite):
        sys.exit(0)
    makedirsforfile(feat_combined_file)

    print "Generate augmented frame-level features and operate mean pooling..."

    feat_data = BigFile(feat_path)
    video2fmnos = {}
    for frame_id in feat_data.names:
        data = frame_id.strip().split("_")
        video_id = '_'.join(data[:-1])
        fm_no = data[-1]
        video2fmnos.setdefault(video_id, []).append(int(fm_no))

    video2frames = {}
    for video_id, fmnos in video2fmnos.items():
        for fm_no in sorted(fmnos):
            video2frames.setdefault(video_id, []).append(video_id + "_" + str(fm_no))
    

    stride = list(map(int, stride.strip().split('-')))
    f_auger = Frame_Level_Augmenter(stride)

    video2subvideo = {}
    fout = open(feat_combined_file, 'w')
    progbar = Progbar(len(video2frames))
    for video in video2frames:
        frame_ids = video2frames[video]

        # output the whole video-level feature
        video2subvideo.setdefault(video, []).append(video)
        renamed, feats = feat_data.read(frame_ids)
        if pooling_style == 'avg':
            feat_vec = np.array(feats).mean(axis=0)
        elif pooling_style == 'max':
            feat_vec = np.array(feats).max(axis=0)
        fout.write(video + " " + " ".join(map(str,feat_vec)) + '\n')

    
        # output the sub video level feature
        counter = 0
        aug_index = f_auger.get_aug_index(len(frame_ids))  # get augmented frame list
        for sub_index in aug_index:
            sub_frames = [frame_ids[idx] for idx in sub_index]
            renamed, sub_feats = feat_data.read(sub_frames)

            if pooling_style == 'avg':
                feat_vec = np.array(sub_feats).mean(axis=0)
            elif pooling_style == 'max':
                feat_vec = np.array(sub_feats).max(axis=0)

            video2subvideo.setdefault(video, []).append(video + "_sub%d" % counter)
            fout.write(video + "_sub%d" % counter + " " + " ".join(map(str,feat_vec)) + '\n')
            counter += 1
        progbar.add(1)

    fout.close()

    f = open(os.path.join(output_dir, "video2subvideo.txt"),'w')  
    f.write(str(video2subvideo))  
    f.close()  

    text2bin(len(feat_vec), [feat_combined_file], output_dir, 1)
    os.remove(feat_combined_file)
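
The Frame_Level_Augmenter used above is not shown in this example; judging
from the call sites, get_aug_index(n_frames) returns lists of frame indices,
one list per augmented sub-sequence. A hypothetical sketch of such a class
(the sampling rule, every s-th frame at each offset, is an assumption, not
the project's actual implementation):

class Frame_Level_Augmenter(object):
    def __init__(self, strides):
        self.strides = strides  # e.g. [2, 4] parsed from a "2-4" option string

    def get_aug_index(self, n_frames):
        # For each stride s, sample every s-th frame starting at each offset,
        # yielding several sub-sequences per video.
        aug_index = []
        for s in self.strides:
            for offset in range(s):
                sub = list(range(offset, n_frames, s))
                if sub:
                    aug_index.append(sub)
        return aug_index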