Example No. 1
import os

# load_list_from_folder, load_txt_file, mkdir_if_missing and fileparts are
# small I/O helpers from the project's utility module.
def conf_thresholding(data_dir, save_dir, score_threshold):
	# score_threshold: minimum average confidence for a track to be kept
	# collect all trajectories
	tracker_id_score = dict()
	eval_dir = os.path.join(data_dir, 'data')
	seq_list, num_seq = load_list_from_folder(eval_dir)
	for seq_file in seq_list:
		seq_data, num_line = load_txt_file(seq_file)
		for data_line in seq_data:
			data_split = data_line.split(' ')
			score_tmp = float(data_split[-1])
			id_tmp = int(data_split[1])

			if id_tmp not in tracker_id_score:
				tracker_id_score[id_tmp] = list()
			tracker_id_score[id_tmp].append(score_tmp)

	# collect the IDs to remove based on the average confidence of each track
	to_delete_id = set()
	for track_id, score_list in tracker_id_score.items():
		average_score = sum(score_list) / float(len(score_list))
		if average_score < score_threshold:
			to_delete_id.add(track_id)

	# rewrite the 'data' folder without the removed IDs
	save_dir_tmp = os.path.join(save_dir, 'data')
	mkdir_if_missing(save_dir_tmp)
	for seq_file in seq_list:
		seq_name = fileparts(seq_file)[1]
		seq_file_save = open(os.path.join(save_dir_tmp, seq_name + '.txt'), 'w')

		seq_data, num_line = load_txt_file(seq_file)
		for data_line in seq_data:
			data_split = data_line.split(' ')
			id_tmp = int(data_split[1])
			if id_tmp not in to_delete_id:
				seq_file_save.write(data_line + '\n')
	
		seq_file_save.close()

	# rewrite the per-frame 'trk_withid' folder without the removed IDs
	trk_id_dir = os.path.join(data_dir, 'trk_withid')
	seq_dir_list, num_seq = load_list_from_folder(trk_id_dir)
	save_dir_tmp = os.path.join(save_dir, 'trk_withid')
	for seq_dir in seq_dir_list:
		frame_list, num_frame = load_list_from_folder(seq_dir)
		seq_name = fileparts(seq_dir)[1]
		save_frame_dir = os.path.join(save_dir_tmp, seq_name)
		mkdir_if_missing(save_frame_dir)
		for frame in frame_list:
			frame_index = fileparts(frame)[1]
			frame_file_save = open(os.path.join(save_frame_dir, frame_index + '.txt'), 'w')
			frame_data, num_line = load_txt_file(frame)
			for data_line in frame_data:
				data_split = data_line.split(' ')
				id_tmp = int(data_split[-1])  # in trk_withid files the track ID is the last column
				if id_tmp not in to_delete_id:
					frame_file_save.write(data_line + '\n')

			frame_file_save.close()
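
This example filters a tracking result by the average detection confidence of each track ID: tracks whose mean score falls below the threshold are dropped from both the per-sequence files and the per-frame trk_withid files. A minimal usage sketch, where the paths and the threshold value are illustrative rather than taken from the original project:

# hypothetical paths and threshold, for illustration only
data_dir = './results/tracking_output'    # must contain 'data/' and 'trk_withid/'
save_dir = './results/tracking_output_filtered'
conf_thresholding(data_dir, save_dir, score_threshold=2.5)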
Example No. 2
def test_cocoeval():
    # data_engine, utils, build_sample_pairs and score_with_cocoeval are
    # helpers from the surrounding video-captioning project
    engine = data_engine.Movie2Caption('attention', 'youtube2text',
                                       video_feature='googlenet',
                                       mb_size_train=20,
                                       mb_size_test=20,
                                       maxlen=50, n_words=20000,
                                       n_frames=20, outof=None)
    samples_valid = utils.load_txt_file('./model_files/valid_samples.txt')
    samples_test = utils.load_txt_file('./model_files/test_samples.txt')
    samples_valid = [sample.strip() for sample in samples_valid]
    samples_test = [sample.strip() for sample in samples_test]

    samples_valid = build_sample_pairs(samples_valid, engine.valid_ids)
    samples_test = build_sample_pairs(samples_test, engine.test_ids)
    valid_score, test_score = score_with_cocoeval(samples_valid, samples_test, engine)
    print(valid_score, test_score)
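
test_cocoeval loads one generated caption per line, pairs each caption with its video ID, and scores the pairs with the COCO caption metrics. A minimal sketch of the pairing step, assuming build_sample_pairs maps each ID to a single-caption list in the usual pycocoevalcap shape (the helper in the original project may differ):

def build_sample_pairs(samples, vid_ids):
    # one generated caption per video ID, in pycocoevalcap's expected format
    return {vid: [{'image_id': vid, 'caption': sample}]
            for vid, sample in zip(vid_ids, samples)}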
Example No. 3
def process_document(path,
                     vocab,
                     title_start="========,",
                     forbidden_start="***LIST***",
                     test=False,
                     ssplit=True):
    # utils, config, sentence_split and create_one_instance are project-local
    print("ssplit: " + str(ssplit))
    # either use pre-split sentences (skipping list lines) or split raw text
    if ssplit:
        lines = [l for l in utils.load_lines(path)
                 if not l.startswith(forbidden_start)]
    else:
        lines = sentence_split(utils.load_txt_file(path))
    stride = 1 if test else config.sent_stride
    lab_lines = []
    lines_txt = []
    # keep non-title sentences; label 1 when the previous line was a title
    for i in range(len(lines)):
        if lines[i].startswith(title_start):
            continue
        if (i - 1) >= 0 and lines[i - 1].startswith(title_start):
            lab_lines.append((lines[i], 1))
        else:
            lab_lines.append((lines[i], 0))
        lines_txt.append(lines[i])

    # slide a window of config.sent_window sentences with the chosen stride,
    # padding the final window with fake sentences
    raw_blocks = []
    i = 0
    while i < len(lab_lines):
        block = lab_lines[i:i + config.sent_window]
        if len(block) < config.sent_window:
            block.extend([(config.fake_sent, 0)] *
                         (config.sent_window - len(block)))
        raw_blocks.append(block)
        i += stride

    if not test:
        # at training time, keep only a random fraction of the blocks
        random.shuffle(raw_blocks)
        raw_blocks = raw_blocks[:int(config.perc_blocks_train *
                                     len(raw_blocks))]

    doc_recs = []
    for rb in raw_blocks:
        records = create_one_instance(rb, lines_txt, vocab)
        doc_recs.extend(records)

    return doc_recs, len(raw_blocks), raw_blocks if test else None
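
process_document converts one document into overlapping windows of labelled sentences, where a sentence is labelled 1 exactly when it opens a new section (i.e. follows a "========," title line). A usage sketch, assuming vocab is whatever token mapping create_one_instance expects; the path is hypothetical:

# illustrative call on a single pre-split document
records, num_blocks, blocks = process_document('data/doc_0001.txt', vocab, test=True)
print(len(records), num_blocks)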
Example No. 4
import glob
import time

import numpy as np
import theano
import theano.tensor as T
import lasagne


def classify():
    # utils (preprocessing helpers) and build_model are project-local
    IMAGE = '/dl1/data/projects/imagenet/valid/'
    LABELS = '/dl1/data/projects/imagenet/val.txt'
    MEAN = '/dl1/data/projects/imagenet/ilsvrc_2012_mean.npy'
    EXT = 'JPEG'
    preprocessor = utils.VGGImageFuncs()
    # build the theano classification function
    x = T.ftensor4('images')
    model = build_model(x)
    y = lasagne.layers.get_output(model['prob'], deterministic=True)
    classify_fn = theano.function([x], y)
    '''perform classification'''
    files = glob.glob(IMAGE + '/*.' + EXT)
    files = utils.sort_by_numbers_in_file_name(files)
    labels = utils.load_txt_file(LABELS)
    labels = [int((label.split(' ')[-1]).strip()) for label in labels]
    # go through minibatches
    idx = utils.generate_minibatch_idx(len(files), 64)
    TOP1s = []
    TOP5s = []
    for i, index in enumerate(idx):
        t0 = time.time()
        current = [files[j] for j in index]
        gts = np.asarray([labels[j] for j in index])
        inputs = preprocessor.preprocess(current)
        probs = classify_fn(inputs)  # (m, 1000, 1, 1)
        probs = np.squeeze(probs)
        predictions = probs.argsort()[:, ::-1][:, :5]
        for pred, gt in zip(predictions, gts):
            TOP1 = pred[0] == gt
            TOP5 = gt in pred
            TOP1s.append(TOP1)
            TOP5s.append(TOP5)
        print('%d / %d minibatches, running TOP1 %.4f, TOP5 %.4f, used %.2f' % (
            i, len(idx), np.mean(TOP1s) * 100, np.mean(TOP5s) * 100,
            time.time() - t0))
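
The accuracy bookkeeping above hinges on sorting each row of the probability matrix in descending order. A self-contained illustration of the same top-1/top-5 logic on toy data:

import numpy as np

probs = np.array([[0.1, 0.6, 0.3],
                  [0.5, 0.2, 0.3]])        # (m, num_classes) toy scores
gts = np.array([1, 2])                     # ground-truth class indices
preds = probs.argsort()[:, ::-1][:, :5]    # classes by descending score
top1 = np.mean(preds[:, 0] == gts)         # best guess matches the label
top5 = np.mean([gt in pred for pred, gt in zip(preds, gts)])
print(top1, top5)                          # 0.5 1.0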