def conf_thresholding(data_dir, save_dir):
    """Filter KITTI-style tracking results by per-track average confidence.

    Reads every sequence file under ``data_dir/data``, computes the average
    detection score of each track ID, and rewrites both the per-sequence
    files (``save_dir/data``) and the per-frame files
    (``save_dir/trk_withid``) with low-confidence tracks removed.

    Parameters
    ----------
    data_dir : str  -- root directory containing 'data' and 'trk_withid'.
    save_dir : str  -- root directory the filtered copies are written to.

    NOTE(review): the cutoff ``score_threshold`` is read from enclosing
    scope (module-level global) — confirm it is defined before calling.
    """
    # Collect all trajectories: map track ID -> list of per-frame scores.
    tracker_id_score = dict()
    eval_dir = os.path.join(data_dir, 'data')
    seq_list, num_seq = load_list_from_folder(eval_dir)
    for seq_file in seq_list:
        seq_data, num_line = load_txt_file(seq_file)
        for data_line in seq_data:
            data_split = data_line.split(' ')
            score_tmp = float(data_split[-1])   # score is the last field
            id_tmp = int(data_split[1])         # track ID is field 1
            # setdefault replaces the explicit "if key not in dict" dance
            tracker_id_score.setdefault(id_tmp, []).append(score_tmp)

    # Collect the IDs to remove based on average confidence.
    # A set gives O(1) membership tests in the rewrite loops below
    # (the original list made the filtering pass O(n^2)).
    to_delete_id = set()
    for track_id, score_list in tracker_id_score.items():
        average_score = sum(score_list) / float(len(score_list))
        if average_score < score_threshold:
            to_delete_id.add(track_id)

    # Rewrite the per-sequence files, dropping low-confidence tracks.
    save_dir_tmp = os.path.join(save_dir, 'data')
    mkdir_if_missing(save_dir_tmp)
    for seq_file in seq_list:
        seq_name = fileparts(seq_file)[1]
        seq_save_path = os.path.join(save_dir_tmp, seq_name + '.txt')
        seq_data, num_line = load_txt_file(seq_file)
        # 'with' guarantees the output file is closed even if a line is malformed
        with open(seq_save_path, 'w') as seq_file_save:
            for data_line in seq_data:
                data_split = data_line.split(' ')
                id_tmp = int(data_split[1])
                if id_tmp not in to_delete_id:
                    seq_file_save.write(data_line + '\n')

    # Rewrite the per-frame files in the trk_withid folder the same way.
    trk_id_dir = os.path.join(data_dir, 'trk_withid')
    seq_dir_list, num_seq = load_list_from_folder(trk_id_dir)
    save_dir_tmp = os.path.join(save_dir, 'trk_withid')
    for seq_dir in seq_dir_list:
        frame_list, num_frame = load_list_from_folder(seq_dir)
        seq_name = fileparts(seq_dir)[1]
        save_frame_dir = os.path.join(save_dir_tmp, seq_name)
        mkdir_if_missing(save_frame_dir)
        for frame in frame_list:
            frame_index = fileparts(frame)[1]
            frame_save_path = os.path.join(save_frame_dir, frame_index + '.txt')
            frame_data, num_line = load_txt_file(frame)
            with open(frame_save_path, 'w') as frame_file_save:
                for data_line in frame_data:
                    data_split = data_line.split(' ')
                    # NOTE: in trk_withid files the ID is the LAST field,
                    # unlike the 'data' files where it is field 1
                    id_tmp = int(data_split[-1])
                    if id_tmp not in to_delete_id:
                        frame_file_save.write(data_line + '\n')
def test_cocoeval(): engine = data_engine.Movie2Caption('attention', 'youtube2text', video_feature='googlenet', mb_size_train=20, mb_size_test=20, maxlen=50, n_words=20000, n_frames=20, outof=None) samples_valid = utils.load_txt_file('./model_files/valid_samples.txt') samples_test = utils.load_txt_file('./model_files/test_samples.txt') samples_valid = [sample.strip() for sample in samples_valid] samples_test = [sample.strip() for sample in samples_test] samples_valid = build_sample_pairs(samples_valid, engine.valid_ids) samples_test = build_sample_pairs(samples_test, engine.test_ids) valid_score, test_score = score_with_cocoeval(samples_valid, samples_test, engine) print valid_score, test_score
def process_document(path, vocab, title_start="========,", forbidden_start="***LIST***", test=False, ssplit=True):
    """Turn one document into labeled sliding-window training/test records.

    Sentences directly following a ``title_start`` marker line are labeled 1
    (segment start), all others 0. Sentences are grouped into fixed-size
    windows of ``config.sent_window`` with stride ``config.sent_stride``
    (stride 1 in test mode); short trailing windows are padded with
    ``config.fake_sent``. In train mode the windows are shuffled and
    subsampled to ``config.perc_blocks_train``.

    Returns (records, number_of_windows, windows-if-test-else-None).
    """
    print("ssplit: " + str(ssplit))
    # Either take pre-split lines (dropping forbidden ones) or sentence-split raw text.
    if ssplit:
        lines = [ln for ln in utils.load_lines(path)
                 if not ln.startswith(forbidden_start)]
    else:
        lines = sentence_split(utils.load_txt_file(path))
    stride = 1 if test else config.sent_stride

    # Label each kept sentence: 1 iff its predecessor is a title marker.
    lab_lines = []
    lines_txt = []
    for pos, sent in enumerate(lines):
        if sent.startswith(title_start):
            continue  # the marker line itself is dropped
        follows_title = pos > 0 and lines[pos - 1].startswith(title_start)
        lab_lines.append((sent, 1 if follows_title else 0))
        lines_txt.append(sent)

    # Slide a window of sent_window sentences across the document.
    raw_blocks = []
    for start in range(0, len(lab_lines), stride):
        window = lab_lines[start:start + config.sent_window]
        pad = config.sent_window - len(window)
        if pad > 0:
            window = window + [(config.fake_sent, 0)] * pad
        raw_blocks.append(window)

    if not test:
        # Shuffle, then keep only a fraction of the windows for training.
        random.shuffle(raw_blocks)
        keep = int(config.perc_blocks_train * len(raw_blocks))
        raw_blocks = raw_blocks[:keep]

    doc_recs = []
    for window in raw_blocks:
        doc_recs.extend(create_one_instance(window, lines_txt, vocab))

    return doc_recs, len(raw_blocks), raw_blocks if test else None
def classify(): IMAGE = '/dl1/data/projects/imagenet/valid/' LABELS = '/dl1/data/projects/imagenet/val.txt' MEAN = '/dl1/data/projects/imagenet/ilsvrc_2012_mean.npy' EXT = 'JPEG' preprocessor = utils.VGGImageFuncs() '''build theano fn''' x = T.ftensor4('images') model = build_model(x) y = lasagne.layers.get_output(model['prob'], deterministic=True) params = lasagne.layers.get_all_params(model['prob'], trainable=True) classify_fn = theano.function([x], y) '''perform classification''' files = glob.glob(IMAGE + '/*.' + EXT) files = utils.sort_by_numbers_in_file_name(files) labels = utils.load_txt_file(LABELS) labels = [int((label.split(' ')[-1]).strip()) for label in labels] # go through minibatches idx = utils.generate_minibatch_idx(len(files), 64) TOP1s = [] TOP5s = [] for i, index in enumerate(idx): t0 = time.time() current = [files[j] for j in index] gts = np.asarray([labels[j] for j in index]) #inputs =[load_image(im_f) for im_f in current] inputs = preprocessor.preprocess(current) import ipdb ipdb.set_trace() probs = classify_fn(inputs) # (m, 1000, 1, 1) probs = np.squeeze(probs) predictions = probs.argsort()[:, ::-1][:, :5] for pred, gt in zip(predictions, gts): TOP1 = pred[0] == gt TOP5 = gt in pred TOP1s.append(TOP1) TOP5s.append(TOP5) print '%d / %d minibatches, acu TOP1 %.4f, TOP5 %.4f, used %.2f' % ( i, len(idx), np.mean(TOP1s) * 100, np.mean(TOP5s) * 100, time.time() - t0)