def main(data_root, result_root, split, seed, feat_window_size):
    result_root += "-s-%d-%d" % (split, seed)

    ### read label2index mapping and index2label mapping ###########################
    label2index = dict()
    index2label = dict()
    with open(os.path.join(data_root, 'mapping.txt'), 'r') as f:
        content = f.read().split('\n')[0:-1]
        for line in content:
            label2index[line.split()[1]] = int(line.split()[0])
            index2label[int(line.split()[0])] = line.split()[1]

    ### read test data #############################################################
    with open(os.path.join(data_root, 'split%d.test' % split), 'r') as f:
        video_list = f.read().split('\n')[0:-1]
    dataset = Dataset(data_root, video_list, label2index, shuffle=False)

    # load prior, length model, grammar, and network
    load_iteration = NUM_ITERS
    log_prior = np.log(np.loadtxt('%s/prior.iter-' % result_root + str(load_iteration) + '.txt'))
    grammar = PathGrammar('%s/grammar.txt' % result_root, label2index)
    length_model = PoissonModel('%s/lengths.iter-' % result_root + str(load_iteration) + '.txt', max_length=2000)
    forwarder = Forwarder(dataset.input_dimension, dataset.n_classes, feat_window_size=feat_window_size)
    forwarder.load_model('%s/network.iter-' % result_root + str(load_iteration) + '.net')

    # parallelization
    n_threads = 4

    # Viterbi decoder
    viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=30, max_hypotheses=np.inf)

    # forward each video
    log_probs = dict()
    queue = mp.Queue()
    for i, data in enumerate(dataset):
        sequence, _ = data
        video = list(dataset.features.keys())[i]
        queue.put(video)
        log_probs[video] = forwarder.forward(sequence) - log_prior
        log_probs[video] = log_probs[video] - np.max(log_probs[video])

    # Viterbi decoding
    procs = []
    for i in range(n_threads):
        p = mp.Process(target=decode, args=(queue, log_probs, viterbi_decoder, index2label, result_root))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()
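# A possible command-line entry point for the test script above. This is a
# sketch only: the flag names and default values are assumptions, not part of
# the original code.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_root', default='data')
    parser.add_argument('--result_root', default='results')
    parser.add_argument('--split', type=int, default=1)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--feat_window_size', type=int, default=11)
    args = parser.parse_args()
    main(args.data_root, args.result_root, args.split, args.seed, args.feat_window_size)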
def infer(label2index, index2label, n_threads):
    # load models
    log_prior = np.log(np.loadtxt('results/prior'))
    grammar = PathGrammar('results/grammar', label2index)
    length_model = PoissonModel('results/mean_lengths', max_length=2000)
    forwarder = Forwarder('results/net.model')

    # Viterbi decoder (max_hypotheses = n: at each time step, prune all hypotheses worse than the top n)
    viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=30, max_hypotheses=50000)

    # create list of test videos
    with open('data/split1.test', 'r') as f:
        video_list = f.read().split('\n')[0:-1]

    # forward each video
    log_probs = dict()
    queue = mp.Queue()
    for video in video_list:
        queue.put(video)
        dataset = Dataset('data', [video], label2index)
        log_probs[video] = forwarder.forward(dataset) - log_prior
        log_probs[video] = log_probs[video] - np.max(log_probs[video])

    # Viterbi decoding
    procs = []
    for i in range(n_threads):
        p = mp.Process(target=decode, args=(queue, log_probs, viterbi_decoder, index2label))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()
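# A minimal sketch of the decode(...) worker that the mp.Process calls above
# target. The real implementation ships with the repository; the queue-draining
# loop, the 'results/' output path, and the per-frame output format here are
# assumptions. It relies only on viterbi_decoder.decode(...) returning
# (score, labels, segments), as in the standalone decoding fragment further below.
import queue as queue_lib

def decode(q, log_probs, viterbi_decoder, index2label):
    while True:
        try:
            video = q.get(timeout=3)  # stop once no videos are left in the queue
        except queue_lib.Empty:
            break
        score, labels, segments = viterbi_decoder.decode(log_probs[video])
        # map frame-level class indices back to label names and write one file per video
        with open('results/' + video, 'w') as f:
            f.write(' '.join([index2label[l] for l in labels]) + '\n')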
def main(data_root, result_root, split, seed):
    result_root += "-s-%d-%d" % (split, seed)
    os.makedirs(result_root, exist_ok=True)

    ### read label2index mapping and index2label mapping ###########################
    label2index = dict()
    index2label = dict()
    with open(os.path.join(data_root, 'mapping.txt'), 'r') as f:
        content = f.read().split('\n')[0:-1]
        for line in content:
            label2index[line.split()[1]] = int(line.split()[0])
            index2label[int(line.split()[0])] = line.split()[1]

    ### read training data #########################################################
    print('read data...')
    with open(os.path.join(data_root, 'split%d.train' % split), 'r') as f:
        video_list = f.read().split('\n')[0:-1]
    dataset = Dataset(data_root, video_list, label2index, shuffle=True)
    print('done')

    ### generate path grammar for inference ########################################
    paths = set()
    for _, transcript in dataset:
        paths.add(' '.join([index2label[index] for index in transcript]))
    with open(os.path.join(result_root, 'grammar.txt'), 'w') as f:
        f.write('\n'.join(paths) + '\n')

    ### actual nn-viterbi training #################################################
    # (None, None): transcript grammar and length model are set for each training sequence separately, see trainer.train(...)
    decoder = Viterbi(None, None, frame_sampling=30, max_hypotheses=np.inf)
    trainer = Trainer(decoder, dataset.input_dimension, dataset.n_classes, buffer_size=len(dataset), buffered_frame_ratio=25)
    learning_rate = 0.01

    # train for NUM_ITERS iterations (10000 in the original setup)
    for i in tqdm(range(NUM_ITERS)):
        sequence, transcript = dataset.get()
        loss = trainer.train(sequence, transcript, batch_size=512, learning_rate=learning_rate)
        # print some progress information
        if (i + 1) % 100 == 0:
            print('Iteration %d, loss: %f' % (i + 1, loss))
        # save a model snapshot every 1000 iterations
        if (i + 1) % 1000 == 0:
            print('save snapshot ' + str(i + 1))
            network_file = os.path.join(result_root, 'network.iter-' + str(i + 1) + '.net')
            length_file = os.path.join(result_root, 'lengths.iter-' + str(i + 1) + '.txt')
            prior_file = os.path.join(result_root, 'prior.iter-' + str(i + 1) + '.txt')
            trainer.save_model(network_file, length_file, prior_file)
        # decrease the learning rate after 2500 iterations
        if (i + 1) == 2500:
            learning_rate = learning_rate * 0.1
'''
for _, transcript in dataset:
    count += 1
    path = []
    for index in transcript:
        path.append(index2label[index])
    #print(path, ' '.join(path))
    paths.add(' '.join(path))
'''
print(len(paths), count)
#with open(results_path + 'grammar.txt', 'w') as f:
#    f.write('\n'.join(paths) + '\n')

### actual nn-viterbi training #################################################
# (None, None): transcript grammar and length model are set for each training sequence separately, see trainer.train(...)
decoder = Viterbi(None, None, frame_sampling=5, max_hypotheses=np.inf)
trainer = Trainer(decoder, dataset.n_classes, buffer_size=len(dataset), buffered_frame_ratio=1)
learning_rate = 0.00001
avg_loss = 0

# train for 100000 iterations
for i in range(100000):
    sequence, transcript = dataset.get()
    loss = trainer.train(sequence, transcript, batch_size=1, learning_rate=learning_rate)
    avg_loss += loss
    # print some progress information
    if (i + 1) % 10 == 0:
        print('Iteration %d, loss: %f Average Loss %f' % (i + 1, loss, avg_loss / (i + 1)))
    # save model every 100 iterations
    if (i + 1) % 100 == 0:
        print('save snapshot ' + str(i + 1))
log_prior = np.log(np.loadtxt(args.result_path + 'prior.iter-' + str(load_iteration) + '.txt'))
grammar = PathGrammar(args.result_path + 'grammar.txt', label2index)
length_model = PoissonModel(args.result_path + 'lengths.iter-' + str(load_iteration) + '.txt', max_length=2000)
forwarder = Forwarder(dataset.input_dimension, dataset.n_classes)
forwarder.load_model(args.result_path + 'network.iter-' + str(load_iteration) + '.net')

# parallelization
n_threads = 4

# Viterbi decoder
viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=30, max_hypotheses=np.inf)

# forward each video
log_probs = dict()
queue = mp.Queue()
for i, data in enumerate(dataset):
    sequence, _ = data
    video = list(dataset.features.keys())[i]
    queue.put(video)
    log_probs[video] = forwarder.forward(sequence) - log_prior
    log_probs[video] = log_probs[video] - np.max(log_probs[video])

# Viterbi decoding
procs = []
for i in range(n_threads):
    p = mp.Process(target=decode, args=(queue, log_probs, viterbi_decoder, index2label))
    procs.append(p)
    p.start()
for p in procs:
    p.join()
log_prior = np.log(np.loadtxt('results/prior.iter-' + str(load_iteration) + '.txt'))
grammar = PathGrammar('results/grammar.txt', label2index)
length_model = PoissonModel('results/lengths.iter-' + str(load_iteration) + '.txt', max_length=2000)
forwarder = Forwarder(dataset.input_dimension, dataset.n_classes)
forwarder.load_model('results/network.iter-' + str(load_iteration) + '.net')
window = 10
step = 5

# parallelization
n_threads = 4

# Viterbi decoder
viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=30)

# forward each video
log_probs = dict()
queue = mp.Queue()
for i, data in enumerate(dataset):
    sequence, _ = data
    video = list(dataset.features.keys())[i]
    queue.put(video)
    log_probs[video] = forwarder.forward(sequence).data.cpu().numpy() - log_prior
    log_probs[video] = log_probs[video] - np.max(log_probs[video])

# Viterbi decoding
procs = []
for i in range(n_threads):
    p = mp.Process(target=stn_decode, args=(queue, log_probs, viterbi_decoder, index2label,
# load your data here (must be in log space!!!):
# Note that you might want to remove a prior first.
log_probs = np.loadtxt(file_probs, dtype=np.float32)

# sanity check
print(np.max(log_probs))
print(np.min(log_probs))
# scale down if out of range
if np.max(log_probs) > 0:
    log_probs = log_probs - (2 * np.max(log_probs))

# Viterbi decoder (max_hypotheses = n: at each time step, prune all hypotheses worse than the top n)
viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=20, max_hypotheses=50000)

# Viterbi decoding
print('Processing ' + file_probs)
print('Result file ' + file_out)
try:
    start = time.time()
    score, labels, segments = viterbi_decoder.decode(log_probs)
    end = time.time()
    print(end - start)
    # save result
    with open(file_out, 'w') as f:
        for l in labels:
            f.write(index2label[l] + '\n')
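# The standalone decoding fragment above assumes that a label mapping, a path
# grammar, and a length model already exist. A minimal setup sketch: the model
# file names are borrowed from the other scripts in this section, PathGrammar
# and PoissonModel are the repository's own classes, and the two
# 'example_video' paths are purely hypothetical.
label2index, index2label = dict(), dict()
with open('data/mapping.txt', 'r') as f:
    for line in f.read().split('\n')[0:-1]:
        index, label = line.split()
        label2index[label] = int(index)
        index2label[int(index)] = label

grammar = PathGrammar('results/grammar.txt', label2index)
length_model = PoissonModel('results/mean_lengths', max_length=2000)

file_probs = 'results/example_video.probs.txt'      # hypothetical: frame-wise log probabilities
file_out = 'results/example_video.recognition.txt'  # hypothetical: output label file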
dataset = Dataset('data', video_list, label2index, shuffle=False)
print('done')

# length model, grammar, and network
grammar = NGram('results/grammar.txt', label2index, ngram_order=3)
length_model = MeanLengthModel(dataset.n_classes, max_length=500, threshold=200.0)

# parallelization
n_threads = 8

# Viterbi decoder
viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=10, pruning_factor=0.98, max_segment_start_hyp=20)

# Viterbi decoding
q = mp.Queue()
for i, data in enumerate(dataset.features):
    video = list(dataset.features.keys())[i]
    q.put(video)
procs = []
for i in range(n_threads):
    p = mp.Process(target=decode, args=(q, viterbi_decoder, index2label, dataset))
    procs.append(p)
    p.start()
for p in procs:
    p.join()