def main(data_root, result_root, split, seed, feat_window_size):
    """Forward every test video through the trained network and decode it.

    Loads the label mapping and the test split from ``data_root``, then the
    prior, grammar, length model and network snapshot for iteration
    ``NUM_ITERS`` from the run directory ``result_root + "-s-<split>-<seed>"``.
    Each video's network output has the log prior subtracted and is shifted
    so its maximum is 0; the videos are then decoded by worker processes
    running ``decode``.

    Args:
        data_root: directory containing ``mapping.txt`` and
            ``split<split>.test``.
        result_root: prefix of the run directory; the split/seed suffix is
            appended here.
        split: integer split number.
        seed: integer seed, used only to build the run directory name.
        feat_window_size: forwarded unchanged to ``Forwarder``.
    """
    result_root += "-s-%d-%d" % (split, seed)

    ### read label2index mapping and index2label mapping ###########################
    label2index = dict()
    index2label = dict()
    with open(os.path.join(data_root, 'mapping.txt'), 'r') as f:
        # Iterate line by line instead of slicing off the last element of
        # split('\n'): the old form lost the final class whenever the file
        # had no trailing newline, and crashed on blank lines.
        for line in f:
            fields = line.split()
            if not fields:
                continue
            index, label = int(fields[0]), fields[1]
            label2index[label] = index
            index2label[index] = label

    ### read test data #############################################################
    with open(os.path.join(data_root, 'split%d.test' % split), 'r') as f:
        # Same trailing-newline robustness as above; empty lines are skipped.
        video_list = [name for name in (line.rstrip('\n') for line in f)
                      if name]
    dataset = Dataset(data_root, video_list, label2index, shuffle=False)

    # load prior, length model, grammar, and network
    load_iteration = NUM_ITERS
    log_prior = np.log(
        np.loadtxt('%s/prior.iter-%s.txt' % (result_root, load_iteration)))
    grammar = PathGrammar('%s/grammar.txt' % result_root, label2index)
    length_model = PoissonModel(
        '%s/lengths.iter-%s.txt' % (result_root, load_iteration),
        max_length=2000)
    forwarder = Forwarder(dataset.input_dimension,
                          dataset.n_classes,
                          feat_window_size=feat_window_size)
    forwarder.load_model(
        '%s/network.iter-%s.net' % (result_root, load_iteration))

    # parallelization (number of worker *processes*, despite the name)
    n_threads = 4

    # Viterbi decoder
    viterbi_decoder = Viterbi(grammar,
                              length_model,
                              frame_sampling=30,
                              max_hypotheses=np.inf)

    # forward each video
    log_probs = dict()
    queue = mp.Queue()
    # Snapshot the video names once instead of rebuilding the list on every
    # iteration. Assumes dataset.features is not modified while iterating
    # and that iteration order matches key order (shuffle=False above).
    video_names = list(dataset.features.keys())
    for i, (sequence, _) in enumerate(dataset):
        video = video_names[i]
        queue.put(video)
        scores = forwarder.forward(sequence) - log_prior
        # shift so that the best score of the video is 0
        log_probs[video] = scores - np.max(scores)

    # Viterbi decoding
    procs = []
    for _ in range(n_threads):
        p = mp.Process(target=decode,
                       args=(queue, log_probs, viterbi_decoder, index2label,
                             result_root))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()
Exemple #2
0
def infer(label2index, index2label, n_threads):
    """Forward and Viterbi-decode every video listed in ``data/split1.test``.

    Models are read from fixed paths under ``results/``. For each video the
    forwarder output has the log prior subtracted and is shifted so its
    maximum is 0. The video names are put on a queue that is consumed by
    ``n_threads`` worker processes running ``decode``.

    Args:
        label2index: mapping from label name to class index, passed to
            ``PathGrammar`` and ``Dataset``.
        index2label: mapping from class index to label name, passed to the
            ``decode`` workers.
        n_threads: number of worker processes to start.
    """
    # load models
    log_prior = np.log(np.loadtxt('results/prior'))
    grammar = PathGrammar('results/grammar', label2index)
    length_model = PoissonModel('results/mean_lengths', max_length=2000)
    forwarder = Forwarder('results/net.model')
    # Viterbi decoder (max_hypotheses = n: at each time step, prune all hypotheses worse than the top n)
    viterbi_decoder = Viterbi(grammar,
                              length_model,
                              frame_sampling=30,
                              max_hypotheses=50000)
    # create list of test videos
    with open('data/split1.test', 'r') as f:
        # Read line by line: slicing off the last element of split('\n')
        # dropped the final video when the file had no trailing newline.
        video_list = [name for name in (line.rstrip('\n') for line in f)
                      if name]
    # forward each video
    log_probs = dict()
    queue = mp.Queue()
    for video in video_list:
        queue.put(video)
        # one single-video dataset per forward pass
        dataset = Dataset('data', [video], label2index)
        scores = forwarder.forward(dataset) - log_prior
        # shift so that the best score of the video is 0
        log_probs[video] = scores - np.max(scores)
    # Viterbi decoding
    procs = []
    for _ in range(n_threads):
        p = mp.Process(target=decode,
                       args=(queue, log_probs, viterbi_decoder, index2label))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()
def main(data_root, result_root, split, seed):
    """Train the network with Viterbi decoding and write periodic snapshots.

    Creates the run directory ``result_root + "-s-<split>-<seed>"``, reads
    the label mapping and the training split from ``data_root``, writes the
    set of training transcripts as ``grammar.txt`` and then runs
    ``NUM_ITERS`` training iterations. Network, length-model and prior
    snapshots are saved every 1000 iterations; the learning rate is
    multiplied by 0.1 once, at iteration 2500.

    Args:
        data_root: directory containing ``mapping.txt`` and
            ``split<split>.train``.
        result_root: prefix of the run directory; the split/seed suffix is
            appended here.
        split: integer split number.
        seed: integer seed, used only to build the run directory name.
    """
    result_root += "-s-%d-%d" % (split, seed)

    os.makedirs(result_root, exist_ok=True)

    ### read label2index mapping and index2label mapping ###########################
    label2index = dict()
    index2label = dict()
    with open(os.path.join(data_root, 'mapping.txt'), 'r') as f:
        # Iterate line by line instead of slicing off the last element of
        # split('\n'): the old form lost the final class whenever the file
        # had no trailing newline, and crashed on blank lines.
        for line in f:
            fields = line.split()
            if not fields:
                continue
            index, label = int(fields[0]), fields[1]
            label2index[label] = index
            index2label[index] = label

    ### read training data #########################################################
    print('read data...')
    with open(os.path.join(data_root, 'split%d.train' % split), 'r') as f:
        # Same trailing-newline robustness as above; empty lines are skipped.
        video_list = [name for name in (line.rstrip('\n') for line in f)
                      if name]
    dataset = Dataset(data_root, video_list, label2index, shuffle=True)
    print('done')

    ### generate path grammar for inference ########################################
    paths = set()
    for _, transcript in dataset:
        paths.add(' '.join([index2label[index] for index in transcript]))
    with open(os.path.join(result_root, 'grammar.txt'), 'w') as f:
        # sorted so the file is identical from run to run (set order is not)
        f.write('\n'.join(sorted(paths)) + '\n')

    ### actual nn-viterbi training #################################################
    # (None, None): transcript-grammar and length-model are set for each
    # training sequence separately, see trainer.train(...)
    decoder = Viterbi(None, None, frame_sampling=30, max_hypotheses=np.inf)
    trainer = Trainer(decoder,
                      dataset.input_dimension,
                      dataset.n_classes,
                      buffer_size=len(dataset),
                      buffered_frame_ratio=25)
    learning_rate = 0.01

    # train for NUM_ITERS iterations
    for i in tqdm(range(NUM_ITERS)):
        iteration = i + 1
        sequence, transcript = dataset.get()
        loss = trainer.train(sequence,
                             transcript,
                             batch_size=512,
                             learning_rate=learning_rate)
        # print some progress information
        if iteration % 100 == 0:
            print('Iteration %d, loss: %f' % (iteration, loss))
        # save model every 1000 iterations
        if iteration % 1000 == 0:
            print('save snapshot ' + str(iteration))
            suffix = str(iteration)
            network_file = os.path.join(result_root,
                                        'network.iter-' + suffix + '.net')
            length_file = os.path.join(result_root,
                                       'lengths.iter-' + suffix + '.txt')
            prior_file = os.path.join(result_root,
                                      'prior.iter-' + suffix + '.txt')
            trainer.save_model(network_file, length_file, prior_file)
        # reduce the learning rate once, after 2500 iterations
        if iteration == 2500:
            learning_rate = learning_rate * 0.1
Exemple #4
0
'''
for _, transcript in dataset:
    count+=1
    path=[]
    for index in transcript:

        path.append(index2label[index])
    #print(path,' '.join(path))
    paths.add(' '.join(path))
'''
print(len(paths),count)
#with open(results_path+'grammar.txt', 'w') as f:
#    f.write('\n'.join(paths) + '\n')

### actual nn-viterbi training #################################################
# NOTE(review): this is a truncated script fragment; `dataset` is created
# outside the visible range.
decoder = Viterbi(None, None, frame_sampling = 5, max_hypotheses = np.inf) # (None, None): transcript-grammar and length-model are set for each training sequence separately, see trainer.train(...)
trainer = Trainer(decoder,  dataset.n_classes, buffer_size = len(dataset), buffered_frame_ratio = 1)
learning_rate = 0.00001
# running sum of the per-iteration losses; divided by the iteration count
# when printed below
avg_loss=0
# train for 100000 iterations
for i in range(100000):
    sequence, transcript = dataset.get()
    #print('training',i)
    loss = trainer.train(sequence, transcript, batch_size = 1, learning_rate = learning_rate)
    avg_loss += loss
    # print progress (current loss and mean loss so far) every 10 iterations
    if (i+1) % 10 == 0:
        print('Iteration %d, loss: %f  Average Loss %f' % (i + 1, loss, avg_loss / (i + 1)))
    # snapshot point every 100 iterations
    # NOTE(review): only the log message is visible here; the actual save
    # call appears to be cut off - confirm against the full file.
    if (i+1) % 100 == 0:
        print('save snapshot ' + str(i+1))
    np.loadtxt(args.result_path + 'prior.iter-' + str(load_iteration) +
               '.txt'))
# Load grammar, length model and network snapshot for `load_iteration`.
# Paths are built by plain string concatenation, so args.result_path must
# already end with a path separator.
# NOTE(review): truncated script fragment - `args`, `dataset`, `label2index`,
# `index2label`, `load_iteration` and `log_prior` are defined outside the
# visible range.
grammar = PathGrammar(args.result_path + 'grammar.txt', label2index)
length_model = PoissonModel(args.result_path + 'lengths.iter-' +
                            str(load_iteration) + '.txt',
                            max_length=2000)
forwarder = Forwarder(dataset.input_dimension, dataset.n_classes)
forwarder.load_model(args.result_path + 'network.iter-' + str(load_iteration) +
                     '.net')

# parallelization (number of worker processes created below)
n_threads = 4

# Viterbi decoder
viterbi_decoder = Viterbi(grammar,
                          length_model,
                          frame_sampling=30,
                          max_hypotheses=np.inf)
# forward each video
log_probs = dict()
queue = mp.Queue()
for i, data in enumerate(dataset):
    sequence, _ = data
    # name of the i-th video; presumably dataset iteration order matches the
    # key order of dataset.features - verify against Dataset
    video = list(dataset.features.keys())[i]
    queue.put(video)
    # subtract the log prior, then shift so the maximum is 0
    log_probs[video] = forwarder.forward(sequence) - log_prior
    log_probs[video] = log_probs[video] - np.max(log_probs[video])
# Viterbi decoding: one `decode` worker process per slot
# NOTE(review): the fragment ends before the processes are started or joined.
procs = []
for i in range(n_threads):
    p = mp.Process(target=decode,
                   args=(queue, log_probs, viterbi_decoder, index2label))
Exemple #6
0
# Load prior, grammar, length model and network snapshot for
# `load_iteration` from the fixed `results/` directory.
# NOTE(review): truncated script fragment - `dataset`, `label2index` and
# `load_iteration` are defined outside the visible range.
log_prior = np.log(
    np.loadtxt('results/prior.iter-' + str(load_iteration) + '.txt'))
grammar = PathGrammar('results/grammar.txt', label2index)
length_model = PoissonModel('results/lengths.iter-' + str(load_iteration) +
                            '.txt',
                            max_length=2000)
forwarder = Forwarder(dataset.input_dimension, dataset.n_classes)
forwarder.load_model('results/network.iter-' + str(load_iteration) + '.net')
# NOTE(review): `window` and `step` are not used in the visible part of the
# script; presumably consumed by the truncated decode call below - confirm.
window = 10
step = 5

# parallelization (number of worker processes)
n_threads = 4

# Viterbi decoder (no max_hypotheses given, so the Viterbi default applies)
viterbi_decoder = Viterbi(grammar, length_model, frame_sampling=30)
# forward each video
log_probs = dict()
queue = mp.Queue()
for i, data in enumerate(dataset):
    sequence, _ = data
    # name of the i-th video; presumably dataset iteration order matches the
    # key order of dataset.features - verify against Dataset
    video = list(dataset.features.keys())[i]
    queue.put(video)
    # convert the forwarder output to a numpy array (presumably a torch
    # tensor - confirm), then subtract the log prior
    log_probs[video] = forwarder.forward(
        sequence).data.cpu().numpy() - log_prior
    # shift so that the maximum is 0
    log_probs[video] = log_probs[video] - np.max(log_probs[video])
# Viterbi decoding
procs = []
for i in range(n_threads):
    p = mp.Process(target=stn_decode,
                   args=(queue, log_probs, viterbi_decoder, index2label,
Exemple #7
0
    # load your data here (must be in log space!!!):
    # Note that you might want to remove a prior first.
    log_probs = np.loadtxt(file_probs, dtype=np.float32)

    # sanity check
    print(np.max(log_probs))
    print(np.min(log_probs))

    # scale down if out of range
    if np.max(log_probs) > 0:
        log_probs = log_probs - (2 * np.max(log_probs))

    # Viterbi decoder (max_hypotheses = n: at each time step, prune all hypotheses worse than the top n)
    viterbi_decoder = Viterbi(grammar,
                              length_model,
                              frame_sampling=20,
                              max_hypotheses=50000)

    # Viterbi decoding
    print('Processing ' + file_probs)
    print('Result file ' + file_out)

    try:
        start = time.time()
        score, labels, segments = viterbi_decoder.decode(log_probs)
        end = time.time()
        print(end - start)
        # save result
        with open(file_out, 'w') as f:
            for l in labels:
                f.write(index2label[l] + '\n')
# NOTE(review): truncated script fragment - `video_list`, `label2index` and
# `index2label` are defined outside the visible range.
dataset = Dataset('data', video_list, label2index, shuffle=False)
print('done')

# grammar and length model (no network is loaded in this fragment)
grammar = NGram('results/grammar.txt', label2index, ngram_order=3)
length_model = MeanLengthModel(dataset.n_classes,
                               max_length=500,
                               threshold=200.0)

# parallelization (number of worker processes)
n_threads = 8

# Viterbi decoder
viterbi_decoder = Viterbi(grammar,
                          length_model,
                          frame_sampling=10,
                          pruning_factor=0.98,
                          max_segment_start_hyp=20)

# Viterbi decoding: enqueue every video name, then let the workers drain
# the queue
q = mp.Queue()
for i, data in enumerate(dataset.features):
    # `data` is not used; the name is looked up by position in the keys
    video = list(dataset.features.keys())[i]
    q.put(video)
procs = []
for i in range(n_threads):
    # each worker receives the whole dataset rather than precomputed scores
    p = mp.Process(target=decode,
                   args=(q, viterbi_decoder, index2label, dataset))
    procs.append(p)
    p.start()
for p in procs: