Ejemplo n.º 1
0
# The sequence segmentation trainer has some optional parameters we can set.
# These parameters determine properties of the segmentation model we will
# learn.  See the dlib documentation for the sequence_segmenter object for a
# full discussion of their meanings.
params = dlib.segmenter_params()
params.window_size = 3
params.use_high_order_features = True
params.use_BIO_model = True
# This is the common SVM C parameter.  Larger values encourage the trainer to
# attempt to fit the data exactly but might overfit.  In general, you determine
# this parameter by cross-validation.
params.C = 10

# Train a model.  The model object is responsible for predicting the locations
# of names in new sentences.
model = dlib.train_sequence_segmenter(training_sequences, segments, params)

# Let's print out the things the model thinks are names.  The output is a set
# of ranges which are predicted to contain names.  If you run this example
# program you will see that it gets them all correct.
for i, s in enumerate(sentences):
    print_segment(s, model(training_sequences[i]))

# Let's also try segmenting a new sentence.  This will print out "Bob Bucket".
# Note that we need to remember to use the same vector representation as we used
# during training, so both the sparse and the dense case must be handled here.
test_sentence = ("There once was a man from Nantucket "
                 "whose name rhymed with Bob Bucket")
if use_sparse_vects:
    print_segment(test_sentence,
                  model(sentence_to_sparse_vectors(test_sentence)))
else:
    # Without this branch nothing is printed when dense vectors are in use.
    print_segment(test_sentence,
                  model(sentence_to_vectors(test_sentence)))
# Now that we have a simple training set we can train a sequence segmenter.
# The trainer exposes a few optional parameters that determine properties of
# the segmentation model we will learn.  See the dlib documentation for the
# sequence_segmenter object for a full discussion of their meanings.
params = dlib.segmenter_params()
params.window_size = 3
params.use_high_order_features = True
params.use_BIO_model = True
# This is the common SVM C parameter.  Larger values encourage the trainer to
# attempt to fit the data exactly but might overfit.  In general, you
# determine this parameter by cross-validation.
params.C = 10

# Train a model.  The model object is responsible for predicting the locations
# of names in new sentences.
model = dlib.train_sequence_segmenter(training_sequences, segments, params)


# Let's print out the things the model thinks are names.  The output is a set
# of ranges which are predicted to contain names.  If you run this example
# program you will see that it gets them all correct.
for i, sentence in enumerate(sentences):
    # training_sequences[i] is indexed by position alongside sentences.
    print_segment(sentence, model(training_sequences[i]))

# Let's also try segmenting a new sentence.  This will print out "Bob Bucket".
# Note that we need to remember to use the same vector representation as we
# used during training.
test_sentence = "There once was a man from Nantucket whose name rhymed with Bob Bucket"
if use_sparse_vects:
    print_segment(test_sentence, model(sentence_to_sparse_vectors(test_sentence)))
else:
    print_segment(test_sentence, model(sentence_to_vectors(test_sentence)))