Ejemplo n.º 1
0
    # Make an array of arrays of dlib.sparse_vector objects.
    training_sequences = dlib.sparse_vectorss()
    for s in sentences:
        training_sequences.append(sentence_to_sparse_vectors(s))
else:
    # Make an array of arrays of dlib.vector objects.
    training_sequences = dlib.vectorss()
    for s in sentences:
        training_sequences.append(sentence_to_vectors(s))

# With a simple training set in hand, a sequence segmenter can be trained.
# Before doing so, we set some of the trainer's optional parameters, which
# determine properties of the segmentation model that will be learned.  The
# dlib documentation for the sequence_segmenter object discusses their
# meanings in full.
params = dlib.segmenter_params()
params.use_BIO_model = True
params.use_high_order_features = True
params.window_size = 3
# C is the common SVM C parameter.  Larger values encourage the trainer to try
# to fit the data exactly, which might overfit; it is generally chosen by
# cross-validation.
params.C = 10

# Train the model, the object responsible for predicting the locations of
# names in new sentences.
model = dlib.train_sequence_segmenter(
    training_sequences,
    segments,
    params,
)

# Let's print out the things the model thinks are names.  The output is a set
# of ranges which are predicted to contain names.  If you run this example
# program you will see that it gets them all correct.
    training_sequences = dlib.sparse_vectorss()
    for s in sentences:
        training_sequences.append(sentence_to_sparse_vectors(s))
else:
    # Make an array of arrays of dlib.vector objects.
    training_sequences = dlib.vectorss()
    for s in sentences:
        training_sequences.append(sentence_to_vectors(s))



# The simple training set is ready, so a sequence segmenter can now be trained.  First,
# though, we fill in some of the trainer's optional parameters, which determine properties
# of the segmentation model that gets learned.  The dlib documentation for the
# sequence_segmenter object gives a full discussion of what each one means.
params = dlib.segmenter_params()
# The common SVM C parameter: larger values encourage the trainer to attempt to fit the
# data exactly, but might overfit.  In general it is determined by cross-validation.
params.C = 10
params.use_BIO_model = True
params.use_high_order_features = True
params.window_size = 3

# Train a model.  This object is what predicts the locations of names in new sentences.
model = dlib.train_sequence_segmenter(training_sequences, segments, params)


# Let's print out the things the model thinks are names.  The output is a set of ranges
# which are predicted to contain names.  If you run this example program you will see that