# The sequence segmentation trainer has some optional parameters we can set.
# These parameters determine properties of the segmentation model we will
# learn.  See the dlib documentation for the sequence_segmenter object for a
# full discussion of their meanings.
params = dlib.segmenter_params()
params.window_size = 3
params.use_high_order_features = True
params.use_BIO_model = True
# This is the common SVM C parameter.  Larger values encourage the trainer to
# attempt to fit the data exactly but might overfit.  In general, you determine
# this parameter by cross-validation.
params.C = 10

# Train a model.  The model object is responsible for predicting the locations
# of names in new sentences.
model = dlib.train_sequence_segmenter(training_sequences, segments, params)

# Let's print out the things the model thinks are names.  The output is a set
# of ranges which are predicted to contain names.  If you run this example
# program you will see that it gets them all correct.
for i, s in enumerate(sentences):
    print_segment(s, model(training_sequences[i]))

# Let's also try segmenting a new sentence.  This will print out "Bob Bucket".
# Note that we need to remember to use the same vector representation as we
# used during training, so both settings of use_sparse_vects are handled.
test_sentence = ("There once was a man from Nantucket "
                 "whose name rhymed with Bob Bucket")
if use_sparse_vects:
    print_segment(test_sentence,
                  model(sentence_to_sparse_vectors(test_sentence)))
else:
    # Non-sparse case: featurize with sentence_to_vectors so the test sentence
    # is still segmented and printed instead of being silently skipped.
    print_segment(test_sentence, model(sentence_to_vectors(test_sentence)))
# Now that we have a simple training set we can train a sequence segmenter.  However, the
# sequence segmentation trainer has some optional parameters we can set.  These parameters
# determine properties of the segmentation model we will learn.  See the dlib documentation
# for the sequence_segmenter object for a full discussion of their meanings.
params = dlib.segmenter_params()
params.window_size = 3
params.use_high_order_features = True
params.use_BIO_model = True
# This is the common SVM C parameter.  Larger values encourage the trainer to attempt to
# fit the data exactly but might overfit.  In general, you determine this parameter by
# cross-validation.
params.C = 10

# Train a model.  The model object is responsible for predicting the locations of names in
# new sentences.
model = dlib.train_sequence_segmenter(training_sequences, segments, params)

# Let's print out the things the model thinks are names.  The output is a set of ranges
# which are predicted to contain names.  If you run this example program you will see that
# it gets them all correct.
for i, sentence in enumerate(sentences):
    # training_sequences[i] is the vectorized form passed to the model for sentences[i].
    print_segment(sentence, model(training_sequences[i]))

# Let's also try segmenting a new sentence.  This will print out "Bob Bucket".  Note that we
# need to remember to use the same vector representation as we used during training.
test_sentence = "There once was a man from Nantucket whose name rhymed with Bob Bucket"
if use_sparse_vects:
    print_segment(test_sentence, model(sentence_to_sparse_vectors(test_sentence)))
else:
    print_segment(test_sentence, model(sentence_to_vectors(test_sentence)))