Example #1
 def generate_training_data(self, infile, feat_options={}, encoding='utf-8'):
     data_file_name = tempfile.mktemp()
     data_file = codecs.open(data_file_name,'w',encoding)
     inst_ct = 0
     for s in BrownReader(infile):
         # build token list for each sentence (urgh! FIXME)
         tokens = []
         for wd,tag in s:
             token = Token( string=wd, pos=tag )
             token.label = token.pos # label is POS tag
             tokens.append( token )
         # create training instance for each token
         for i in range(len(tokens)):
             inst_ct += 1
             os.write(1, "\b"*len(str(inst_ct)) + str(inst_ct)) # overwrite the running counter on stdout
             inst = Instance( label=tokens[i].label,
                              index=i, tokens=tokens,
                              feat_selection=feat_options,
                              lex_dict=self.lex_dict,
                              tag_dict=self.tag_dict,
                              cache=self.cache )
             inst.get_features()
             print >> data_file, str(inst)
             # print >> sys.stderr, inst.__str__().encode('utf8')
     os.write(1, '\n')
     data_file.close()
     return data_file_name
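
The os.write calls in Example #1 drive a simple in-place progress counter: backspace characters erase the previously printed count before the new value is written. Below is a minimal, self-contained sketch of the same trick, using sys.stdout rather than a raw file descriptor; show_progress is a hypothetical helper, not part of the original code.

import sys

def show_progress(count, prev_len):
    # erase the previously printed count with backspaces, then print the new one
    text = str(count)
    sys.stdout.write("\b" * prev_len + text)
    sys.stdout.flush()
    return len(text)

prev_len = 0
for ct in range(1, 10001):
    prev_len = show_progress(ct, prev_len)
sys.stdout.write("\n")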
Example #2
 def tag_token_sequence(self, tokens, feat_options={}, beam_size=3):
     ''' N-best breadth (beam) search for the best tag sequence for each sentence '''
     # maintain N-best sequences of tagged tokens
     sequences = [([],0.0)]  # log prob.
     for i,token in enumerate(tokens):
         n_best_sequences = []
         # cache static features
         cached_inst = Instance( label=tokens[i].label,
                                 index=i, tokens=tokens,
                                 feat_selection=feat_options,
                                 lex_dict=self.lex_dict,
                                 tag_dict=self.tag_dict,
                                 cache=self.cache )
         cached_inst.get_static_features()
         # get possible tags: union of tags found in tag_dict and
         # lex_dict
         wd = token.string
         legit_tags1 = self.tag_dict.get(wd,{})
         legit_tags2 = {} # self.lex_dict.get(wd,{}) 
         for j,seq in enumerate(sequences):
             seq_j,log_pr_j = sequences[j]
             tokens_j = seq_j+tokens[i:] # tokens with previous labels
             # classify token
             inst = Instance( label=tokens[i].label,
                              index=i, tokens=tokens_j, 
                              feat_selection=feat_options,
                              lex_dict=self.lex_dict,
                              tag_dict=self.tag_dict,
                              cache=self.cache )
             inst.fv = cached_inst.fv[:]
             inst.get_sequential_features()
             label_pr_distrib = self.classifier.class_distribution(inst.fv)
             # extend sequence j with current token
             for (cl,pr) in label_pr_distrib:
                 # make sure that cl is a legal tag
                 if legit_tags1 or legit_tags2:
                     if (cl not in legit_tags1) and (cl not in legit_tags2):
                         continue
                 labelled_token = Token(string=token.string, pos=token.pos,
                                        comment=token.comment,
                                        label=cl, label_pr_distrib=label_pr_distrib)
                 n_best_sequences.append((seq_j+[labelled_token],log_pr_j+math.log(pr)))
         # sort sequences
         n_best_sequences.sort( key=operator.itemgetter(1) )
         # keep N best
         sequences = n_best_sequences[-beam_size:]
     # return sequence with highest prob. 
     best_sequence = sequences[-1][0]
     # print >> sys.stderr, "Best tok seq:", [(t.string,t.label) for t in best_sequence]
     return best_sequence
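
The core of Example #2 is the N-best bookkeeping: each partial sequence carries a log probability, is extended with every (legal) tag proposed for the next token, and only the beam_size highest-scoring extensions survive to the next position. The following is a minimal sketch of that loop with the Instance/feature machinery stripped out; class_distribution here is a hypothetical stand-in for self.classifier.class_distribution(inst.fv) and is assumed to return (label, probability) pairs.

import math

def beam_search(tokens, class_distribution, beam_size=3):
    # sequences holds (labelled prefix, log probability) pairs
    sequences = [([], 0.0)]
    for token in tokens:
        candidates = []
        for prefix, log_pr in sequences:
            # extend each surviving prefix with every candidate label
            for label, pr in class_distribution(prefix, token):
                candidates.append((prefix + [(token, label)], log_pr + math.log(pr)))
        # keep only the beam_size highest-scoring partial sequences
        candidates.sort(key=lambda item: item[1])
        sequences = candidates[-beam_size:]
    # return the labelled sequence with the highest log probability
    return sequences[-1][0]

# toy usage: a "classifier" that ignores context and always proposes two tags
toy = lambda prefix, token: [("N", 0.6), ("V", 0.4)]
print(beam_search(["time", "flies"], toy, beam_size=2))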