Exemple #1
0
 def __init__(self, train=False, cv=True, folds=5, trained_model_name="trained_model.pkl", feat_index_name="feature_index.pkl", stored_tagset="tagset.pkl"):
   """Prepare the classifier for training, or load a stored model for prediction.

   Args:
     train: When true, nothing is read from disk and only the settings
       below are recorded. When false, the classifier, the feature index
       and the tagset are unpickled from the three given paths.
     cv: Stored as ``self.cv``; when training, a notice that
       cross-validation will be done is written to stderr if it is true.
     folds: Stored as ``self.folds`` (presumably the number of
       cross-validation folds -- confirm against the training code).
     trained_model_name: Path of the pickled classifier.
     feat_index_name: Path of the pickled feature index, which is
       installed on ``self.fp.feat_index`` in prediction mode.
     stored_tagset: Path of the pickled tagset.

   Raises:
     IOError/OSError: In prediction mode, if any of the three files
       cannot be opened.
   """
   self.trained_model_name = trained_model_name
   self.feat_index_name = feat_index_name
   self.stored_tagset = stored_tagset
   self.cv = cv
   self.folds = folds
   self.fp = FeatureProcessing()
   # sys.stderr.write emits the same text as the Python 2 print statement
   # but also works when the module is run under Python 3.
   if train:
     sys.stderr.write("Statement classifier initialized for training.\n")
     if self.cv:
       sys.stderr.write("Cross-validation will be done\n")
   else:
     # NOTE: unpickling can execute arbitrary code; only point these paths
     # at files produced by a trusted training run.
     # Each file is opened in a with-block so the handle is closed promptly
     # instead of being left to the garbage collector.
     with open(self.trained_model_name, "rb") as model_file:
       self.classifier = pickle.load(model_file)
     with open(self.feat_index_name, "rb") as index_file:
       feat_index = pickle.load(index_file)
     with open(self.stored_tagset, "rb") as tagset_file:
       self.tagset = pickle.load(tagset_file)
     self.fp.feat_index = feat_index
     sys.stderr.write("Stored model loaded. Statement classifier initialized for prediction.\n")
Exemple #2
0
 def __init__(self, do_train=False, trained_model_name="passage_crf_model", algorithm="crf"):
   """Create the trainer or tagger object for the selected algorithm.

   Args:
     do_train: When true a trainer is created (``self.trainer``);
       otherwise a tagger is created or loaded (``self.tagger``).
     trained_model_name: Stored as ``self.trained_model_name``. In the
       non-"crf" prediction branch it is the path of the pickled tagger.
     algorithm: ``"crf"`` selects the ``Trainer``/``Tagger`` classes;
       any other value selects the ``ChainCRF``/``FrankWolfeSSVM`` path.
       (Presumably python-crfsuite and pystruct respectively -- confirm
       against the file's imports.)

   Raises:
     IOError/OSError: In the non-"crf" prediction branch, if the model
       file, "ssvm_feat_index.pkl" or "ssvm_label_index.pkl" cannot be
       opened.
   """
   self.trained_model_name = trained_model_name
   self.fp = FeatureProcessing()
   self.do_train = do_train
   self.algorithm = algorithm
   if algorithm == "crf":
     if do_train:
       self.trainer = Trainer()
     else:
       # The tagger is only constructed here; no model file is opened
       # in this method.
       self.tagger = Tagger()
   elif do_train:
     model = ChainCRF()
     self.trainer = FrankWolfeSSVM(model=model)
     # Start with empty indices when training.
     self.feat_index = {}
     self.label_index = {}
   else:
     # NOTE: unpickling can execute arbitrary code; only load files
     # produced by a trusted training run.
     # with-blocks close each handle as soon as it has been read.
     with open(self.trained_model_name, "rb") as model_file:
       self.tagger = pickle.load(model_file)
     with open("ssvm_feat_index.pkl", "rb") as feat_index_file:
       self.feat_index = pickle.load(feat_index_file)
     with open("ssvm_label_index.pkl", "rb") as label_index_file:
       label_index = pickle.load(label_index_file)
     # Invert the stored mapping so its values can be looked up as keys.
     self.rev_label_index = {i: x for x, i in label_index.items()}
Exemple #3
0
import sys
import codecs
import pickle
from features import FeatureProcessing

# Featurize a tab-separated training file and pickle the result.
#
# Usage: <script> TRAINFILE OUTFILE
#   TRAINFILE: UTF-8 text, one "<label>\t<clause>" pair per line.
#   OUTFILE:   receives a pickled (feats, train_labels, feat_index) tuple.
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: %s TRAINFILE OUTFILE" % sys.argv[0])

trainfile_name = sys.argv[1]
outfile_name = sys.argv[2]
fp = FeatureProcessing()

# The with-block closes the input file once it has been read. Blank lines
# are skipped: they would otherwise yield a 1-tuple and make the
# label/clause unpacking below fail.
with codecs.open(trainfile_name, "r", "utf-8") as trainfile:
    train_data = [
        tuple(line.split("\t"))
        for line in (raw.strip() for raw in trainfile)
        if line
    ]

# Transpose the (label, clause) pairs into two parallel sequences.
train_labels, train_clauses = zip(*train_data)
# sys.stderr.write emits the same text as the Python 2 print statement
# and also works under Python 3.
sys.stderr.write("Indexing features..\n")
fp.index_data(train_clauses)
feats = [fp.featurize(clause) for clause in train_clauses]

# Close (and therefore flush) the output file deterministically.
with open(outfile_name, "wb") as outfile:
    pickle.dump((feats, train_labels, fp.feat_index), outfile)