def __init__(self, _model_path = paths.SEG_MODEL_PATH, _model_file = None, _scale_model_file = None,
                 verbose=False, _name="segmenter", dependencies = False):
    """
    Attaches a classifier, a feature writer and a syntax parser to ``self``.

    NOTE(review): this module-level function has the same signature and body
    as Segmenter.__init__ defined further down and looks like a stray copy --
    confirm whether anything relies on it before removing it.

    _model_path, _model_file, _scale_model_file: forwarded to SVMClassifier as
        model_path, bin_model_file and bin_scale_model_file.
    verbose: forwarded to all three collaborators.
    _name: forwarded to SVMClassifier as name.
    dependencies: forwarded to SyntaxParser.
    """
    # Collaborators are created in the same order as before: classifier,
    # feature writer, syntax parser.
    self.svm_classifier = SVMClassifier(
        class_type='bin',
        software='libsvm',
        model_path=_model_path,
        bin_model_file=_model_file,
        bin_scale_model_file=_scale_model_file,
        output_filter=float,
        name=_name,
        verbose=verbose,
    )
    self.feature_writer = SegFeatureWriter(verbose=verbose)
    self.syntax_parser = SyntaxParser(verbose=verbose, dependencies=dependencies)
class Segmenter: 
    """
    Segmenter built around a binary libsvm classifier (set up in __init__),
    with helpers for building lexicalized trees, splitting pre-marked text
    into sentences and reading dependency files.
    """
    
    # Maps escaped token spellings (e.g. '-LRB-', '\\/', '``') to the literal
    # characters they stand for.  Presumably these are Penn Treebank escapes,
    # judging by the name -- where the table is consumed is not visible here.
    penn_special_chars = {'-LRB-': '(', '-RRB-': ')', '-LAB-': '<', '-RAB-': '>',
                        '-LCB-': '{', '-RCB-': '}', '-LSB-': '[', '-RSB-':']',
                      '\\/' : '/', '\\*' : '*', '``' : '"', "''" : '"', "`" : "'"}
    
    def __init__(self, _model_path = paths.SEG_MODEL_PATH, _model_file = None, _scale_model_file = None,
                 verbose=False, _name="segmenter", dependencies = False):
        """
        Sets up the three collaborators used by the segmenter.

        _model_path, _model_file, _scale_model_file: forwarded to SVMClassifier
            as model_path, bin_model_file and bin_scale_model_file.
        verbose: forwarded to the classifier, the feature writer and the parser.
        _name: forwarded to SVMClassifier as name.
        dependencies: forwarded to SyntaxParser.
        """
        # Binary libsvm classifier whose outputs are passed through float().
        self.svm_classifier = SVMClassifier(
            class_type='bin',
            software='libsvm',
            model_path=_model_path,
            bin_model_file=_model_file,
            bin_scale_model_file=_scale_model_file,
            output_filter=float,
            name=_name,
            verbose=verbose,
        )
        self.feature_writer = SegFeatureWriter(verbose=verbose)
        self.syntax_parser = SyntaxParser(verbose=verbose, dependencies=dependencies)
    

    def create_lexicalized_tree(self, mrg, heads):
        """
        Creates a lexicalized syntax tree given a MRG-style parse and Penn2Malt style heads.

        mrg: bracketed parse, handed to LexicalizedTree.parse.
        heads: head information, handed to lexicalize() with from_string = True
            (so presumably the heads content as a string rather than a file
            name -- confirm against callers).

        Returns the lexicalized tree.
        """
        # Raw string: identical pattern text to the former non-raw literal, but
        # without relying on the invalid "\)" / "\(" string escapes, which warn
        # on recent Python versions.  A leaf is a run of non-parenthesis
        # characters preceded by whitespace (Vanessa's modification).
        leaf_pattern = r'(?<=\s)[^\)\(]+'

        tree = LexicalizedTree.parse(mrg, leaf_pattern=leaf_pattern)
        tree.lexicalize(heads, from_string=True)
        return tree
    
    
    def split_by_sentence(self, text):
        """
        Splits pre-processed text into (sentence, marker) pairs.

        The text is expected to mark the end of a sentence with <s> and the end
        of a paragraph with <p>.  Newlines are removed first; each sentence is
        whitespace-stripped and paired with the marker ("<s>" or "<p>") that
        terminated it.  Text after the last marker is not returned.
        """
        markers = ("<s>", "<p>")
        marker_len = 3

        flat = text.replace("\n", "")
        total = len(flat)

        pairs = []
        segment_start = 0
        cursor = 0
        while cursor < total:
            if not flat.startswith(markers, cursor):
                cursor += 1
                continue

            # A marker begins at the cursor: emit the text collected so far and
            # resume scanning right after the marker.
            marker = flat[cursor:cursor + marker_len]
            pairs.append((flat[segment_start:cursor].strip(), marker))
            cursor += marker_len
            segment_start = cursor

        return pairs
    
    def segment_tree(self, t):
        """
        Segments a text represented as a lexicalized syntax trees
        Returns a list of class labels for each token of the tree
        """
        data_to_classify = self.feature_writer.extract_features([t])
        
        results = []
        for d in data_to_classify:
            #print d
            results.append(self.svm_classifier.classify(d))
        
        return results
    
    def get_parsed_trees_from_string(self, tree_strings):
        """
        Parses each non-blank line of tree_strings into a LexicalizedTree.

        tree_strings: iterable yielding one bracketed parse per item (e.g. the
            lines of a "\\n"-separated parse output).  NOTE(review): a single
            unsplit string would be iterated character by character -- callers
            are expected to pass lines.

        Returns the list of parsed trees, in input order; lines that are empty
        after stripping are skipped.
        """
        # Raw string: identical pattern text to the former non-raw literal, but
        # without relying on the invalid "\)" / "\(" string escapes, which warn
        # on recent Python versions.
        leaf_pattern = r'(?<=\s)[^\)\(]+'

        stripped_lines = (line.strip() for line in tree_strings)
        return [LexicalizedTree.parse(tree_str, leaf_pattern=leaf_pattern)
                for tree_str in stripped_lines
                if tree_str != '']


    def get_deps(self, deps_filename):
        """
        Reads a dependencies file and returns one string per group of lines.

        Lines are whitespace-stripped and accumulated, each prefixed with "\\n",
        until a blank line closes the group and the accumulated string is
        appended to the result.  A final group that is not followed by a blank
        line is not returned.

        deps_filename: path of the file to read.

        Returns the list of dependency strings.
        Raises: any exception hit while opening or reading the file is
            re-raised after a notice is printed.
        """
        try:
            # "with" guarantees the handle is closed even when reading fails.
            with open(deps_filename, 'r') as dep_f:
                deps = []
                sent_dep_str = ''

                # "started" is True while a blank line should close the current
                # group; it begins True, so a leading blank line yields ''.
                started = True
                for line in dep_f:
                    line = line.strip()
                    if line == '' and started:
                        started = False
                        deps.append(sent_dep_str)
                        sent_dep_str = ''
                    else:
                        started = True
                        sent_dep_str += '\n' + line
            return deps
        except Exception:
            # Parenthesised single string prints identically under Python 2's
            # print statement and Python 3's print function.
            print("*** Could not read the input file...")
            raise