コード例 #1
0
ファイル: parse.py プロジェクト: JoeHill/arbitrage
def __preprocess_sentence(sentence):
    """
    Split a sentence into whitespace-separated tokens and run the list helpers over them.

    The sentence is split on runs of whitespace only, so punctuation stays
    attached to the word it adjoins. The tokens are handed to
    ``utils.comma_delimited_list_indicies``; when that yields a truthy result,
    the tokens and that result are passed on to
    ``utils.enumerate_sentence_with_list``.

    NOTE(review): the value produced by ``enumerate_sentence_with_list`` is
    discarded and this function returns ``None`` on every path, so it looks
    unfinished -- confirm the intended return value before relying on it.

    :param str sentence: The sentence to preprocess.

    :returns: Always ``None`` in the current implementation.
    """
    tokens = re.split(r"\s+", sentence)
    list_positions = utils.comma_delimited_list_indicies(tokens)
    if not list_positions:
        return None

    # The call is kept even though its result is unused, so any work the
    # helper performs still happens exactly as before.
    utils.enumerate_sentence_with_list(tokens, list_positions)
    return None
コード例 #2
0
ファイル: unit.py プロジェクト: JoeHill/arbitrage
    def test_enumerate_sentence_with_list(self):
        # Checks the first three sentences produced by
        # utils.enumerate_sentence_with_list for the two-list fixture.
        # Only the two-list fixture is exercised here.
        list_positions = utils.comma_delimited_list_indicies(self.sentence_with_two_lists)
        sens = utils.enumerate_sentence_with_list(self.sentence_with_two_lists, list_positions)

        # Every expected sentence shares these fragments; only one word in
        # each of the two list slots differs between them.
        opening = ["This", "is", "a", "sentence", "listing"]
        bridge = ["and", "boolean,", "but", "it", "also", "lists", "the"]
        closing = ["and", "gloves."]

        # (word in the first list slot, word in the second list slot),
        # in the order the results are expected to appear.
        slot_words = (
            ("vestiges", "hats"),
            ("aardvarks", "shoes"),
            ("igloos", "carribean"),
        )

        for position, (first_word, second_word) in enumerate(slot_words):
            expected = opening + [first_word] + bridge + [second_word] + closing
            eq_(sens[position], expected)
コード例 #3
0
ファイル: unit.py プロジェクト: JoeHill/arbitrage
 def test_comma_delimited_list_indicies(self):
     # Checks the index groups reported by utils.comma_delimited_list_indicies
     # for the single-list and two-list fixtures.
     single_list_groups = utils.comma_delimited_list_indicies(self.sentence_with_list)
     two_list_groups = utils.comma_delimited_list_indicies(self.sentence_with_two_lists)

     # (result under test, group position, expected set of token indices)
     checks = (
         (single_list_groups, 0, {5, 6, 7}),
         (two_list_groups, 0, {5, 6, 7}),
         (two_list_groups, 1, {15, 16, 17}),
     )
     for groups, position, wanted in checks:
         eq_(groups[position], wanted)