Python Corpus.texts Examples

Programming Language: Python

Namespace/Package Name: structures

Class/Type: Corpus

Method/Function: texts

Examples at hotexamples.com: 5

Python Corpus.texts - 5 examples found. These are the top-rated real-world Python examples of structures.Corpus.texts extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Corpus(5)

texts(3)

Frequently Used Methods

Corpus (5)

texts (3)

Example #1

Show file

File: stupid_splitter.py Project: tchewik/rusclasp

def main():
    """Process every ``json`` text of the corpus; print span count and run time.

    For each text returned by ``Corpus.texts(u'json')`` this calls
    ``sentence_splitter()``, then for every sentence calls
    ``stupid_span_splitter()``, adds ``len(sentence.spans)`` to a running
    total and calls ``get_boundaries()`` on each span.  Afterwards the text's
    ``write_stupid_clause_ann()`` and ``copy_into_brat()`` are called.

    Prints the total number of spans, then the elapsed wall-clock time in
    seconds (difference of two ``time.time()`` readings).
    """
    t1 = time.time()

    new_corpus = Corpus(u'/home/gree-gorey/stupid/')

    spans = 0

    for text in new_corpus.texts(u'json'):
        text.sentence_splitter()
        for sentence in text.sentences:
            sentence.stupid_span_splitter()

            # Counted right after splitting, before boundaries are computed.
            spans += len(sentence.spans)

            for span in sentence.spans:
                span.get_boundaries()

        text.write_stupid_clause_ann()
        text.copy_into_brat(u'/opt/brat-v1.3_Crunchy_Frog/data/stupid/')

    # Single-argument parenthesised print: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print(spans)

    t2 = time.time()

    print(t2 - t1)

Example #2

Show file

File: dummy_ann.py Project: nasedkinav/rusclasp

def main():
    """Write dummy annotations for every ``txt`` text and print the run time.

    Each text returned by ``Corpus.texts('txt')`` gets ``write_dummy_ann()``
    called on it and is then passed to ``copy_into_brat()`` with the brat
    data path and ``True`` as the second argument.  The elapsed wall-clock
    time in seconds is printed once all texts are done.
    """
    started = time.time()

    corpus = Corpus('/home/gree-gorey/CorpusTest/')
    for document in corpus.texts('txt'):
        document.write_dummy_ann()
        document.copy_into_brat('/opt/brat-v1.3_Crunchy_Frog/data/left/', True)

    finished = time.time()
    elapsed = finished - started
    print(elapsed)

Example #3

Show file

File: dummy_ann.py Project: gree-gorey/rusclasp

def main():
    """Write dummy annotations for every ``txt`` text and print the run time.

    Each text returned by ``Corpus.texts(u'txt')`` gets ``write_dummy_ann()``
    called on it and is then passed to ``copy_into_brat()`` with the brat
    data path and ``True`` as the second argument.  The elapsed wall-clock
    time in seconds (difference of two ``time.time()`` readings) is printed
    at the end.
    """
    t1 = time.time()

    new_corpus = Corpus(u'/home/gree-gorey/CorpusTest/')

    for text in new_corpus.texts(u'txt'):
        text.write_dummy_ann()
        text.copy_into_brat(u'/opt/brat-v1.3_Crunchy_Frog/data/left/', True)

    t2 = time.time()

    # Single-argument parenthesised print: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print(t2 - t1)

Example #4

Show file

File: split.py Project: gree-gorey/rusclasp

def main():
    """Run the span pipeline on every ``json`` text and print the run time.

    For each text returned by ``Corpus.texts(u'json')`` this calls
    ``sentence_splitter()``, runs the per-sentence steps below in order, and
    then calls ``write_clause_ann()`` and ``copy_into_brat()`` on the text.
    The elapsed wall-clock time in seconds (difference of two
    ``time.time()`` readings) is printed at the end.
    """
    t1 = time.time()

    # Alternative corpus directories the author left commented out:
    #   /home/gree-gorey/tested/
    #   /home/gree-gorey/tested_tested/
    new_corpus = Corpus(u'/home/gree-gorey/CorpusTemp/')

    for text in new_corpus.texts(u'json'):
        text.sentence_splitter()
        for sentence in text.sentences:
            # NOTE: sentence.find_pp() and sentence.find_coordination() are
            # disabled (not called) in this pipeline.
            sentence.find_complimentizers()
            sentence.find_names()
            sentence.eliminate_pair_comma()

            sentence.span_splitter()

            sentence.get_shared_tokens()  # loop through all the spans 1
            sentence.split_double_complimentizers()  # loop through all the spans 2

            for span in sentence.spans:  # loop through all the spans 3
                # decide whether span is inserted or embedded or neither
                span.type()

            # split embedded span if it contains > 1 predicate
            sentence.split_embedded()

            # walk through spans and join whenever possible
            sentence.restore_embedded()

            sentence.split_base()
            sentence.restore_base()

            for span in sentence.spans:
                span.get_boundaries()

        text.write_clause_ann()
        text.copy_into_brat(u'/opt/brat-v1.3_Crunchy_Frog/data/right/')

    t2 = time.time()

    # Single-argument parenthesised print: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print(t2 - t1)

Example #5

Show file

def main():
    """Run the span pipeline on every ``json`` text and print the run time.

    For each text returned by ``Corpus.texts(u'json')`` this calls
    ``sentence_splitter()``, runs the per-sentence steps below in order, and
    then calls ``write_clause_ann()`` and ``copy_into_brat()`` on the text.
    The elapsed wall-clock time in seconds (difference of two
    ``time.time()`` readings) is printed at the end.
    """
    t1 = time.time()

    # Alternative corpus directories the author left commented out:
    #   /home/gree-gorey/tested/
    #   /home/gree-gorey/tested_tested/
    new_corpus = Corpus(u'/home/gree-gorey/CorpusTemp/')

    for text in new_corpus.texts(u'json'):
        text.sentence_splitter()
        for sentence in text.sentences:
            # NOTE: sentence.find_pp() and sentence.find_coordination() are
            # disabled (not called) in this pipeline.
            sentence.find_complimentizers()
            sentence.find_names()
            sentence.eliminate_pair_comma()

            sentence.span_splitter()

            sentence.get_shared_tokens()  # loop through all the spans 1
            sentence.split_double_complimentizers()  # loop through all the spans 2

            for span in sentence.spans:  # loop through all the spans 3
                # decide whether span is inserted or embedded or neither
                span.type()

            # split embedded span if it contains > 1 predicate
            sentence.split_embedded()

            # walk through spans and join whenever possible
            sentence.restore_embedded()

            sentence.split_base()
            sentence.restore_base()

            for span in sentence.spans:
                span.get_boundaries()

        text.write_clause_ann()
        text.copy_into_brat(u'/opt/brat-v1.3_Crunchy_Frog/data/right/')

    t2 = time.time()

    # Single-argument parenthesised print: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print(t2 - t1)