Example #1
0
    def __init__(self, loc=MOHX_LOCATION):
        """Load the MOH-X corpus from a comma-separated file.

        Each line after the header holds one sentence; column 3 is the
        space-separated sentence text, and the last two columns give the
        target word's index and its metaphor tag.

        Args:
            loc: path to the MOH-X CSV file (defaults to MOHX_LOCATION).
        """
        self.instances, self.words = [], []

        # Close the file deterministically (original leaked the handle).
        with open(loc) as f:
            lines = f.readlines()[1:]  # skip the header row

        for c, line in enumerate(lines):
            sentence = Corpus.Sentence()
            data = line.split(",")
            sentence.id = str(c)
            word_data = data[3].split()

            # NOTE(review): assumes data[-2] is the target-word index and
            # data[-1] the metaphor label -- confirm against the file format.
            # Hoisted out of the loop; original converted it per token.
            target_index = int(data[-2])

            for i, text in enumerate(word_data):
                met = "tag-" + data[-1].strip() if i == target_index else "N"
                w = Corpus.Word(text=text,
                                met=met,
                                sentence=sentence,
                                index=i)
                sentence.words.append(w)
                self.words.append(w)

            self.instances.append(sentence)

        Corpus.add_dependencies(self.instances, MOHX_DEPS, lex_field=1)
Example #2
0
    def __init__(self):
        """Load the TroFi corpus, tagging occurrences of each target verb.

        The file interleaves marker lines (``***verb`` section headers and
        literal/nonliteral cluster headers) with tab-separated sentence
        lines.  A sentence's label comes from the letter codes in its
        second field; any token whose verb lemma matches the current
        section's verb receives that label.
        """
        super().__init__()
        self.instances, self.words = [], []
        lemmatizer = WordNetLemmatizer()
        cur_verb, cluster = "", ""

        # Close the file deterministically (original leaked the handle).
        with open(TROFI_LOCATION) as f:
            lines = f.readlines()

        for line in lines:
            # "***verb" starts a new target-verb section.
            if re.match(r"\*\*\*[a-z]", line):
                cur_verb = line.split("***")[1]
                continue
            elif "*" in line or not line.strip():
                # Cluster header (or blank separator).  NOTE(review):
                # `cluster` is recorded but never read below -- confirm
                # whether it is meant to feed into the label.
                if "literal" in line:
                    cluster = "literal"
                elif "nonliteral" in line:
                    cluster = "nonliteral"
                continue

            sentence = Corpus.Sentence()
            data = line.strip().split("\t")
            sentence.id = data[0]

            # Label codes: N=metaphoric, L=literal, U=unknown.  Later
            # checks deliberately override earlier ones (original order kept).
            met = ""
            if "N" in data[1]:
                met = "met"
            if "L" in data[1]:
                met = "N"
            if "U" in data[1]:
                met = "?"

            # Hoisted: the original re-split data[2] on every iteration.
            tokens = data[2].split()
            for i, word in enumerate(tokens):
                v_lem = lemmatizer.lemmatize(word, "v")
                cur_met = "tag-" + met if v_lem == cur_verb else "N"
                w = Corpus.Word(text=word,
                                met=cur_met,
                                sentence=sentence,
                                index=i)
                sentence.words.append(w)
                self.words.append(w)

            self.instances.append(sentence)

        Corpus.add_dependencies(self.instances, TROFI_DEPS, lex_field=1)
Example #3
0
    def __init__(self, corpus_location):
        """Load a CSV corpus where each row marks one metaphoric word.

        Args:
            corpus_location: path to a CSV file whose rows hold a sentence
                id (column 1), the sentence text (column 3), and -- in the
                last two columns -- the target word's index and a tag.
        """
        self.instances, self.words = [], []

        # Materialize the rows inside a `with` so the file handle is closed
        # (original leaked it).
        with open(corpus_location) as f:
            reader = csv.reader(f)
            next(reader)  # skip the header row
            rows = list(reader)

        for line in rows:
            sentence = Corpus.Sentence()
            sentence.id = line[1]

            index = int(line[-2])
            # NOTE(review): `tag` is parsed but never used.  Kept so a
            # malformed last column still raises ValueError, as before.
            tag = int(line[-1])

            for i, word in enumerate(line[3].split()):
                met = "met" if i == index else "N"
                w = Corpus.Word(text=word, sentence=sentence, met=met, index=i)
                sentence.words.append(w)
                self.words.append(w)

            self.instances.append(sentence)
Example #4
0
def load_vuamc_csv(filename=VUAMC_CSV):
    """Load the VUAMC corpus from its CSV export.

    Each row holds a source-file id, a sentence id, and a space-separated
    token string whose tokens look like ``pos;;lemma;;text``; an ``M_``
    prefix on the text marks a metaphoric word, and underscore-joined
    multi-word expressions are split back into individual words here.

    Args:
        filename: path to the VUAMC CSV export (defaults to VUAMC_CSV).

    Returns:
        A ``(sentences, all_words)`` tuple: the parsed Corpus.Sentence
        list and a flat list of every Corpus.Word created.
    """
    with codecs.open(filename, encoding="latin-1", errors='replace') as f:
        data = list(csv.reader(f))

    sentences = []
    all_words = []

    # BUG FIX: the original iterated `range(1, len(data[1:]))`, i.e.
    # `range(1, len(data) - 1)`, which skips the header as intended but
    # also silently drops the final data row.  Iterate the rows directly.
    for line_data in data[1:]:
        if not line_data:
            continue
        sentence = Corpus.Sentence()

        sentence.source_file = line_data[0]

        # Map the source-file id onto its genre/domain, when known.
        if sentence.source_file in ACADEMIC:
            sentence.domain = "academic"
        elif sentence.source_file in CONVERSATION:
            sentence.domain = "conversation"
        elif sentence.source_file in FICTION:
            sentence.domain = "fiction"
        elif sentence.source_file in NEWS:
            sentence.domain = "news"
        sentence.id = line_data[1]

        j = 0  # running word index within the sentence
        # Hoisted: the original re-split the token string on every iteration.
        for token in line_data[2].split():
            w_data = token.split(";;")
            # An "M_" marker on the text field flags a metaphoric word;
            # the first two characters (the marker) are stripped off.
            if "M_" in w_data[-1]:
                met = "met"
                word_text = w_data[-1][2:]
            else:
                met = "N"
                word_text = w_data[-1]

            pos = w_data[0]
            lemma = w_data[1]

            for extra_words in word_text.split("_"):
                # Pieces containing no letters, digits, or punctuation
                # (e.g. empty residue of a leading/trailing underscore)
                # become "none" placeholders so the index sequence stays
                # aligned with the dependency parse.
                if not set(extra_words).intersection(
                        str(string.punctuation + string.ascii_letters +
                            string.digits)):
                    sentence.words.append(
                        Corpus.Word(text="none",
                                    met="none",
                                    pos="none",
                                    lemma="none",
                                    sentence=sentence,
                                    index=j))
                    j += 1
                    continue

                word = Corpus.Word(text=extra_words,
                                   met=met,
                                   pos=pos,
                                   lemma=lemma,
                                   sentence=sentence,
                                   index=j)

                sentence.words.append(word)
                all_words.append(word)
                j += 1
        sentences.append(sentence)

    Corpus.add_dependencies(sentences, VUAMC_DEPS)
    Corpus.add_vn_parse(sentences, VUAMC_VN)
    Corpus.add_allen_parse(sentences, VUAMC_ALLEN)
    #Corpus.populate_vn_from_heads(sentences)

    return sentences, all_words