def header(self, fileids=None, **kwargs):
    """
    Returns header(s) of specified fileids.
    """
    return concat(
        [
            self._view(
                self.add_root(fileid), mode=NKJPCorpusReader.HEADER_MODE, **kwargs
            ).handle_query()
            for fileid in fileids
        ]
    )

def sents(self, fileids=None, **kwargs):
    """
    Returns sentences in specified fileids.
    """
    return concat(
        [
            self._view(
                self.add_root(fileid), mode=NKJPCorpusReader.SENTS_MODE, **kwargs
            ).handle_query()
            for fileid in fileids
        ]
    )

def words(self, fileids=None, **kwargs):
    return concat(
        [
            self._view(fileid, tags=False, **kwargs)
            for fileid in self._list_morph_files(fileids)
        ]
    )

def tagged_paras(self, fileids=None, **kwargs):
    return concat(
        [
            self._view(fileid, mode=IPIPANCorpusView.PARAS_MODE, **kwargs)
            for fileid in self._list_morph_files(fileids)
        ]
    )

def raw(self, fileids=None, **kwargs):
    """
    Returns raw text of specified fileids.
    """
    return concat(
        [
            self._view(
                self.add_root(fileid), mode=NKJPCorpusReader.RAW_MODE, **kwargs
            ).handle_query()
            for fileid in fileids
        ]
    )

def parsed_sents2(self, fileids=None):
    return concat(
        [
            JapaneseCorpusView(
                fileid,
                enc,
                False,
                False,
                False,
                True,
                self._syntax_parser,
                self._word_tokenizer,
                self._sent_tokenizer,
                self._case_parser,
            )
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

def raw(self, fileids=None):
    """
    :return: the given file(s) as a single string.
    :rtype: str
    """
    if fileids is None:
        fileids = self._fileids
    elif isinstance(fileids, str):
        fileids = [fileids]
    return concat([self.open(f).read() for f in fileids])

def fixed_parsed_sents(self, fileids=None, top_label="root"):
    from nltk.corpus.reader.util import concat
    from nltk.corpus.reader.dependency import DependencyCorpusView
    from nltk.parse import DependencyGraph

    sents = concat(
        [
            DependencyCorpusView(fileid, False, True, True, encoding=enc)
            for fileid, enc in self.abspaths(fileids, include_encoding=True)
        ]
    )
    return [
        DependencyGraph(sent, top_relation_label=top_label, cell_separator="\t")
        for sent in sents
    ]

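# Hedged usage sketch (not from the original source): fixed_parsed_sents can be
# called as a plain function on an NLTK DependencyCorpusReader instance; the
# corpus root and file pattern below are placeholders.
from nltk.corpus.reader.dependency import DependencyCorpusReader

reader = DependencyCorpusReader("corpora/deps", r".*\.dp")  # placeholder path
graphs = fixed_parsed_sents(reader, top_label="ROOT")
print(graphs[0].tree())  # a DependencyGraph renders as an nltk Tree
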
def tagged_words(self, fileids=None, **kwargs):
    """
    Call with specified tags as a list, e.g. tags=['subst', 'comp'].
    Returns tagged words in specified fileids.
    """
    tags = kwargs.pop('tags', [])
    return concat(
        [
            self._view(
                self.add_root(fileid),
                mode=NKJPCorpusReader.WORDS_MODE,
                tags=tags,
                **kwargs
            ).handle_query()
            for fileid in fileids
        ]
    )

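# Hedged usage sketch for the NKJP accessors above, via NLTK's NKJPCorpusReader;
# the corpus must be installed manually, and the root below is a placeholder.
from nltk.corpus.reader import NKJPCorpusReader

x = NKJPCorpusReader(root="/home/USER/nltk_data/corpora/nkjp/", fileids="")
ids = x.fileids()
x.header(fileids=ids)
x.sents(fileids=ids)
x.tagged_words(fileids=ids, tags=['subst', 'comp'])
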
def raw(self, fileids=None):
    """
    Return the corpora in their raw form.
    """
    if fileids is None:
        fileids = self._fileids
    elif isinstance(fileids, string_types):
        fileids = [fileids]
    return concat([self.open(f).read() for f in fileids])

def _views(self, fileids=None, sent=False, tag=False, strip_space=True, stem=False):
    """A helper function that instantiates BNCWordViews or the list of
    words/sentences."""
    f = BNCWordView if self._lazy else self._words
    return concat(
        [
            f(fileid, sent, tag, strip_space, stem)
            for fileid in self.abspaths(fileids)
        ]
    )

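# Hedged sketch (not the verbatim source) of how a public BNC accessor
# plausibly delegates to the _views helper above:
def words(self, fileids=None, strip_space=True, stem=False):
    # Words only: no sentence grouping, no POS tags.
    return self._views(
        fileids, sent=False, tag=False, strip_space=strip_space, stem=stem
    )
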
def sents(self, fileids=None, **kwargs):
    return concat(
        [
            self._view(fileid, mode=IPIPANCorpusView.SENTS_MODE, tags=False, **kwargs)
            for fileid in self._list_morph_files(fileids)
        ]
    )

def aligned_sents(self, fileids=None):
    """
    :return: the given file(s) as a list of AlignedSent objects.
    :rtype: list of C{AlignedSent}
    """
    return concat(
        [
            AlignedSentCorpusView(
                fileid,
                enc,
                True,
                True,
                self._word_tokenizer,
                self._sent_tokenizer,
                self._alignedsent_block_reader,
            )
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

def parsed_docs(self, fileids=None):
    """
    @return: A list of parsed corpus documents.
    @rtype: C{list} of C{StreamBackedCorpusView}
    @param fileids: A list of corpus files.
    @type fileids: C{list} of C{str} or regular expression
    """
    return concat(
        [
            StreamBackedCorpusView(fileid, self._read_parsed_block, encoding=enc)
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

def words(self, fileids=None):
    """
    @return: the given file(s) as a list of words and punctuation symbols.
    @rtype: C{list} of C{str}
    """
    return concat(
        [
            self._alignedsent_corpus_view(
                fileid,
                enc,
                False,
                False,
                self._word_tokenizer,
                self._sent_tokenizer,
                self._alignedsent_block_reader,
            )
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

def words(self, fileids=None):
    """
    :return: the given file(s) as a list of words and punctuation symbols.
    :rtype: list of str
    """
    return concat(
        [
            AlignedSentCorpusView(
                fileid,
                enc,
                False,
                False,
                self._word_tokenizer,
                self._sent_tokenizer,
                self._alignedsent_block_reader,
            )
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

def docs(self, fileids=None):
    """
    Returns the full Tweet objects, as specified by `Twitter documentation on
    Tweets <https://dev.twitter.com/docs/platform-objects/tweets>`_

    :return: the given file(s) as a list of dictionaries deserialised from JSON.
    :rtype: list(dict)
    """
    return concat(
        [
            self.CorpusView(path, self._read_tweets, encoding=enc)
            for (path, enc, fileid) in self.abspaths(fileids, True, True)
        ]
    )

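# Hedged usage sketch with NLTK's bundled twitter_samples corpus, whose reader
# provides this docs() method (requires nltk.download("twitter_samples")):
from nltk.corpus import twitter_samples

tweets = twitter_samples.docs("positive_tweets.json")
print(tweets[0]["text"])  # each item is a deserialised Tweet dict
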
def sents(self, fileids=None):
    """
    :return: the given file(s) as a list of sentences or utterances,
        each encoded as a list of word strings.
    :rtype: list of (list of str)
    """
    return concat(
        [
            AlignedSentCorpusView(
                fileid,
                enc,
                False,
                True,
                self._word_tokenizer,
                self._sent_tokenizer,
                self._alignedsent_block_reader,
            )
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

def raw(self, fileids=None):
    """
    @return: A list of corpus file contents.
    @rtype: C{list} of C{str}
    @param fileids: A list of corpus files.
    @type fileids: C{list} of C{str} or regular expression
    """
    if fileids is None:
        fileids = self._fileids
    elif isinstance(fileids, str):
        fileids = [fileids]
    return concat([self.open(f).read() for f in fileids])

def sents(self, fileids=None):
    """
    @return: the given file(s) as a list of sentences or utterances,
        each encoded as a list of word strings.
    @rtype: C{list} of (C{list} of C{str})
    """
    return concat(
        [
            self._alignedsent_corpus_view(
                fileid,
                enc,
                False,
                True,
                self._word_tokenizer,
                self._sent_tokenizer,
                self._alignedsent_block_reader,
            )
            for (fileid, enc) in self.abspaths(fileids, True)
        ]
    )

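# Hedged usage sketch: NLTK's comtrans corpus reader exposes aligned-sentence
# accessors like those above (requires nltk.download("comtrans")):
from nltk.corpus import comtrans

pair = comtrans.aligned_sents("alignment-en-fr.txt")[0]
print(pair.words, pair.mots)  # source-side and target-side token lists
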
def lemmatized_sents(corpus, fileids=None):
    """
    Returns trees whose leaves are (word, lemma) pairs.
    """
    from nltk import tree
    from nltk.util import LazyMap
    from nltk.corpus.reader.util import concat

    def lemmatized(element):
        if element:
            subtrees = map(lemmatized, element)
            subtrees = [t for t in subtrees if t]
            return tree.Tree(element.tag, subtrees)
        elif element.get('elliptic') == 'yes':
            return None
        else:
            return tree.Tree(
                element.get('pos') or element.get('ne') or 'unk',
                [(element.get('wd'), element.get('lem'))],
            )

    if not fileids:
        fileids = corpus.xmlreader.fileids()
    return LazyMap(
        lemmatized,
        concat([list(corpus.xmlreader.xml(fileid)) for fileid in fileids]),
    )

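# Hedged usage sketch (names hypothetical): assumes an AnCora-style corpus
# object that exposes an XML reader as .xmlreader, as lemmatized_sents expects.
trees = lemmatized_sents(ancora)  # 'ancora' is a hypothetical corpus object
print(trees[0].leaves())  # e.g. [('gatos', 'gato'), ...]
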
def sents(self, fileids=None, speaker='ALL', sent=True, stem=False,
          relation=None, pos=False, strip_space=True, replace=False):
    """
    @return: the given file(s) as a list of sentences
    @rtype: C{list} of (C{list} of C{str})
    @param speaker: If specified, select specific speakers defined in the
        corpus. Default is 'ALL'. Common choices are 'CHI' (all children)
        and 'MOT' (mothers)
    @param stem: If true, then use word stems instead of word strings.
    @param relation: If true, then return tuples of C{(str,relation_list)}
    @param pos: If true, then return tuples of C{(stem, part_of_speech)}
    @param strip_space: If true, then strip trailing spaces from word
        tokens. Otherwise, leave the spaces on the tokens.
    @param replace: If true, then use the replaced word instead of the
        original word (e.g., 'wat' will be replaced with 'watch')
    """
    return concat(
        [
            self._get_words(fileid, speaker, sent, stem, relation, pos,
                            strip_space, replace)
            for fileid in self.abspaths(fileids)
        ]
    )

def words(self, fileids=None, speaker='ALL', sent=None, stem=False,
          relation=False, pos=False, strip_space=True, replace=False):
    """
    :return: the given file(s) as a list of words
    :rtype: list(str)
    :param speaker: If list is specified, select specific speakers defined
        in the corpus. Default is 'ALL' (all participants). Common choices
        are ['CHI'] (all children), ['MOT'] (mothers), ['CHI','MOT']
        (exclude researchers)
    :param stem: If true, then use word stems instead of word strings.
    :param relation: If true, then return tuples of
        (stem, index, dependent_index)
    :param strip_space: If true, then strip trailing spaces from word
        tokens. Otherwise, leave the spaces on the tokens.
    :param replace: If true, then use the replaced word instead of the
        original word (e.g., 'wat' will be replaced with 'watch')
    """
    return concat(
        [
            self._get_words(fileid, speaker, sent, stem, relation, pos,
                            strip_space, replace)
            for fileid in self.abspaths(fileids)
        ]
    )

def tagged_sents(self, fileids=None, speaker='ALL', sent=True, stem=False,
                 relation=None, pos=True, strip_space=True, replace=False):
    """
    :return: the given file(s) as a list of sentences, each encoded as a
        list of ``(word,tag)`` tuples.
    :rtype: list(list(tuple(str,str)))
    :param speaker: If list is specified, select specific speakers defined
        in the corpus. Default is 'ALL' (all participants). Common choices
        are ['CHI'] (all children), ['MOT'] (mothers), ['CHI','MOT']
        (exclude researchers)
    :param stem: If true, then use word stems instead of word strings.
    :param relation: If true, then return tuples of
        ``(str,pos,relation_list)``. If there is manually-annotated
        relation info, it will return tuples of tuples of
        ``(str,pos,test_relation_list,str,pos,gold_relation_list)``
    :param strip_space: If true, then strip trailing spaces from word
        tokens. Otherwise, leave the spaces on the tokens.
    :param replace: If true, then use the replaced word instead of the
        original word (e.g., 'wat' will be replaced with 'watch')
    """
    return concat(
        [
            self._get_words(fileid, speaker, sent, stem, relation, pos,
                            strip_space, replace)
            for fileid in self.abspaths(fileids)
        ]
    )

def tagged_words(self, fileids=None, speaker='ALL', stem=False,
                 relation=False, strip_space=True, replace=False):
    """
    :return: the given file(s) as a list of tagged words and punctuation
        symbols, encoded as tuples ``(word,tag)``.
    :rtype: list(tuple(str,str))
    :param speaker: If specified, select specific speaker(s) defined in
        the corpus. Default is 'ALL' (all participants). Common choices
        are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude
        researchers)
    :param stem: If true, then use word stems instead of word strings.
    :param relation: If true, then return tuples of
        (stem, index, dependent_index)
    :param strip_space: If true, then strip trailing spaces from word
        tokens. Otherwise, leave the spaces on the tokens.
    :param replace: If true, then use the replaced (intended) word instead
        of the original word (e.g., 'wat' will be replaced with 'watch')
    """
    sent = None
    pos = True
    return concat(
        [
            self._get_words(fileid, speaker, sent, stem, relation, pos,
                            strip_space, replace)
            for fileid in self.abspaths(fileids)
        ]
    )

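# Hedged usage sketch with NLTK's CHILDESCorpusReader, which defines accessors
# like those above; the XML corpus must be downloaded manually, and the root
# and fileid pattern below are placeholders.
from nltk.corpus.reader import CHILDESCorpusReader

corpus_root = "corpora/childes/data-xml/Eng-USA/"  # placeholder path
valian = CHILDESCorpusReader(corpus_root, "Valian/.*.xml")
print(valian.words(speaker=['CHI'])[:10])
print(valian.tagged_sents()[0])
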
def tagged_sents(self, fileids=None):
    if not fileids:
        fileids = self.xmlreader.fileids()
    return LazyMap(
        tagged, concat([list(self.xmlreader.xml(fileid)) for fileid in fileids])
    )

def stemmed_words(self, fileids=None):
    return {
        t[0].lower(): t[1].lower() for t in concat(self.stemmed_sents(fileids))
    }

def freqs(self, fileids=None):
    '''
    Return trigram frequencies for a language from the corpus
    '''
    return concat(
        [
            self.CorpusView(path, self._read_trigram_block)
            for path in self.abspaths(fileids=fileids)
        ]
    )

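# Hedged usage sketch: this freqs() signature is file-based, but NLTK's bundled
# Crubadan reader is more commonly queried per language (requires
# nltk.download("crubadan")):
from nltk.corpus import crubadan

fd = crubadan.lang_freq("en")  # FreqDist of character trigrams for English
print(fd.most_common(5))
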
def tagged_words(self, fileids=None):
    # XXX: use LazyConcatenation?
    return concat(self.tagged_sents(fileids))

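# A hedged sketch of the lazy variant the XXX note above contemplates, using
# nltk's LazyConcatenation; the name tagged_words_lazy is ours, not the source's.
from nltk.util import LazyConcatenation

def tagged_words_lazy(self, fileids=None):
    # Flattens on demand instead of materialising every tagged sentence.
    return LazyConcatenation(self.tagged_sents(fileids))
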
def sents(self, fileids=None):
    # FIXME: not lazy!
    if not fileids:
        fileids = self.xmlreader.fileids()
    return LazyMap(
        untagged, concat([list(self.xmlreader.xml(fileid)) for fileid in fileids])
    )

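# The tagged/untagged callables used above are module-level helpers not shown
# here; a hedged sketch of what they plausibly look like for AnCora-style XML,
# mirroring the wd/pos/ne attributes used by lemmatized_sents earlier:
def untagged(element):
    # Bare word strings from the leaves of a sentence element.
    return [leaf.get('wd') for leaf in element.iter() if leaf.get('wd') is not None]

def tagged(element):
    # (word, pos) pairs, falling back to a named-entity label or 'unk'.
    return [
        (leaf.get('wd'), leaf.get('pos') or leaf.get('ne') or 'unk')
        for leaf in element.iter()
        if leaf.get('wd') is not None
    ]
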
def tagged_words(self, fileids=None, **kwargs):
    return concat(
        [self._view(fileid, **kwargs) for fileid in self._list_morph_files(fileids)]
    )

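# Hedged usage sketch for the IPI PAN accessors above, via NLTK's
# IPIPANCorpusReader; the corpus must be installed locally, and the root and
# fileid pattern below are placeholders.
from nltk.corpus.reader import IPIPANCorpusReader

ipipan = IPIPANCorpusReader("corpora/ipipan", r".*\.xml")  # placeholder root
print(ipipan.words()[:10])
print(ipipan.tagged_words()[:5])
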