コード例 #1
0
ファイル: Corpus.py プロジェクト: gharp/Ubiqu-Ity
 def __init__(
     self,
     path,
     name=None,
     extensions=(".txt",),
     texts_path=None,
     metadata_path=None,
     output_path=None
 ):
     """Initialise the corpus rooted at ``path``.

     Args:
         path: Corpus location as a ``str``. A relative path is joined
             onto ``Ity.corpus_root``.
         name: Corpus name. Anything that is not exactly a ``str``
             (including ``None``) is replaced by the basename of the
             resolved corpus path.
         extensions: Non-empty, non-string collection; stored as-is on
             ``self.extensions``.
         texts_path: Handed to ``get_valid_path`` with the corpus path
             as both the relative base and the fallback.
         metadata_path: Handed to ``get_valid_path``; the fallback is
             ``Ity.metadata_root`` joined with the corpus name.
         output_path: Handed to ``get_valid_path``; the fallback is
             ``Ity.output_root`` joined with the corpus name.

     Raises:
         ValueError: ``path`` is not a ``str``, the resolved texts path
             is not an existing ``str`` path, or ``extensions`` is
             ``None``, a ``str`` or empty.
         IOError: The resolved corpus path does not exist.
     """
     # --- Corpus root ---
     if type(path) is not str:
         raise ValueError("Invalid path argument provided.")
     # Relative paths are anchored at Ity.corpus_root.
     anchored = path if os.path.isabs(path) else os.path.join(Ity.corpus_root, path)
     # abspath() also normalises the path (e.g. drops trailing slashes).
     self.path = os.path.abspath(anchored)
     if not os.path.exists(self.path):
         raise IOError("Corpus at path '%s' does not exist." % self.path)

     # --- Texts directory (must exist) ---
     self.texts_path = get_valid_path(
         path=texts_path, relative_path_base=self.path, fallback_path=self.path
     )
     texts_ok = type(self.texts_path) is str and os.path.exists(self.texts_path)
     if not texts_ok:
         raise ValueError("Path to texts ('%s') doesn't exist." % self.texts_path)

     # --- Corpus name (defaults to the directory name) ---
     self.name = name if type(name) is str else os.path.basename(self.path)

     # --- Metadata and output locations ---
     metadata_fallback = os.path.join(Ity.metadata_root, self.name)
     self.metadata_path = get_valid_path(
         path=metadata_path,
         relative_path_base=self.path,
         fallback_path=metadata_fallback
     )
     output_fallback = os.path.join(Ity.output_root, self.name)
     self.output_path = get_valid_path(
         path=output_path,
         relative_path_base=self.path,
         fallback_path=output_fallback
     )

     # --- Extensions: reject None, a bare string, or an empty collection ---
     wrong_kind = extensions is None or type(extensions) is str
     if wrong_kind or len(extensions) == 0:
         raise ValueError("Invalid extensions argument provided.")
     self.extensions = extensions

     # --- Initial state ---
     self._texts = None
     self.metadata = {}
     self.batch_format_data = {}
コード例 #2
0
 def __init__(self,
              path,
              name=None,
              extensions=(".txt", ),
              texts_path=None,
              metadata_path=None,
              output_path=None):
     """Build a corpus object from a directory path.

     ``path`` must be a ``str``; when it is not absolute it is joined
     onto ``Ity.corpus_root``. The texts, metadata and output locations
     are each obtained from ``get_valid_path`` using the resolved corpus
     path as the relative base. ``name`` falls back to the basename of
     the corpus path unless it is exactly a ``str``.

     Raises:
         ValueError: for a non-``str`` ``path``, a texts path that is
             not an existing ``str`` path, or an ``extensions`` value
             that is ``None``, a ``str`` or empty.
         IOError: when the resolved corpus path does not exist.
     """
     # Main corpus path: validate, anchor, normalise, then check existence.
     if type(path) is not str:
         raise ValueError("Invalid path argument provided.")
     is_absolute = os.path.isabs(path)
     if not is_absolute:
         # Relative input is interpreted relative to Ity.corpus_root.
         path = os.path.join(Ity.corpus_root, path)
     # abspath() normalises the path, removing trailing slashes.
     resolved = os.path.abspath(path)
     self.path = resolved
     if not os.path.exists(resolved):
         raise IOError("Corpus at path '%s' does not exist." % self.path)

     # Texts path: falls back to the corpus path and is required to exist.
     self.texts_path = get_valid_path(
         path=texts_path,
         relative_path_base=resolved,
         fallback_path=resolved,
     )
     if type(self.texts_path) is not str:
         raise ValueError("Path to texts ('%s') doesn't exist." %
                          self.texts_path)
     if not os.path.exists(self.texts_path):
         raise ValueError("Path to texts ('%s') doesn't exist." %
                          self.texts_path)

     # Corpus name: only an exact str is accepted, else use the basename.
     if type(name) is not str:
         name = os.path.basename(resolved)
     self.name = name

     # Metadata path, with a fallback under Ity.metadata_root.
     default_metadata = os.path.join(Ity.metadata_root, name)
     self.metadata_path = get_valid_path(
         path=metadata_path,
         relative_path_base=resolved,
         fallback_path=default_metadata,
     )

     # Output path, with a fallback under Ity.output_root.
     default_output = os.path.join(Ity.output_root, name)
     self.output_path = get_valid_path(
         path=output_path,
         relative_path_base=resolved,
         fallback_path=default_output,
     )

     # Extensions: must be a non-empty collection that is not a str.
     if extensions is None:
         raise ValueError("Invalid extensions argument provided.")
     if type(extensions) is str or len(extensions) == 0:
         raise ValueError("Invalid extensions argument provided.")
     self.extensions = extensions

     # Remaining attributes start out unset/empty.
     self._texts = None
     self.metadata = {}
     self.batch_format_data = {}
コード例 #3
0
ファイル: CorpusText.py プロジェクト: starsplatter/Ubiqu-Ity
 def __init__(self, path, name=None, corpus=None, output_path=None):
     """Initialise a text located at ``path``.

     Args:
         path: Location of the text file as a ``str``. A relative path
             is resolved against ``corpus.texts_path``.
         name: Text name. Anything that is not exactly a ``str``
             (including ``None``) is replaced by the file's basename
             without its extension.
         corpus: Optional owning corpus object; stored on
             ``self.corpus``. Needed (with a ``str`` ``texts_path``
             attribute) when ``path`` is relative.
         output_path: Handed to ``get_valid_path``; the fallback is
             ``Ity.output_root`` joined with the text name.

     Raises:
         ValueError: ``path`` is not a ``str``; ``path`` is relative and
             no corpus with a ``str`` ``texts_path`` was given; or the
             resolved path does not exist.
     """
     # Text Path
     if type(path) is not str:
         raise ValueError("Invalid path argument provided.")
     # Is the path absolute?
     if not os.path.isabs(path):
         # No? Can we figure out where it is based on the corpus argument?
         # Fix: the original tested `corpus is None`, which made this
         # branch unreachable, so relative paths always raised even when
         # a usable corpus was supplied.
         if corpus is not None and hasattr(corpus, "texts_path") and type(corpus.texts_path) is str:
             path = os.path.join(corpus.texts_path, path)
         else:
             raise ValueError("Given a relative path to a text without a corpus argument.")
     # This call to os.path.abspath(), among other things, removes trailing
     # slashes from the path.
     self.path = os.path.abspath(path)
     # Okay, does the path actually exist?
     if not os.path.exists(self.path):
         raise ValueError("Text file at path '%s' does not exist." % self.path)
     # Text Name (defaults to the file name without its extension)
     if name is None or type(name) is not str:
         name = os.path.splitext(os.path.basename(self.path))[0]
     self.name = name
     # Text Corpus (may be None)
     self.corpus = corpus
     # Output Path
     self.output_path = get_valid_path(path=output_path, fallback_path=os.path.join(Ity.output_root, self.name))
     # Remaining attributes start out unset/empty.
     self.metadata = None
     self._text_str = None
     self.tokens = []
     self.tag_data = {}
     self.format_data = {}
コード例 #4
0
ファイル: CorpusText.py プロジェクト: uwgraphics/Ubiqu-Ity
 def __init__(self, path, name=None, corpus=None, output_path=None):
     """Initialise a text located at ``path``.

     Args:
         path: Location of the text file as a ``str``. A relative path
             is resolved against ``corpus.texts_path``.
         name: Text name. Anything that is not exactly a ``str``
             (including ``None``) is replaced by the file's basename
             without its extension.
         corpus: Optional owning corpus object; stored on
             ``self.corpus``. Needed (with a ``str`` ``texts_path``
             attribute) when ``path`` is relative.
         output_path: Handed to ``get_valid_path``; the fallback is
             ``Ity.output_root`` joined with the text name.

     Raises:
         ValueError: ``path`` is not a ``str``; ``path`` is relative and
             no corpus with a ``str`` ``texts_path`` was given; or the
             resolved path does not exist.
     """
     # Text Path
     if type(path) is not str:
         raise ValueError("Invalid path argument provided.")
     # Is the path absolute?
     if not os.path.isabs(path):
         # No? Can we figure out where it is based on the corpus argument?
         # Fix: the original tested `corpus is None`, which made this
         # branch unreachable, so relative paths always raised even when
         # a usable corpus was supplied.
         if (corpus is not None and hasattr(corpus, "texts_path")
                 and type(corpus.texts_path) is str):
             path = os.path.join(corpus.texts_path, path)
         else:
             raise ValueError(
                 "Given a relative path to a text without a corpus argument."
             )
     # This call to os.path.abspath(), among other things, removes trailing
     # slashes from the path.
     self.path = os.path.abspath(path)
     # Okay, does the path actually exist?
     if not os.path.exists(self.path):
         raise ValueError("Text file at path '%s' does not exist." %
                          self.path)
     # Text Name (defaults to the file name without its extension)
     if name is None or type(name) is not str:
         name = os.path.splitext(os.path.basename(self.path))[0]
     self.name = name
     # Text Corpus (may be None)
     self.corpus = corpus
     # Output Path
     self.output_path = get_valid_path(path=output_path,
                                       fallback_path=os.path.join(
                                           Ity.output_root, self.name))
     # Remaining attributes start out unset/empty.
     self.metadata = None
     self._text_str = None
     self.tokens = []
     self.tag_data = {}
     self.format_data = {}