def __init__(
    self,
    path,
    name=None,
    extensions=(".txt",),
    texts_path=None,
    metadata_path=None,
    output_path=None
):
    """Set up a corpus rooted at ``path``.

    Args:
        path: Corpus location as a string. A relative path is joined onto
            ``Ity.corpus_root``; the result is normalized with
            ``os.path.abspath()`` and must exist.
        name: Corpus name. Any non-string value (including ``None``) is
            replaced by the basename of the resolved corpus path.
        extensions: Non-empty collection of file extensions. ``None``, a
            bare string, or an empty collection is rejected.
        texts_path: Handed to ``get_valid_path()`` with the corpus path as
            both the relative base and the fallback. The result must be
            a string naming an existing path.
        metadata_path: Handed to ``get_valid_path()`` with the corpus path
            as the relative base and ``Ity.metadata_root/<name>`` as the
            fallback.
        output_path: Handed to ``get_valid_path()`` with the corpus path
            as the relative base and ``Ity.output_root/<name>`` as the
            fallback.

    Raises:
        ValueError: If ``path`` is not a string, the texts path is not an
            existing string path, or ``extensions`` is invalid.
        IOError: If the resolved corpus path does not exist.
    """
    # Main Corpus Path
    # isinstance() (rather than an exact type() comparison) so that str
    # subclasses are accepted as paths too.
    if not isinstance(path, str):
        raise ValueError("Invalid path argument provided.")
    # If we didn't get an absolute path, assume it's a path relative to
    # Ity.corpus_root.
    if not os.path.isabs(path):
        path = os.path.join(Ity.corpus_root, path)
    # This call to os.path.abspath(), among other things, removes trailing
    # slashes from the path.
    self.path = os.path.abspath(path)
    # Okay, does the path actually exist?
    if not os.path.exists(self.path):
        raise IOError("Corpus at path '%s' does not exist." % self.path)

    # Texts Path
    self.texts_path = get_valid_path(
        path=texts_path,
        relative_path_base=self.path,
        fallback_path=self.path
    )
    # It's NOT okay if this path doesn't exist.
    if not isinstance(self.texts_path, str) or not os.path.exists(self.texts_path):
        raise ValueError("Path to texts ('%s') doesn't exist." % self.texts_path)

    # Corpus Name
    # A non-string name (None included) falls back to the directory name.
    if not isinstance(name, str):
        name = os.path.basename(self.path)
    self.name = name

    # Metadata Path
    self.metadata_path = get_valid_path(
        path=metadata_path,
        relative_path_base=self.path,
        fallback_path=os.path.join(Ity.metadata_root, self.name)
    )

    # Output Path
    self.output_path = get_valid_path(
        path=output_path,
        relative_path_base=self.path,
        fallback_path=os.path.join(Ity.output_root, self.name)
    )

    # Extensions
    # A bare string is rejected explicitly: it has a length, but it is not
    # a collection of extensions.
    if extensions is None or isinstance(extensions, str) or len(extensions) == 0:
        raise ValueError("Invalid extensions argument provided.")
    self.extensions = extensions

    self._texts = None
    self.metadata = {}
    self.batch_format_data = {}
def __init__(self,
             path,
             name=None,
             extensions=(".txt", ),
             texts_path=None,
             metadata_path=None,
             output_path=None):
    """Initialize the corpus located at ``path``.

    Args:
        path: String path to the corpus. Relative paths are resolved
            against ``Ity.corpus_root``. The normalized absolute path is
            stored on ``self.path`` and must exist.
        name: Optional corpus name. When it is not a string (``None``
            included), the basename of ``self.path`` is used instead.
        extensions: Non-empty collection of file extensions. ``None``, a
            single string, and empty collections are all rejected.
        texts_path: Forwarded to ``get_valid_path()``, with ``self.path``
            as both the relative base and the fallback. The resolved
            value must be a string pointing at an existing path.
        metadata_path: Forwarded to ``get_valid_path()``, falling back to
            ``Ity.metadata_root`` joined with the corpus name.
        output_path: Forwarded to ``get_valid_path()``, falling back to
            ``Ity.output_root`` joined with the corpus name.

    Raises:
        ValueError: For a non-string ``path``, a texts path that is not
            an existing string path, or an invalid ``extensions`` value.
        IOError: When the resolved corpus path does not exist.
    """
    # Main Corpus Path
    # Use isinstance() so str subclasses are treated as valid paths; an
    # exact type() comparison would reject them.
    if not isinstance(path, str):
        raise ValueError("Invalid path argument provided.")
    # If we didn't get an absolute path, assume it's a path relative to
    # Ity.corpus_root.
    if not os.path.isabs(path):
        path = os.path.join(Ity.corpus_root, path)
    # This call to os.path.abspath(), among other things, removes trailing
    # slashes from the path.
    self.path = os.path.abspath(path)
    # Okay, does the path actually exist?
    if not os.path.exists(self.path):
        raise IOError("Corpus at path '%s' does not exist." % self.path)

    # Texts Path
    self.texts_path = get_valid_path(path=texts_path,
                                     relative_path_base=self.path,
                                     fallback_path=self.path)
    # It's NOT okay if this path doesn't exist.
    texts_path_ok = (isinstance(self.texts_path, str)
                     and os.path.exists(self.texts_path))
    if not texts_path_ok:
        raise ValueError("Path to texts ('%s') doesn't exist." %
                         self.texts_path)

    # Corpus Name
    # Anything that is not a string (None included) is replaced by the
    # corpus directory's own name.
    if not isinstance(name, str):
        name = os.path.basename(self.path)
    self.name = name

    # Metadata Path
    self.metadata_path = get_valid_path(
        path=metadata_path,
        relative_path_base=self.path,
        fallback_path=os.path.join(Ity.metadata_root, self.name))

    # Output Path
    self.output_path = get_valid_path(
        path=output_path,
        relative_path_base=self.path,
        fallback_path=os.path.join(Ity.output_root, self.name))

    # Extensions
    # A lone string has a length but is not a collection of extensions,
    # so it is refused along with None and empty collections.
    if (extensions is None or isinstance(extensions, str)
            or len(extensions) == 0):
        raise ValueError("Invalid extensions argument provided.")
    self.extensions = extensions

    self._texts = None
    self.metadata = {}
    self.batch_format_data = {}
def __init__(self, path, name=None, corpus=None, output_path=None):
    """Set up a text backed by the file at ``path``.

    Args:
        path: String path to the text file. A relative path is joined
            onto ``corpus.texts_path``, which therefore requires a
            ``corpus`` whose ``texts_path`` attribute is a string.
        name: Text name. Any non-string value (including ``None``) is
            replaced by the file's basename without its extension.
        corpus: Optional owning corpus object; stored as ``self.corpus``
            and may be ``None``.
        output_path: Handed to ``get_valid_path()`` with
            ``Ity.output_root/<name>`` as the fallback.

    Raises:
        ValueError: If ``path`` is not a string, if ``path`` is relative
            and no usable ``corpus.texts_path`` is available, or if the
            resolved file does not exist.
    """
    # Text Path
    if not isinstance(path, str):
        raise ValueError("Invalid path argument provided.")
    # Is the path absolute?
    if not os.path.isabs(path):
        # No? Can we figure out where it is based on the corpus argument?
        # The original test required `corpus is None`, which made this
        # branch unreachable; a relative path needs a corpus that exposes
        # a string texts_path. getattr() yields None for a missing corpus
        # or a missing attribute.
        corpus_texts_path = getattr(corpus, "texts_path", None)
        if not isinstance(corpus_texts_path, str):
            raise ValueError("Given a relative path to a text without a corpus argument.")
        path = os.path.join(corpus_texts_path, path)
    # This call to os.path.abspath(), among other things, removes trailing
    # slashes from the path.
    self.path = os.path.abspath(path)
    # Okay, does the path actually exist?
    if not os.path.exists(self.path):
        raise ValueError("Text file at path '%s' does not exist." % self.path)

    # Text Name
    # A non-string name (None included) falls back to the file's stem.
    if not isinstance(name, str):
        name = os.path.splitext(os.path.basename(self.path))[0]
    self.name = name

    # Text Corpus (may be None)
    self.corpus = corpus

    # Output Path
    self.output_path = get_valid_path(
        path=output_path,
        fallback_path=os.path.join(Ity.output_root, self.name)
    )

    self.metadata = None
    self._text_str = None
    self.tokens = []
    self.tag_data = {}
    self.format_data = {}
def __init__(self, path, name=None, corpus=None, output_path=None):
    """Initialize a text from the file at ``path``.

    Args:
        path: String path to the text file. Relative paths are resolved
            against ``corpus.texts_path``, so they require a ``corpus``
            exposing a string ``texts_path`` attribute.
        name: Optional text name. When it is not a string (``None``
            included), the file's basename minus its extension is used.
        corpus: Optional corpus this text belongs to; kept on
            ``self.corpus`` and allowed to be ``None``.
        output_path: Forwarded to ``get_valid_path()``, falling back to
            ``Ity.output_root`` joined with the text name.

    Raises:
        ValueError: For a non-string ``path``, a relative ``path`` with
            no usable ``corpus.texts_path``, or a resolved path that does
            not exist.
    """
    # Text Path
    if not isinstance(path, str):
        raise ValueError("Invalid path argument provided.")
    # Is the path absolute?
    if not os.path.isabs(path):
        # No? Can we figure out where it is based on the corpus argument?
        # This used to test `corpus is None`, which can never hold together
        # with the attribute checks, so every relative path was rejected.
        # The corpus must be present and carry a string texts_path.
        has_texts_path = (corpus is not None
                          and hasattr(corpus, "texts_path")
                          and isinstance(corpus.texts_path, str))
        if has_texts_path:
            path = os.path.join(corpus.texts_path, path)
        else:
            raise ValueError(
                "Given a relative path to a text without a corpus argument."
            )
    # This call to os.path.abspath(), among other things, removes trailing
    # slashes from the path.
    self.path = os.path.abspath(path)
    # Okay, does the path actually exist?
    if not os.path.exists(self.path):
        raise ValueError("Text file at path '%s' does not exist." %
                         self.path)

    # Text Name
    # Anything that is not a string (None included) is replaced by the
    # file name without its extension.
    if not isinstance(name, str):
        name = os.path.splitext(os.path.basename(self.path))[0]
    self.name = name

    # Text Corpus (may be None)
    self.corpus = corpus

    # Output Path
    self.output_path = get_valid_path(
        path=output_path,
        fallback_path=os.path.join(Ity.output_root, self.name))

    self.metadata = None
    self._text_str = None
    self.tokens = []
    self.tag_data = {}
    self.format_data = {}