def load(class_, url, **args): """Loads the sample from a data URI.""" info_re = '(?P<mediatype>[^/]+/[^;]+(;[^=]+=[^;]+)*)?(?P<base64>;base64)?' info, data = url.path.split(',') info = re.match(info_re, info).groupdict() mediatype = info['mediatype'].setdefault('text/plain;charset=US-ASCII') if ';' in mediatype: mimetype, params = mediatype.split(';', 1) params = [p.strip().split('=') for p in params.split(';')] params = dict((k.strip(), v.strip()) for k, v in params) else: mimetype, params = mediatype, dict() data = base64.b64decode(data) if info['base64'] else urllib.unquote(data) return class_(**content_types.get(mimetype).parse(data, **params))
def load(class_, url, **args): """Loads the sample from a file URL.""" if os.path.isdir(url.path): files = { 'text': None, 'lexicon': None, 'word_delimiters': None, 'sentence_delimiters': None} for filename in files: with open(os.path.join(url.path, filename + '.txt'), 'rb') as txt: files[filename] = txt.read().decode('UTF-8') return class_(**files) else: with open(url.path, 'rb') as file_: content = file_.read() mimetype, encoding = mimetypes.guess_type(url.path) content_encoding = args.get('content_encoding', encoding) content_type = args.get('content_type', mimetype) if content_encoding: content = content_encodings.get(content_encoding).decode(content) frozen = content_types.get(content_type).parse(content) return class_(**frozen)
def load(class_, url, **args): """Loads the sample from a file URL.""" if os.path.isdir(url.path): files = { 'text': None, 'lexicon': None, 'word_delimiters': None, 'sentence_delimiters': None } for filename in files: with open(os.path.join(url.path, filename + '.txt'), 'rb') as txt: files[filename] = txt.read().decode('UTF-8') return class_(**files) else: with open(url.path, 'rb') as file_: content = file_.read() mimetype, encoding = mimetypes.guess_type(url.path) content_encoding = args.get('content_encoding', encoding) content_type = args.get('content_type', mimetype) if content_encoding: content = content_encodings.get(content_encoding).decode(content) frozen = content_types.get(content_type).parse(content) return class_(**frozen)
def dump(sample, url, **args): """Dunps the sample into a file URL.""" prefix, extension = os.path.splitext(url.path) if not extension: if not os.path.exists(url.path): os.mkdir(url.path) files = { 'text': sample['text'], 'lexicon': sample['lexicon'], 'word_delimiters': sample['word_delimiters'], 'sentence_delimiters': sample['sentence_delimiters']} for filename in files: with open(os.path.join(prefix, filename + '.txt'), 'wb') as file_: file_.write(files.get(filename).encode('UTF-8')) else: mimetype, encoding = mimetypes.guess_type(url.path) content_encoding = args.get('content_encoding', encoding) content_type = args.get('content_type', mimetype) content = content_types.get(content_type).format(sample.frozen()) if content_encoding: content = content_encodings.get(content_encoding).encode(content) with open(url.path, 'wb') as file_: file_.write(content)
def dump(sample, url, **args): """Dunps the sample into a file URL.""" prefix, extension = os.path.splitext(url.path) if not extension: if not os.path.exists(url.path): os.mkdir(url.path) files = { 'text': sample['text'], 'lexicon': sample['lexicon'], 'word_delimiters': sample['word_delimiters'], 'sentence_delimiters': sample['sentence_delimiters'] } for filename in files: with open(os.path.join(prefix, filename + '.txt'), 'wb') as file_: file_.write(files.get(filename).encode('UTF-8')) else: mimetype, encoding = mimetypes.guess_type(url.path) content_encoding = args.get('content_encoding', encoding) content_type = args.get('content_type', mimetype) content = content_types.get(content_type).format(sample.frozen()) if content_encoding: content = content_encodings.get(content_encoding).encode(content) with open(url.path, 'wb') as file_: file_.write(content)