def parse_filename(project, filename):
    """Parses a filename.

    The file extension is stripped with ``splitext`` before parsing.  If the
    parser raises ``TemplateParsingError`` the parse is retried once with
    ``'_fixed'`` appended to the stripped filename.

    The parser is built lazily from the ``'filename'`` entry of the project
    scope's ``data`` and reused while the project stays the same.

    :param str project: Project code.
    :param str filename: Filename.

    :returns: Set of terms extracted from the filename.
    :rtype: set

    :raises AssertionError: If an argument has the wrong type, the project is
        not among ``all_scopes()``, or the scope's filename configuration is
        missing or invalid.
    :raises IndexError: If a configured collection name matches no collection
        of the scope.
    :raises TemplateParsingError: If the retry with ``'_fixed'`` fails too.

    """
    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(filename, basestring), 'Invalid filename'

    global _PROJECT, _PARSER, _TEMPLATE, _COLLECTIONS

    # _PROJECT is module-wide and is also written by the other parse/build
    # functions that use these globals, so "_PROJECT == project" alone does
    # not prove that a *filename* parser is installed.  Remember what this
    # function built itself and rebuild whenever that is missing or stale.
    cached = getattr(parse_filename, '_cached_parser', None)
    if _PROJECT != project or cached is None or cached[0] != project:
        # Get scope corresponding to the project code.
        scope = next((s for s in all_scopes() if s.name == project), None)
        assert scope is not None, 'Unsupported project'

        assert 'filename' in scope.data, 'Filename parser not found'
        config = scope.data['filename']
        assert 'template' in config, 'Filename parser template not found'
        assert 'collections' in config, \
            'Filename parser template collections not found'

        # Get template from data scope.
        _TEMPLATE = config['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Get template collections from data scope: configured names use
        # underscores where collection names use hyphens.
        _COLLECTIONS = list()
        for name in config['collections']:
            wanted = name.replace('_', '-')
            _COLLECTIONS.append(
                [c.namespace for c in scope.collections if c.name == wanted][0]
            )
        assert _COLLECTIONS, 'Invalid collections'

        # Instantiate parser JIT.
        _PARSER = create_template_parser(
            _TEMPLATE, tuple(_COLLECTIONS), PARSING_STRICTNESS_1, separator='_'
        )

        # Cached project (module-wide) and this function's own record.
        _PROJECT = project
        cached = parse_filename._cached_parser = (project, _PARSER)

    parser = cached[1]

    # Strip file extension.
    stem = splitext(filename)[0]

    try:
        return parser.parse(stem)
    except TemplateParsingError:
        # Retry with a suffix appended; per the original note this covers
        # filenames that carry no file period.
        return parser.parse(stem + '_fixed')
def parse_directory(project, directory):
    """Parses a directory.

    The parser is built lazily from the ``'directory_structure'`` entry of
    the project scope's ``data`` (elements separated by ``'/'``) and reused
    while the project stays the same.

    :param str project: Project code.
    :param str directory: Data directory.

    :returns: Set of terms extracted from the directory.
    :rtype: set

    :raises AssertionError: If an argument has the wrong type, the project is
        not among ``all_scopes()``, or the scope's directory configuration is
        missing or invalid.
    :raises IndexError: If a configured collection name matches no collection
        of the scope.

    """
    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(directory, basestring), 'Invalid directory'

    global _PROJECT, _PARSER, _TEMPLATE, _COLLECTIONS

    # _PROJECT is module-wide and is also written by the other parse/build
    # functions that use these globals, so "_PROJECT == project" alone does
    # not prove that a *directory* parser is installed.  Remember what this
    # function built itself and rebuild whenever that is missing or stale.
    cached = getattr(parse_directory, '_cached_parser', None)
    if _PROJECT != project or cached is None or cached[0] != project:
        # Drop any previously instantiated template state before rebuilding.
        _TEMPLATE = None
        _COLLECTIONS = None

        # Get scope corresponding to the project code.
        scope = next((s for s in all_scopes() if s.name == project), None)
        assert scope is not None, 'Unsupported project'

        assert 'directory_structure' in scope.data, 'Directory parser not found'
        config = scope.data['directory_structure']
        assert 'template' in config, 'Directory parser template not found'
        assert 'collections' in config, \
            'Directory parser template collections not found'

        # Get template from data scope.
        _TEMPLATE = config['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Get template collections from data scope: configured names use
        # underscores where collection names use hyphens.
        _COLLECTIONS = list()
        for name in config['collections']:
            wanted = name.replace('_', '-')
            _COLLECTIONS.append(
                [c.namespace for c in scope.collections if c.name == wanted][0]
            )
        assert _COLLECTIONS, 'Invalid collections'

        # Instantiate parser JIT.
        _PARSER = create_template_parser(
            _TEMPLATE, tuple(_COLLECTIONS), PARSING_STRICTNESS_1, separator='/'
        )

        # Cached project (module-wide) and this function's own record.
        _PROJECT = project
        cached = parse_directory._cached_parser = (project, _PARSER)

    return cached[1].parse(directory)
def build_filename(project, terms):
    """Builds a filename.

    The builder is created lazily from the ``'filename'`` entry of the
    project scope's ``data`` (elements separated by ``'_'``) and reused while
    the project stays the same.  Every member of ``terms`` must be a ``Term``.

    :param str project: Project code.
    :param set terms: Filename terms.

    :returns: Filename string.
    :rtype: str

    :raises AssertionError: If an argument has the wrong type, the project is
        not among ``all_scopes()``, the scope's filename configuration is
        missing or invalid, or a member of ``terms`` is not a ``Term``.
    :raises IndexError: If a configured collection name matches no collection
        of the scope.

    """
    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(terms, set), 'Invalid terms'

    global _PROJECT, _BUILDER, _TEMPLATE, _COLLECTIONS

    # _PROJECT is module-wide and is also written by the parse functions that
    # use these globals, none of which set _BUILDER.  Trusting
    # "_PROJECT == project" alone could therefore skip initialisation and
    # leave the builder unset or stale.  Remember what this function built
    # itself and rebuild whenever that is missing or stale.
    cached = getattr(build_filename, '_cached_builder', None)
    if _PROJECT != project or cached is None or cached[0] != project:
        # Get scope corresponding to the project code.
        scope = next((s for s in all_scopes() if s.name == project), None)
        assert scope is not None, 'Unsupported project'

        assert 'filename' in scope.data, 'Filename parser not found'
        config = scope.data['filename']
        assert 'template' in config, 'Filename parser template not found'
        assert 'collections' in config, \
            'Filename parser template collections not found'

        # Get template from data scope.
        _TEMPLATE = config['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Get template collections from data scope: configured names use
        # underscores where collection names use hyphens.
        _COLLECTIONS = list()
        for name in config['collections']:
            wanted = name.replace('_', '-')
            _COLLECTIONS.append(
                [c.namespace for c in scope.collections if c.name == wanted][0]
            )
        assert _COLLECTIONS, 'Invalid collections'

        # Instantiate builder JIT.
        _BUILDER = create_template_builder(
            _TEMPLATE, tuple(_COLLECTIONS), PARSING_STRICTNESS_1, separator='_'
        )

        # Cached project (module-wide) and this function's own record.
        _PROJECT = project
        cached = build_filename._cached_builder = (project, _BUILDER)

    for term in terms:
        assert isinstance(term, Term), 'Invalid term :: {}'.format(term)

    return cached[1].build(terms)
def parse_dataset_identifier(project, identifier):
    """Parses a dataset identifier.

    Before parsing, every ``'#'`` in the identifier is replaced by ``'.v'`` so
    that a version suffix becomes a regular identifier element.

    The parser is built lazily from the ``'dataset_id'`` entry of the project
    scope's ``data`` (using the parser factory's default separator) and reused
    while the project stays the same.

    :param str project: Project code.
    :param str identifier: Dataset identifier.

    :returns: Set of terms extracted from the identifier.
    :rtype: set

    :raises AssertionError: If an argument has the wrong type, the project is
        not among ``all_scopes()``, or the scope's dataset ID configuration is
        missing or invalid.
    :raises IndexError: If a configured collection name matches no collection
        of the scope.

    """
    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(identifier, basestring), 'Invalid identifier'

    global _PROJECT, _PARSER, _TEMPLATE, _COLLECTIONS

    # _PROJECT is module-wide and is also written by the other parse/build
    # functions that use these globals, so "_PROJECT == project" alone does
    # not prove that a *dataset identifier* parser is installed.  Remember
    # what this function built itself and rebuild when missing or stale.
    cached = getattr(parse_dataset_identifier, '_cached_parser', None)
    if _PROJECT != project or cached is None or cached[0] != project:
        # Get scope corresponding to the project code.
        scope = next((s for s in all_scopes() if s.name == project), None)
        assert scope is not None, 'Unsupported project'

        assert 'dataset_id' in scope.data, 'Dataset ID parser not found'
        config = scope.data['dataset_id']
        assert 'template' in config, 'Dataset ID parser template not found'
        assert 'collections' in config, \
            'Dataset ID parser template collections not found'

        # Get template from data scope.
        _TEMPLATE = config['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Get template collections from data scope: configured names use
        # underscores where collection names use hyphens.
        _COLLECTIONS = list()
        for name in config['collections']:
            wanted = name.replace('_', '-')
            _COLLECTIONS.append(
                [c.namespace for c in scope.collections if c.name == wanted][0]
            )
        assert _COLLECTIONS, 'Invalid collections'

        # Instantiate parser JIT.
        _PARSER = create_template_parser(
            _TEMPLATE, tuple(_COLLECTIONS), PARSING_STRICTNESS_1
        )

        # Cached project (module-wide) and this function's own record.
        _PROJECT = project
        cached = parse_dataset_identifier._cached_parser = (project, _PARSER)

    # Convert version suffix to an identifier element.
    identifier = identifier.replace('#', '.v')

    return cached[1].parse(identifier)