Beispiel #1
0
    def _translateExtensions(
        self,
        fileExtensions=None,
        expandExtensions=True,
    ):
        # noinspection PyShadowingNames
        '''
        Resolve the file extensions to search for.

        With no extensions given (None, or a one-element sequence holding
        None) every extension in `Corpus._allExtensions` is returned.
        Otherwise, when `expandExtensions` is true, each requested extension
        is replaced by everything `common.findInputExtension` reports for it;
        requests for which that helper returns None are dropped.  With
        `expandExtensions` false the (list-wrapped) request is returned as is.

        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> for extension in coreCorpus._translateExtensions():
        ...     extension
        ...
        '.abc'
        '.capx'
        '.mid'
        '.midi'
        '.xml'
        '.mxl'
        '.musicxml'
        '.md'
        '.musedata'
        '.zip'
        '.krn'
        '.rntxt'
        '.rntext'
        '.romantext'
        '.rtxt'
        '.nwctxt'
        '.nwc'

        >>> coreCorpus._translateExtensions('.mid', False)
        ['.mid']

        >>> coreCorpus._translateExtensions('.mid', True)
        ['.mid', '.midi']

        It does not matter if you choose a canonical name or not, the output is the same:

        >>> coreCorpus._translateExtensions('.musicxml', True)
        ['.xml', '.mxl', '.musicxml']

        >>> coreCorpus._translateExtensions('.xml', True)
        ['.xml', '.mxl', '.musicxml']
        '''
        if common.isListLike(fileExtensions):
            requested = fileExtensions
        else:
            requested = [fileExtensions]

        # a lone None stands for "no preference": hand back the full set
        if len(requested) == 1 and requested[0] is None:
            return Corpus._allExtensions

        if not expandExtensions:
            return requested

        expanded = []
        for candidate in requested:
            known = common.findInputExtension(candidate)
            if known is not None:
                expanded += known
        return expanded
Beispiel #2
0
def getPaths(extList=None):
    '''Get all paths in the corpus that match a known extension, or an extension
    provided by an argument.

    `extList` may be a single extension or a list-like of extensions; a path
    matches when it ends with any of them.  When `extList` is None, the
    input extensions reported by `common.findInputExtension` for 'lily',
    'musicxml' and 'humdrum' are used.

    Returns a list of matching paths, without duplicates, in the order they
    are encountered while walking `MODULES`.

    >>> a = getPaths()
    >>> len(a) > 30
    True

    >>> a = getPaths('krn')
    >>> len(a) >= 4
    True
    '''
    if not common.isListLike(extList):
        extList = [extList]

    if extList == [None]:
        extList = (common.findInputExtension('lily') +
                   common.findInputExtension('musicxml') +
                   common.findInputExtension('humdrum'))
    #environLocal.printDebug(['getting paths with extensions:', extList])
    paths = []
    # mirrors `paths` so the duplicate check does not rescan the list
    seen = set()
    for moduleName in MODULES:
        if not hasattr(moduleName, '__path__'):
            # when importing a package name (a directory) the moduleName
            # may be a list of all paths contained within the package
            # this seems to be dependent on the context of the call:
            # from the command line is different than from the interpreter
            dirListing = moduleName
        else:
            # returns a list with one or more paths
            # the first is the path to the directory that contains xml files
            dirPath = moduleName.__path__[0]
            dirListing = [os.path.join(dirPath, x)
                          for x in os.listdir(dirPath)]

        for fp in dirListing:
            if any(fp.endswith(ext) for ext in extList) and fp not in seen:
                seen.add(fp)
                paths.append(fp)
    return paths
Beispiel #3
0
def getPaths(extList=None, expandExtensions=True):
    '''Get all paths in the corpus that match a known extension, or an extension
    provided by an argument.

    If `expandExtensions` is True, each given format or extension is replaced
    by all input extensions that `common.findInputExtension` reports for it.
    This is convenient when an input format might match multiple extensions.
    Entries for which that helper returns None are skipped rather than
    raising, matching the handling in `Corpus._translateExtensions`.

    When `extList` is None, `_ALL_EXTENSIONS` is used.

    Returns a list of matching paths, without duplicates, in the order they
    are encountered while walking `MODULES`.

    >>> a = getPaths()
    >>> len(a) > 30
    True

    >>> a = getPaths('krn')
    >>> len(a) >= 4
    True

    >>> a = getPaths('abc')
    >>> len(a) >= 10
    True

    '''
    if not common.isListLike(extList):
        extList = [extList]

    if extList == [None]:
        extList = _ALL_EXTENSIONS
    elif expandExtensions:
        extMod = []
        for e in extList:
            found = common.findInputExtension(e)
            # `+= None` would raise TypeError; ignore unresolved entries
            if found is not None:
                extMod += found
        extList = extMod

    #environLocal.printDebug(['getting paths with extensions:', extList])
    paths = []
    # mirrors `paths` so the duplicate check does not rescan the list
    seen = set()
    for moduleName in MODULES:
        if not hasattr(moduleName, '__path__'):
            # when importing a package name (a directory) the moduleName
            # may be a list of all paths contained within the package
            # this seems to be dependent on the context of the call:
            # from the command line is different than from the interpreter
            dirListing = moduleName
        else:
            # returns a list with one or more paths
            # the first is the path to the directory that contains xml files
            dirPath = moduleName.__path__[0]
            dirListing = [os.path.join(dirPath, x)
                          for x in os.listdir(dirPath)]

        for fp in dirListing:
            if any(fp.endswith(ext) for ext in extList) and fp not in seen:
                seen.add(fp)
                paths.append(fp)
    return paths
Beispiel #4
0
    def _translateExtensions(
        self,
        fileExtensions=None,
        expandExtensions=True,
        ):
        '''
        Work out which file extensions should be searched.

        A request of None (or `[None]`) yields `Corpus._allExtensions`.  Any
        other request is wrapped in a list if needed and, when
        `expandExtensions` is true, each entry is swapped for the extensions
        that `common.findInputExtension` returns for it (entries yielding
        None are left out).

        >>> from music21 import corpus
        >>> coreCorpus = corpus.CoreCorpus()
        >>> for extension in coreCorpus._translateExtensions():
        ...     extension
        ...
        '.abc'
        '.capx'
        '.mid'
        '.midi'
        '.xml'
        '.mxl'
        '.mx'
        '.musicxml'
        '.md'
        '.musedata'
        '.zip'
        '.krn'
        '.rntxt'
        '.rntext'
        '.romantext'
        '.rtxt'
        '.nwctxt'
        '.nwc'

        >>> coreCorpus._translateExtensions('.mid', False)
        ['.mid']

        >>> coreCorpus._translateExtensions('.mid', True)
        ['.mid', '.midi']

        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]

        if fileExtensions == [None]:
            return Corpus._allExtensions
        if not expandExtensions:
            return fileExtensions

        # look up each request lazily, in order, keeping only real results
        lookups = (common.findInputExtension(item) for item in fileExtensions)
        collected = []
        for group in lookups:
            if group is not None:
                collected += group
        return collected
Beispiel #5
0
def getVirtualPaths(extList=None):
    '''Get all paths in the virtual corpus that match a known extension. An
    extension of None will return all known extensions.

    `extList` may be a single extension or a list-like of extensions.  When
    it is None, the input extensions reported by `common.findInputExtension`
    for 'lily', 'musicxml' and 'humdrum' are used.

    Every object in `VIRTUAL` whose `corpusPath` is not None is asked, via
    `getUrlByExt`, for its entries for each extension; the entries are
    collected without duplicates, in the order encountered.

    >>> len(getVirtualPaths()) > 6
    True
    '''
    if not common.isListLike(extList):
        extList = [extList]

    if extList == [None]:
        extList = (common.findInputExtension('lily') +
                   common.findInputExtension('musicxml') +
                   common.findInputExtension('humdrum'))
    paths = []
    for obj in VIRTUAL:
        # identity test: None is a singleton, so avoid `!= None`
        if obj.corpusPath is None:
            continue
        for ext in extList:
            #environLocal.printDebug([obj.corpusPath, ext])
            for part in obj.getUrlByExt(ext):
                if part not in paths:
                    paths.append(part)
    return paths
Beispiel #6
0
class Corpus(prebase.ProtoM21Object):
    r'''
    Abstract base class of all corpora subclasses.

    Concrete corpora are expected to supply `cacheFilePath`, `name` and
    `getPaths()`; the versions defined here only raise NotImplementedError.
    '''

    # CLASS VARIABLES #

    # NOTE(review): `__metaclass__` is the Python 2 spelling; Python 3 ignores
    # this attribute, so unless a base class already uses ABCMeta the
    # @abc.abstractmethod markers below are not enforced -- confirm.
    __metaclass__ = abc.ABCMeta

    # TODO: this is volatile -- should be elsewhere...
    # format names handed to common.findInputExtension to build _allExtensions
    _acceptableExtensions = [
        'abc', 'capella', 'midi', 'musicxml', 'musedata', 'humdrum',
        'romantext', 'noteworthytext', 'noteworthy'
    ]

    # flat tuple of every extension found for the formats above
    _allExtensions = tuple(
        common.flattenList(
            [common.findInputExtension(x) for x in _acceptableExtensions]))

    # shared by all corpora; _removeNameFromCache compares key[0] to a name
    _pathsCache = {}

    _directoryInformation = ()  # a tuple of triples -- see coreCorpus

    # forwarded to metadataBundle.addFromPaths() by cacheMetadata()
    parseUsingCorpus = True

    # SPECIAL METHODS #

    def _reprInternal(self):
        '''
        Return an empty string.
        '''
        return ''

    # PRIVATE METHODS #

    def _removeNameFromCache(self, name):
        '''
        Delete every entry of the shared `Corpus._pathsCache` whose key has
        `name` as the string form of its first element.
        '''
        # iterate over a snapshot of the keys: the dict is mutated in the loop
        for key in list(Corpus._pathsCache):
            if str(key[0]) == name:
                del Corpus._pathsCache[key]

    def _findPaths(self, rootDirectoryPath, fileExtensions):
        '''
        Given a root directory path, recursively search all contained paths
        for entries whose suffix ends with any of the file extensions in
        `fileExtensions`.

        The `fileExtensions` is a list of file extensions.

        Entries whose name starts with '__' or '.' are skipped.  Returns a
        list of path objects in sorted order.

        NB: we've tried optimizing with `fnmatch` but it does not save any
        time.
        '''
        rdp = common.cleanpath(rootDirectoryPath, returnPathlib=True)
        matched = []

        for filename in sorted(rdp.rglob('*')):
            if filename.name.startswith(('__', '.')):
                continue
            suffix = filename.suffix
            if any(suffix.endswith(extension) for extension in fileExtensions):
                matched.append(filename)
        return matched

    def _translateExtensions(
        self,
        fileExtensions=None,
        expandExtensions=True,
    ):
        '''
        Utility to get default extensions, or, optionally, expand extensions to
        all known formats.

        With no extensions given (None, or a one-element sequence holding
        None), `Corpus._allExtensions` is returned.  Otherwise, when
        `expandExtensions` is true, each entry is replaced by what
        `common.findInputExtension` returns for it; entries for which that
        helper returns None are dropped.

        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> for extension in coreCorpus._translateExtensions():
        ...     extension
        ...
        '.abc'
        '.capx'
        '.mid'
        '.midi'
        '.xml'
        '.mxl'
        '.mx'
        '.musicxml'
        '.md'
        '.musedata'
        '.zip'
        '.krn'
        '.rntxt'
        '.rntext'
        '.romantext'
        '.rtxt'
        '.nwctxt'
        '.nwc'

        >>> coreCorpus._translateExtensions('.mid', False)
        ['.mid']

        >>> coreCorpus._translateExtensions('.mid', True)
        ['.mid', '.midi']

        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        if len(fileExtensions) == 1 and fileExtensions[0] is None:
            return Corpus._allExtensions
        if not expandExtensions:
            return fileExtensions

        expandedExtensions = []
        for extension in fileExtensions:
            allInputExtensions = common.findInputExtension(extension)
            if allInputExtensions is not None:
                expandedExtensions += allInputExtensions
        return expandedExtensions

    # PRIVATE PROPERTIES #

    @property
    @abc.abstractmethod
    def cacheFilePath(self):
        '''
        Abstract; to be overridden by subclasses.  rebuildMetadataCache()
        does nothing when this is None.
        '''
        raise NotImplementedError

    # PUBLIC METHODS #
    def rebuildMetadataCache(self, useMultiprocessing=True, verbose=True):
        r'''
        Rebuild a named bundle from scratch.

        If a bundle is associated with one of music21's corpora, delete any
        metadata cache on disk, clear the bundle's contents and reload in all
        files from that associated corpus.

        Return the rebuilt metadata bundle.  If this corpus has no metadata
        bundle or no cache file path, nothing is rebuilt and the corpus
        object itself is returned instead.

        `useMultiprocessing` and `verbose` are passed on to cacheMetadata().
        '''
        mdb = self.metadataBundle
        if mdb is None:
            return self
        if self.cacheFilePath is None:
            return self

        mdb.clear()
        mdb.delete()
        # honor the caller's verbosity (it used to be forced to True here)
        self.cacheMetadata(useMultiprocessing=useMultiprocessing,
                           verbose=verbose)
        return self.metadataBundle

    def cacheMetadata(self, useMultiprocessing=True, verbose=True, timer=None):
        '''
        Cache the metadata for a single corpus.

        All paths from getPaths() are added to this corpus's metadata bundle
        via its addFromPaths() method, whose return value (the failing file
        paths) is returned.  Progress messages go to `environLocal.warn` when
        `verbose` is True and to `environLocal.printDebug` otherwise.  If no
        `timer` is given, a new `common.Timer` is created and started.
        '''
        def update(message):
            if verbose is True:
                environLocal.warn(message)
            else:
                environLocal.printDebug(message)

        if timer is None:
            timer = common.Timer()
            timer.start()

        metadataBundle = self.metadataBundle
        paths = self.getPaths()

        update('{} metadata cache: starting processing of paths: {}'.format(
            self.name, len(paths)))
        update('cache: filename: {0}'.format(metadataBundle.filePath))

        failingFilePaths = metadataBundle.addFromPaths(
            paths,
            parseUsingCorpus=self.parseUsingCorpus,
            useMultiprocessing=useMultiprocessing,
            verbose=verbose)

        update('cache: writing time: {0} md items: {1}\n'.format(
            timer, len(metadataBundle)))

        update('cache: filename: {0}'.format(metadataBundle.filePath))

        del metadataBundle
        return failingFilePaths

    @abc.abstractmethod
    def getPaths(self, fileExtensions=None, expandExtensions=True):
        r'''
        The paths of the files in a given corpus.

        Abstract; to be overridden by subclasses.
        '''
        raise NotImplementedError

    def getWorkList(
        self,
        workName,
        movementNumber=None,
        fileExtensions=None,
    ):
        r'''
        Search the corpus and return a list of filenames of works, always in a
        list.

        If no matches are found, an empty list is returned.

        Paths are first matched by case-insensitive substring on their POSIX
        form; if any of those matches also contain '/' + workName, only those
        are kept.  When `movementNumber` is given, the remaining paths are
        filtered by file name: exact matches of the name without extension
        take precedence over names merely starting with a movement string.
        The result is sorted and free of duplicates.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()

        # returns 1 even though there is a '.mus' file, which cannot be read...

        >>> len(coreCorpus.getWorkList('cpebach/h186'))
        1
        >>> len(coreCorpus.getWorkList('cpebach/h186', None, '.xml'))
        1

        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 3))
        1
        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 2))
        0

        Make sure that 'verdi' just gets the single Verdi piece and not the
        Monteverdi pieces:

        >>> len(coreCorpus.getWorkList('verdi'))
        1

        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        paths = self.getPaths(fileExtensions)
        results = []

        workPath = pathlib.PurePath(workName)
        workPosix = workPath.as_posix().lower()
        # find all matches for the work name
        # TODO: this should match by path component, not just
        # substring
        for path in paths:
            if workPosix in path.as_posix().lower():
                results.append(path)

        if results:
            # more than one matched...use more stringent criterion:
            # must have a slash before the name
            previousResults = results
            results = []
            for path in previousResults:
                if '/' + workPosix in path.as_posix().lower():
                    results.append(path)
            # the stricter test matched nothing: keep the substring matches
            if not results:
                results = previousResults

        movementResults = []
        if movementNumber is not None and results:
            # store one or more possible mappings of movement number
            movementStrList = []
            # see if this is a pair
            if common.isIterable(movementNumber):
                movementStrList.append(''.join(str(x) for x in movementNumber))
                movementStrList.append('-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-0'.join(
                    str(x) for x in movementNumber))
            else:
                movementStrList += [
                    '0{0}'.format(movementNumber),
                    str(movementNumber),
                    'movement{0}'.format(movementNumber),
                ]
            for filePath in sorted(results):
                filename = filePath.name
                if filePath.suffix:
                    filenameWithoutExtension = filePath.stem
                else:
                    filenameWithoutExtension = None
                searchPartialMatch = True
                if filenameWithoutExtension is not None:
                    # look for direct matches first
                    for movementStr in movementStrList:
                        if filenameWithoutExtension.lower(
                        ) == movementStr.lower():
                            movementResults.append(filePath)
                            searchPartialMatch = False
                # if we have one direct match, all other matches must
                # be direct. this will match multiple files with different
                # file extensions
                if movementResults:
                    continue
                if searchPartialMatch:
                    for movementStr in movementStrList:
                        if filename.startswith(movementStr.lower()):
                            movementResults.append(filePath)
        else:
            movementResults = results
        return sorted(set(movementResults))

    def search(self, query, field=None, fileExtensions=None, **kwargs):
        r'''
        Search this corpus for metadata entries, returning a metadataBundle

        All arguments are forwarded to the `search` method of this corpus's
        metadata bundle.

        >>> corpus.corpora.CoreCorpus().search('3/4')
        <music21.metadata.bundles.MetadataBundle {1876 entries}>

        >>> corpus.corpora.CoreCorpus().search(
        ...      'bach',
        ...      field='composer',
        ...      )
        <music21.metadata.bundles.MetadataBundle {363 entries}>

        >>> predicate = lambda noteCount: noteCount < 20
        >>> corpus.corpora.CoreCorpus().search(
        ...     predicate,
        ...     field='noteCount',
        ...     )
        <music21.metadata.bundles.MetadataBundle {134 entries}>

        '''
        return self.metadataBundle.search(query,
                                          field=field,
                                          fileExtensions=fileExtensions,
                                          **kwargs)

    # PUBLIC PROPERTIES #

    @property
    def directoryInformation(self):
        '''
        Returns a tuple of DirectoryInformation objects for a
        each directory in self._directoryInformation.

        >>> core = corpus.corpora.CoreCorpus()
        >>> diBrief = core.directoryInformation[0:5]
        >>> diBrief
        (<music21.corpus.work.DirectoryInformation airdsAirs>,
         <music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beach>,
         <music21.corpus.work.DirectoryInformation beethoven>,
         <music21.corpus.work.DirectoryInformation chopin>)
        >>> diBrief[2].directoryTitle
        'Amy Beach'
        '''
        return tuple(
            work.DirectoryInformation(*infoTriple, corpusObject=self)
            for infoTriple in self._directoryInformation)

    @property
    @abc.abstractmethod
    def name(self):
        r'''
        The name of a given corpus.
        '''
        raise NotImplementedError

    @property
    def metadataBundle(self):
        r'''
        The metadata bundle for a corpus:

        >>> from music21 import corpus
        >>> corpus.corpora.CoreCorpus().metadataBundle
        <music21.metadata.bundles.MetadataBundle 'core': {151... entries}>

        As a technical aside, the metadata bundle for a corpus is actually
        stored in corpus.manager, in order to cache most effectively over
        multiple calls. There might be good reasons to eventually move them
        to each Corpus object, so long as its cached across instances of the
        class.
        '''
        # imported here rather than at module level
        # (presumably to avoid a circular import -- confirm)
        from music21.corpus import manager
        mdb = manager.getMetadataBundleByCorpus(self)
        # point the bundle back at the corpus it was requested through
        mdb.corpus = self
        return mdb

    def all(self):
        '''
        This is a synonym for the metadataBundle property, but easier to understand
        what it does.

        >>> from music21 import corpus
        >>> corpus.corpora.CoreCorpus().all()
        <music21.metadata.bundles.MetadataBundle 'core': {151... entries}>
        '''
        return self.metadataBundle

    def getComposer(
        self,
        composerName,
        fileExtensions=None,
    ):
        '''
        Return all filenames in the corpus that match a composer's or a
        collection's name. A `fileExtensions`, if provided, defines which
        extensions are returned. A `fileExtensions` of None (default) returns
        all extensions.

        A path matches when any of its components equals `composerName`
        (case-insensitively), either as is or after removing the component's
        last dot group.  The result is sorted.

        Note that xml and mxl are treated equivalently.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> a = coreCorpus.getComposer('bach')
        >>> len(a) > 100
        True

        >>> a = coreCorpus.getComposer('bach', 'krn')
        >>> len(a) < 10
        True

        >>> a = coreCorpus.getComposer('bach', 'xml')
        >>> len(a) > 10
        True
        '''
        paths = self.getPaths(fileExtensions)
        results = []
        for path in paths:
            # iterate through path components; cannot match entire string
            # composer name may be at any level
            stubs = path.parts
            for stub in stubs:
                # need to remove extension if found
                if composerName.lower() == stub.lower():
                    results.append(path)
                    break
                # get all but the last dot group
                # this is done for file names that function like composer names
                elif '.' in stub:
                    newStub = '.'.join(stub.split('.')[:-1]).lower()
                    if newStub == composerName.lower():
                        results.append(path)
                        break
        results.sort()
        return results

    def getWorkReferences(self):
        '''
        Return a data dictionary for all works in this corpus
        Returns a list of corpus.work.DirectoryInformation objects, one
        for each directory. A 'works' dictionary for each composer
        provides references to dictionaries for all associated works.

        This is used in the generation of corpus documentation

        >>> workRefs = corpus.corpora.CoreCorpus().getWorkReferences()
        >>> workRefs[1:3]
        [<music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beach>]
        '''
        return list(self.directoryInformation)
Beispiel #7
0
class Corpus(object):
    r'''
    Abstract base class of all corpora subclasses.
    '''

    ### CLASS VARIABLES ###

    __metaclass__ = abc.ABCMeta

    _allExtensions = (common.findInputExtension('abc') +
                      common.findInputExtension('capella') +
                      common.findInputExtension('midi') +
                      common.findInputExtension('musicxml') +
                      common.findInputExtension('musedata') +
                      common.findInputExtension('humdrum') +
                      common.findInputExtension('romantext') +
                      common.findInputExtension('noteworthytext') +
                      common.findInputExtension('noteworthy'))

    _pathsCache = {}

    _directoryInformation = ()  # a tuple of triples -- see coreCorpus

    ### SPECIAL METHODS ###

    def __repr__(self):
        '''
        Return '<module.ClassName>' for the instance's class.
        '''
        cls = self.__class__
        return '<{0}.{1}>'.format(cls.__module__, cls.__name__)

    ### PRIVATE METHODS ###

    def _removeNameFromCache(self, name):
        '''
        Delete every entry of the shared `Corpus._pathsCache` whose key has
        `name` as its first element.
        '''
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for key in list(Corpus._pathsCache):
            if key[0] == name:
                del Corpus._pathsCache[key]

    def _findPaths(self, rootDirectoryPath, fileExtensions):
        '''
        Walk `rootDirectoryPath` recursively and return a list with the full
        path of every file whose name ends with one of `fileExtensions`.

        `fileExtensions` is a list of file extensions.  Files whose name
        starts with '.' are ignored and '.svn' directories are not entered.
        A UnicodeDecodeError raised while inspecting a file name is re-raised
        as a `corpus.CorpusException`.

        NB: we've tried optimizing with `fnmatch` but it does not save any
        time.
        '''
        from music21 import corpus
        found = []
        if six.PY2:
            rootDirectoryPath = six.u(rootDirectoryPath)

        walker = os.walk(rootDirectoryPath)
        for currentDirectory, subdirectories, names in walker:
            # pruning the list in place keeps os.walk out of '.svn'
            if '.svn' in subdirectories:
                subdirectories.remove('.svn')
            for name in names:
                try:
                    hidden = name.startswith('.')
                except UnicodeDecodeError as error:
                    raise corpus.CorpusException(
                        'Incorrect filename in corpus path: {0}: {1!r}'.format(
                            name, error))
                if hidden:
                    continue
                if any(name.endswith(ending) for ending in fileExtensions):
                    found.append(os.path.join(currentDirectory, name))
        return found

    def _translateExtensions(
        self,
        fileExtensions=None,
        expandExtensions=True,
    ):
        '''
        Return the extensions to look for: the defaults when none are given,
        or the given ones, optionally expanded to all related extensions.

        A request of None (or a one-element sequence holding None) yields
        `Corpus._allExtensions`.  With `expandExtensions` true, every entry
        is replaced by the result of `common.findInputExtension`; entries
        for which that result is None are omitted.

        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> for extension in coreCorpus._translateExtensions():
        ...     extension
        ...
        '.abc'
        '.capx'
        '.mid'
        '.midi'
        '.xml'
        '.mxl'
        '.mx'
        '.musicxml'
        '.md'
        '.musedata'
        '.zip'
        '.krn'
        '.rntxt'
        '.rntext'
        '.romantext'
        '.rtxt'
        '.nwctxt'
        '.nwc'

        >>> coreCorpus._translateExtensions('.mid', False)
        ['.mid']

        >>> coreCorpus._translateExtensions('.mid', True)
        ['.mid', '.midi']

        '''
        wanted = fileExtensions
        if not common.isListLike(wanted):
            wanted = [wanted]

        nothingRequested = len(wanted) == 1 and wanted[0] is None
        if nothingRequested:
            return Corpus._allExtensions
        if not expandExtensions:
            return wanted

        result = []
        for item in wanted:
            related = common.findInputExtension(item)
            if related is None:
                continue
            result += related
        return result

    ### PRIVATE PROPERTIES ###

    @abc.abstractproperty
    def cacheName(self):
        '''
        Abstract property; this base version only raises NotImplementedError.

        NOTE(review): presumably the name under which a corpus's cache is
        stored -- confirm against the subclasses.
        '''
        raise NotImplementedError

    ### PUBLIC METHODS ###
    @abc.abstractmethod
    def getPaths(self, fileExtensions=None, expandExtensions=True):
        r'''
        The paths of the files in a given corpus.

        Abstract; this base version only raises NotImplementedError.
        getWorkList() calls it with a list of extensions and uses the
        returned paths as strings.
        '''
        raise NotImplementedError

    def getWorkList(
        self,
        workName,
        movementNumber=None,
        fileExtensions=None,
    ):
        r'''
        Search the corpus and return a list of filenames of works, always in a
        list.

        If no matches are found, an empty list is returned.

        Paths are first matched by case-insensitive substring against
        `workName` (as given, or with '/' replaced by the OS path separator).
        If any of those matches also contain the name preceded by a path
        separator, only those are kept.  When `movementNumber` is given, the
        remaining paths are filtered by file name: exact matches of the name
        without extension take precedence over names that merely start with
        a movement string.  The result is sorted and free of duplicates.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()

        # returns 1 even though there is a '.mus' file, which cannot be read...

        >>> len(coreCorpus.getWorkList('cpebach/h186'))
        1
        >>> len(coreCorpus.getWorkList('cpebach/h186', None, '.xml'))
        1

        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 3))
        1
        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 2))
        0

        Make sure that 'verdi' just gets the single Verdi piece and not the
        Monteverdi pieces:

        >>> len(coreCorpus.getWorkList('verdi'))
        1

        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        paths = self.getPaths(fileExtensions)
        results = []
        # permit workName to be a list of paths/branches
        if common.isIterable(workName):
            workName = os.path.sep.join(workName)
        # same name, spelled with this platform's path separator
        workSlashes = workName.replace('/', os.path.sep)
        # find all matches for the work name
        # TODO: this should match by path component, not just
        # substring
        for path in paths:
            if workName.lower() in path.lower():
                results.append(path)
            elif workSlashes.lower() in path.lower():
                results.append(path)
        if results:
            # more than one matched...use more stringent criterion:
            # must have a slash before the name
            previousResults = results
            results = []
            longName = os.sep + workSlashes.lower()
            for path in previousResults:
                if longName in path.lower():
                    results.append(path)
            # the stricter test matched nothing: keep the substring matches
            if not results:
                results = previousResults
        movementResults = []
        if movementNumber is not None and results:
            # store one or more possible mappings of movement number
            movementStrList = []
            # see if this is a pair
            if common.isIterable(movementNumber):
                movementStrList.append(''.join(str(x) for x in movementNumber))
                movementStrList.append('-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-0'.join(
                    str(x) for x in movementNumber))
            else:
                movementStrList += [
                    '0{0}'.format(movementNumber),
                    str(movementNumber),
                    'movement{0}'.format(movementNumber),
                ]
            for filePath in sorted(results):
                filename = os.path.split(filePath)[1]
                if '.' in filename:
                    filenameWithoutExtension = os.path.splitext(filename)[0]
                else:
                    filenameWithoutExtension = None
                searchPartialMatch = True
                if filenameWithoutExtension is not None:
                    # look for direct matches first
                    for movementStr in movementStrList:
                        #if movementStr.lower() in filePath.lower():
                        if filenameWithoutExtension.lower(
                        ) == movementStr.lower():
                            movementResults.append(filePath)
                            searchPartialMatch = False
                # if we have one direct match, all other matches must
                # be direct. this will match multiple files with different
                # file extensions
                if movementResults:
                    continue
                if searchPartialMatch:
                    # note: only the movement string is lowercased here,
                    # the file name is compared as is
                    for movementStr in movementStrList:
                        if filename.startswith(movementStr.lower()):
                            movementResults.append(filePath)
            # no-op placeholder: an empty movement result is returned as is
            if not movementResults:
                pass
        else:
            # no movement requested (or nothing found): return the work matches
            movementResults = results
        return sorted(set(movementResults))

    def search(self, query, field=None, fileExtensions=None):
        r'''
        Look up metadata entries in this corpus; the arguments are handed
        unchanged to the `search` method of the corpus's metadata bundle and
        its result (a metadataBundle) is returned.

        >>> corpus.corpora.CoreCorpus().search('3/4')
        <music21.metadata.bundles.MetadataBundle {1870 entries}>

        >>> corpus.corpora.CoreCorpus().search(
        ...      'bach',
        ...      field='composer',
        ...      )
        <music21.metadata.bundles.MetadataBundle {22 entries}>

        >>> predicate = lambda noteCount: noteCount < 20
        >>> corpus.corpora.CoreCorpus().search(
        ...     predicate,
        ...     field='noteCount',
        ...     )
        <music21.metadata.bundles.MetadataBundle {134 entries}>

        '''
        bundle = self.metadataBundle
        return bundle.search(query, field=field, fileExtensions=fileExtensions)

    ### PUBLIC PROPERTIES ###

    @property
    def directoryInformation(self):
        '''
        Build one DirectoryInformation object per entry of
        self._directoryInformation and return them together as a tuple.

        Each entry is unpacked into the positional arguments of
        work.DirectoryInformation, and this corpus is passed along as
        `corpusObject`.

        >>> core = corpus.corpora.CoreCorpus()
        >>> diBrief = core.directoryInformation[0:4]
        >>> diBrief
        (<music21.corpus.work.DirectoryInformation airdsAirs>,
         <music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beethoven>,
         <music21.corpus.work.DirectoryInformation ciconia>)
        >>> diBrief[3].directoryTitle
        'Johannes Ciconia'
        '''
        return tuple(
            work.DirectoryInformation(*entry, corpusObject=self)
            for entry in self._directoryInformation
        )

    @abc.abstractproperty
    def name(self):
        r'''
        The name identifying a given corpus.

        Abstract: this base implementation only raises NotImplementedError,
        so concrete corpus classes are expected to supply their own.
        '''
        raise NotImplementedError()

    @property
    def metadataBundle(self):
        r'''
        The metadata bundle belonging to this corpus:

        >>> from music21 import corpus
        >>> corpus.corpora.CoreCorpus().metadataBundle
        <music21.metadata.bundles.MetadataBundle 'core': {144... entries}>

        Technical note: the bundle is not stored on the Corpus object; it is
        fetched from corpus.manager, which holds it so that it can be cached
        across multiple calls. It may eventually make sense to move the
        bundles onto each Corpus object, provided they stay cached across
        instances of the class.
        '''
        # NOTE(review): the import is function-local, presumably to avoid a
        # circular import with corpus.manager -- confirm before moving it.
        from music21.corpus import manager
        bundle = manager.getMetadataBundleByCorpus(self)
        return bundle

    def getComposer(
        self,
        composerName,
        fileExtensions=None,
    ):
        '''
        Return a sorted list of all corpus paths that match the name of a
        composer or of a collection.

        The candidate paths come from `self.getPaths(fileExtensions)`; with
        `fileExtensions` left at None (the default) all extensions are
        returned. A path is kept when any one of its components equals
        `composerName` (ignoring case), either as-is or after its last
        dot-separated group has been removed, so that file names which act
        like composer names are found as well.

        Note that xml and mxl are treated equivalently.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> a = coreCorpus.getComposer('bach')
        >>> len(a) > 100
        True

        >>> a = coreCorpus.getComposer('bach', 'krn')
        >>> len(a) < 10
        True

        >>> a = coreCorpus.getComposer('bach', 'xml')
        >>> len(a) > 10
        True
        '''
        def nameMatches(component):
            # the composer name may sit at any level of the path, so each
            # component is compared on its own, never the entire string
            if composerName.lower() == component.lower():
                return True
            if '.' not in component:
                return False
            # keep everything in front of the last dot group
            trimmed = component.rpartition('.')[0].lower()
            return trimmed == composerName.lower()

        matching = [
            candidate
            for candidate in self.getPaths(fileExtensions)
            if any(nameMatches(part) for part in candidate.split(os.sep))
        ]
        return sorted(matching)

    def getWorkReferences(self):
        '''
        Return the work references for this corpus as a list of
        corpus.work.DirectoryInformation objects, one for each directory.

        The list is a new list built from self.directoryInformation. Per
        the original notes, each entry offers a 'works' dictionary giving
        access to the data for all associated works.

        This is used in the generation of corpus documentation

        >>> workRefs = corpus.corpora.CoreCorpus().getWorkReferences()
        >>> workRefs[1:3]
        [<music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beethoven>]
        '''
        return list(self.directoryInformation)
Beispiel #8
0
            schumann,
            opus41no1,

            luca,

            bach,
            bwv1080,
    ]


# Module-level storage for metadata bundles, keyed by 'core', 'virtual' and
# 'local'. Every slot starts out as None; the data is loaded on demand.
_METADATA_BUNDLES = dict.fromkeys(('core', 'virtual', 'local'))

# The input extensions reported by common.findInputExtension for each of the
# formats below, concatenated in this order.
_ALL_EXTENSIONS = (
    common.findInputExtension('abc')
    + common.findInputExtension('lily')
    + common.findInputExtension('musicxml')
    + common.findInputExtension('musedata')
    + common.findInputExtension('humdrum')
)

# store all composers in the corpus (not virtual) 
# as two element tuples of path name, full name
COMPOSERS = [
    ('beethoven', 'Ludwig van Beethoven'),
    ('ciconia', 'Johannes Ciconia'),
    ('haydn', 'Joseph Haydn'),
    ('handel', 'George Frideric Handel'),
    ('mozart', 'Wolfgang Amadeus Mozart'),
    ('schoenberg', 'Arnold Schoenberg'),
    ('schumann', 'Robert Schumann'),