Beispiel #1
0
def getPaths(
        fileExtensions=None,
        expandExtensions=True,
        name=('local', 'core', 'virtual'),
):
    '''
    Collect file paths from the core, local, and/or virtual corpora.

    This is the single public entry point for gathering paths from several
    corpora at once.

    `fileExtensions` and `expandExtensions` are forwarded unchanged to the
    ``getPaths`` method of every corpus that is consulted.

    `name` selects the corpora to consult.  Each of the keys 'core', 'local'
    and 'virtual' is looked up with ``in``, so `name` is normally a
    tuple or list of keys (a plain string is matched by substring).

    Returns one list holding the core paths first, then the local paths,
    then the virtual paths, whatever the order of the keys in `name`.  The
    list is empty when `name` contains none of the three keys.
    '''
    # (selection key, class attribute on ``corpora``), in output order
    searchOrder = (
        ('core', 'CoreCorpus'),
        ('local', 'LocalCorpus'),
        ('virtual', 'VirtualCorpus'),
    )
    collected = []
    for corpusKey, className in searchOrder:
        if corpusKey not in name:
            continue
        # the class is only resolved once we know this corpus was requested
        corpusObject = getattr(corpora, className)()
        collected += corpusObject.getPaths(
            fileExtensions=fileExtensions,
            expandExtensions=expandExtensions,
        )
    return collected
Beispiel #2
0
def fromName(name):
    '''
    Build the corpus object that corresponds to `name`.

    The three built-in names map onto their own classes:

    >>> corpus.manager.fromName('core')
    <music21.corpus.corpora.CoreCorpus>

    >>> corpus.manager.fromName('virtual')
    <music21.corpus.corpora.VirtualCorpus>

    >>> corpus.manager.fromName('local')
    <music21.corpus.corpora.LocalCorpus: 'local'>

    Every other value, including None, is passed to ``LocalCorpus`` as its
    `name` argument:

    >>> corpus.manager.fromName(None)
    <music21.corpus.corpora.LocalCorpus: 'local'>

    Note that a corpus created this way probably does not exist on disk yet,
    but it is ready to have paths added to it and to be stored on disk.

    >>> corpus.manager.fromName('testDummy')
    <music21.corpus.corpora.LocalCorpus: 'testDummy'>
    '''
    # built-in names are constructed without arguments
    builtinCorpora = (
        ('core', 'CoreCorpus'),
        ('virtual', 'VirtualCorpus'),
        ('local', 'LocalCorpus'),
    )
    for builtinName, className in builtinCorpora:
        if name == builtinName:
            return getattr(corpora, className)()
    # anything else names a (possibly new) local corpus
    return corpora.LocalCorpus(name=name)
Beispiel #3
0
def iterateCorpora(returnObjects=True):
    '''
    Generator over all corpora, for use in pan-corpus searching.

    The core corpus comes first, then the virtual corpus, then one entry for
    each name reported by ``listLocalCorporaNames()``.

    With `returnObjects` set to exactly True (the default), corpus objects
    are yielded.  This test will only show the first three, because it needs
    to run the same on every system:

    >>> for i, corpusObject in enumerate(corpus.manager.iterateCorpora()):
    ...     print(corpusObject)
    ...     if i == 2:
    ...        break
    <music21.corpus.corpora.CoreCorpus>
    <music21.corpus.corpora.VirtualCorpus>
    <music21.corpus.corpora.LocalCorpus: 'local'>

    With any other value, names are yielded instead.  Note that a local
    corpus name of None is reported as 'local':

    >>> for i, corpusName in enumerate(corpus.manager.iterateCorpora(returnObjects=False)):
    ...     print(corpusName)
    ...     if i == 2:
    ...        break
    core
    virtual
    local

    New in v.3
    '''
    if returnObjects is not True:
        # names only
        yield corpora.CoreCorpus().name
        yield corpora.VirtualCorpus().name
        for localName in listLocalCorporaNames():
            yield 'local' if localName is None else localName
        return

    # full corpus objects
    yield corpora.CoreCorpus()
    yield corpora.VirtualCorpus()
    for localName in listLocalCorporaNames():
        yield corpora.LocalCorpus(localName)
Beispiel #4
0
def getVirtualPaths(fileExtensions=None, expandExtensions=True):
    '''
    Return the paths of the virtual corpus that match the given extensions.

    Both arguments are handed straight to ``VirtualCorpus.getPaths``.  An
    extension of None will return all known extensions.

    >>> len(corpus.getVirtualPaths()) > 6
    True

    '''
    virtualCorpus = corpora.VirtualCorpus()
    pathOptions = {
        'fileExtensions': fileExtensions,
        'expandExtensions': expandExtensions,
    }
    return virtualCorpus.getPaths(**pathOptions)
Beispiel #5
0
def getVirtualWorkList(workName, movementNumber=None, fileExtensions=None):
    '''
    Search the virtual corpus for `workName` and return a list of URLs for
    any matches.

    `movementNumber` and `fileExtensions` are forwarded unchanged to
    ``VirtualCorpus.getWorkList``.

    >>> corpus.getVirtualWorkList('bach/bwv1007/prelude')
    ['http://kern.ccarh.org/cgi-bin/ksdata?l=cc/bach/cello&file=bwv1007-01.krn&f=xml']

    >>> corpus.getVirtualWorkList('junk')
    []

    '''
    virtualCorpus = corpora.VirtualCorpus()
    matches = virtualCorpus.getWorkList(
        workName,
        movementNumber=movementNumber,
        fileExtensions=fileExtensions,
    )
    return matches
Beispiel #6
0
def fromCacheName(name):
    '''
    Build the corpus object that corresponds to the cache name `name`.

    For the built-in corpora the cache names are the same as the names
    accepted by `fromName`:

    >>> corpus.manager.fromCacheName('core')
    <music21.corpus.corpora.CoreCorpus>

    >>> corpus.manager.fromCacheName('virtual')
    <music21.corpus.corpora.VirtualCorpus>

    >>> corpus.manager.fromCacheName('local')
    <music21.corpus.corpora.LocalCorpus: 'local'>

    >>> corpus.manager.fromCacheName(None)
    <music21.corpus.corpora.LocalCorpus: 'local'>

    Cache names of other local corpora carry a "local-" prefix, which is
    stripped to obtain the corpus name:

    >>> corpus.manager.fromCacheName('local-testDummy')
    <music21.corpus.corpora.LocalCorpus: 'testDummy'>

    Any other string raises a CorpusException:

    >>> corpus.manager.fromCacheName('testDummy')
    Traceback (most recent call last):
    music21.exceptions21.CorpusException: Cannot parse a cacheName of 'testDummy'
    '''
    if name == 'core':
        return corpora.CoreCorpus()
    if name == 'virtual':
        return corpora.VirtualCorpus()
    if name is None or name == 'local':
        # the default local corpus
        return corpora.LocalCorpus()

    localPrefix = 'local-'
    if not name.startswith(localPrefix):
        raise CorpusException("Cannot parse a cacheName of '{0}'".format(name))
    # everything after the prefix is the name of the local corpus
    return corpora.LocalCorpus(name=name[len(localPrefix):])
Beispiel #7
0
def cacheMetadata(corpusNames=('local', 'core', 'virtual'),
                  useMultiprocessing=True,
                  verbose=False):
    '''
    Cache metadata from the corpora in `corpusNames` as local cache files.

    Call as ``metadata.cacheMetadata()``

    `corpusNames` is either one corpus name or an iterable of names; a value
    that ``common.isIterable`` rejects is wrapped in a one-element tuple.
    The recognized names are 'core', 'local' and 'virtual'.  Any other name
    raises a MetadataCacheException when the loop reaches it, i.e. after
    the names before it have already been processed.

    `useMultiprocessing` and `verbose` are forwarded to the metadata
    bundle's ``addFromPaths``.  When `verbose` is exactly True, progress
    messages go to ``environLocal.warn``; otherwise they go to
    ``environLocal.printDebug``.

    Nothing is returned; paths that ``addFromPaths`` reports back are only
    listed in the log output at the end.
    '''
    from music21 import corpus
    from music21.corpus import corpora
    # NOTE(review): `metadata` is not referenced below; the import is kept
    # as it was -- confirm whether it is needed before removing it.
    from music21 import metadata

    def report(message):
        # verbose runs surface progress as warnings, otherwise debug output
        if verbose is True:
            environLocal.warn(message)
        else:
            environLocal.printDebug(message)

    # (corpus name, class on ``corpora``, path getter on ``corpus``,
    #  useCorpus flag handed to addFromPaths)
    # the core cache is based on local files stored in music21
    # virtual is on-line
    knownCorpora = (
        ('core', 'CoreCorpus', 'getCorePaths', True),
        ('local', 'LocalCorpus', 'getLocalPaths', False),
        ('virtual', 'VirtualCorpus', 'getVirtualPaths', False),
    )

    if not common.isIterable(corpusNames):
        corpusNames = (corpusNames,)

    timer = common.Timer()
    timer.start()

    # file paths that caused an error, accumulated over all corpora
    unparsedPaths = []

    for corpusName in corpusNames:
        for knownName, className, getterName, useCorpus in knownCorpora:
            if corpusName == knownName:
                break
        else:
            raise MetadataCacheException(
                'invalid corpus name provided: {0!r}'.format(corpusName))

        bundle = getattr(corpora, className)().metadataBundle
        paths = getattr(corpus, getterName)()
        report('metadata cache: starting processing of paths: {0}'.format(
            len(paths)))

        unparsedPaths += bundle.addFromPaths(
            paths,
            useCorpus=useCorpus,
            useMultiprocessing=useMultiprocessing,
            verbose=verbose,
        )
        report('cache: writing time: {0} md items: {1}'.format(
            timer, len(bundle)))
        # drop the reference before moving on to the next corpus
        del bundle

    report('cache: final writing time: {0} seconds'.format(timer))
    for unparsedPath in unparsedPaths:
        report('path failed to parse: {0}'.format(unparsedPath))