def call_sessionmaker( root ):
    from sqlalchemy.orm import sessionmaker
    from sqlalchemy import create_engine
    from esgcet.config import loadConfig, initLogging, registerHandlers

    # init_file = "../scripts/esg.ini"
    init_file = None                    # Load installed init file
    echoSql = True

    # Load the configuration and set up a database connection
    config = loadConfig(init_file)
    root.engine = create_engine(config.getdburl('extract'), echo=echoSql, pool_recycle=3600)
    initLogging('extract', override_sa=root.engine)
    Session = sessionmaker(bind=root.engine, autoflush=True, autocommit=False)

    # Register project handlers
    registerHandlers()

    root.config = config
    root.Session = Session
    root.projectName = None
    root.firstFile = None
    root.dmap = None
    root.extraFields = None
    root.directoryMap = None
    root.datasetMapfile = None
    root.filefilt = None
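The ``root`` argument is simply an attribute holder (the publisher GUI passes its Tkinter root). A minimal usage sketch, assuming an installed esg.ini and a reachable publisher database; ``Holder`` is a hypothetical stand-in for the GUI root object:

class Holder(object):
    """Hypothetical attribute bag standing in for the GUI root object."""

root = Holder()
call_sessionmaker(root)       # attaches root.engine, root.config, root.Session and empty scan fields
session = root.Session()      # root.Session is a sessionmaker bound to root.engine
try:
    pass                      # query or publish against the session here
finally:
    session.close()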
Example #2
def call_sessionmaker(root):
    from sqlalchemy.orm import sessionmaker
    from sqlalchemy import create_engine
    from esgcet.config import loadConfig, initLogging, registerHandlers

    # init_file = "../scripts/esg.ini"
    init_file = None  # Load installed init file
    echoSql = True

    # Load the configuration and set up a database connection
    config = loadConfig(init_file)
    root.engine = create_engine(config.getdburl('extract'),
                                echo=echoSql,
                                pool_recycle=3600)
    initLogging('extract', override_sa=root.engine)
    Session = sessionmaker(bind=root.engine, autoflush=True, autocommit=False)

    # Register project handlers
    registerHandlers()

    root.config = config
    root.Session = Session
    root.projectName = None
    root.firstFile = None
    root.dmap = None
    root.extraFields = None
    root.directoryMap = None
    root.datasetMapfile = None
    root.filefilt = None
Example #3
def publishCatalogs(threddsCatalogDictionary, parentDatasetIdDictionary, thredds=True, las=True, publish=True):
    """
    Add one or more THREDDS catalogs to the THREDDS catalog directory, reinitialize THREDDS, and publish the catalogs to the gateway.

    Returns a dictionary mapping (datasetName, version) => status, as returned from ``publish.publishDatasetList``. If ``publish`` is False, the return dictionary is empty. 

    threddsCatalogDictionary
      Dictionary of THREDDS catalogs, as returned from ``generateReplicaThreddsCatalog``. The dictionary maps datasetId => THREDDS catalog XML string.

    parentDatasetIdDictionary
      Dictionary mapping datasetId => parent dataset identifier in the gateway hierarchy.

    thredds=True
      Boolean flag. If True, copy the catalogs to the THREDDS catalog directory and reinitialize the TDS server.

    las=True
      Boolean flag. If True, reinitialize the LAS server.

    publish=True
      Boolean flag. If True, publish the catalog(s) to the gateway.
    
    """
    # Load the configuration and set up a database connection
    config, Session = initdb()

    # Register project handlers
    registerHandlers()

    datasetNames = threddsCatalogDictionary.keys()
    result = publishDatasetList(datasetNames, Session, publish=publish, thredds=thredds, las=las, parentId=parentDatasetIdDictionary, threddsCatalogDictionary=threddsCatalogDictionary, reinitThredds=None, readFromCatalog=True)
    return result
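A hedged call sketch showing how the two dictionaries fit together; the dataset and parent identifiers below are placeholders, and the catalog text would really come from ``generateReplicaThreddsCatalog``:

# Placeholder identifiers and catalog content, for illustration only.
threddsCatalogs = {
    'pcmdi.ipcc4.example.dataset': '<?xml version="1.0"?><catalog>...</catalog>',
}
parentIds = {
    'pcmdi.ipcc4.example.dataset': 'pcmdi.ipcc4',   # parent dataset id in the gateway hierarchy
}

# Copy the catalogs into the TDS directory, reinitialize THREDDS and LAS, and publish.
status = publishCatalogs(threddsCatalogs, parentIds, thredds=True, las=True, publish=True)
for (datasetName, version), state in status.items():
    pass    # inspect the per-dataset publication status here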
Example #4
def load_configuration(parent):
    import os
    import pub_controls
    from esgcet.config import getHandler, getHandlerByName, registerHandlers, CFHandler
    from sqlalchemy.orm import sessionmaker
    from esgcet.publish import multiDirectoryIterator, datasetMapIterator

    offline = parent.offline
    firstFile = parent.firstFile
    projectName = parent.projectName
    config = parent.config
    Session = parent.Session

    dmap = parent.dmap
    datasetNames = parent.datasetNames
    datasetMapfile = parent.datasetMapfile

    for datasetName in datasetNames:

        # Get a file iterator and sample file
        if datasetMapfile is not None:
            firstFile = dmap[datasetName][0][0]
            fileiter = datasetMapIterator(dmap, datasetName)
        else:
            direcTuples = parent.directoryMap[datasetName]
            firstDirec, sampleFile = direcTuples[0]
            firstFile = os.path.join(firstDirec, sampleFile)
            fileiter = multiDirectoryIterator(
                [direc for direc, sampfile in direcTuples], parent.filefilt)

        # Register project handlers
        registerHandlers()

        # If the project is not specified, try to read it from the first file
        validate = True
        if projectName is not None:
            handler = getHandlerByName(projectName,
                                       firstFile,
                                       Session,
                                       validate=validate,
                                       offline=offline)
        else:
            handler = getHandler(firstFile, Session, validate=validate)

        parent.handler = handler

    # View the collection of datasets
    tab_name = "Collection %i" % parent.top_ct
    parent.ntk.new_page(parent, tab_name)
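The scan fields read off ``parent`` above have shapes that follow from the loop: ``dmap`` maps a dataset name to ``(path, size)`` pairs and ``directoryMap`` maps it to ``(directory, sampleFile)`` pairs. A sketch with illustrative values only:

# Illustrative values; real entries come from readDatasetMap or handler.generateDirectoryMap.
parent.datasetMapfile = 'datasets.map'
parent.dmap = {
    'cmip5.output1.example': [('/data/cmip5/example/file1.nc', '12345')],
}
parent.datasetNames = parent.dmap.keys()
parent.directoryMap = None          # only consulted when no dataset map file is supplied
parent.filefilt = '.*\.nc$'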
Example #5
def publishCatalogs(threddsCatalogDictionary,
                    parentDatasetIdDictionary,
                    thredds=True,
                    las=True,
                    publish=True):
    """
    Add one or more THREDDS catalogs to the THREDDS catalog directory, reinitialize THREDDS, and publish the catalogs to the gateway.

    Returns a dictionary mapping (datasetName, version) => status, as returned from ``publish.publishDatasetList``. If ``publish`` is False, the return dictionary is empty. 

    threddsCatalogDictionary
      Dictionary of THREDDS catalogs, as returned from ``generateReplicaThreddsCatalog``. The dictionary maps datasetId => THREDDS catalog XML string.

    parentDatasetIdDictionary
      Dictionary mapping datasetId => parent dataset identifier in the gateway hierarchy.

    thredds=True
      Boolean flag. If True, copy the catalogs to the THREDDS catalog directory and reinitialize the TDS server.

    las=True
      Boolean flag. If True, reinitialize the LAS server.

    publish=True
      Boolean flag. If True, publish the catalog(s) to the gateway.
    
    """
    # Load the configuration and set up a database connection
    config, Session = initdb()

    # Register project handlers
    registerHandlers()

    datasetNames = threddsCatalogDictionary.keys()
    result = publishDatasetList(
        datasetNames,
        Session,
        publish=publish,
        thredds=thredds,
        las=las,
        parentId=parentDatasetIdDictionary,
        threddsCatalogDictionary=threddsCatalogDictionary,
        reinitThredds=None,
        readFromCatalog=True)
    return result
def load_configuration( parent ):
    import os
    import pub_controls
    from esgcet.config import getHandler, getHandlerByName, registerHandlers, CFHandler
    from sqlalchemy.orm import sessionmaker
    from esgcet.publish import multiDirectoryIterator, datasetMapIterator

    offline = parent.offline
    firstFile = parent.firstFile
    projectName = parent.projectName
    config = parent.config
    Session = parent.Session

    dmap = parent.dmap
    datasetNames = parent.datasetNames
    datasetMapfile = parent.datasetMapfile

    for datasetName in datasetNames:

        # Get a file iterator and sample file
        if datasetMapfile is not None:
            firstFile = dmap[datasetName][0][0]
            fileiter = datasetMapIterator(dmap, datasetName)
        else:
            direcTuples = parent.directoryMap[datasetName]
            firstDirec, sampleFile = direcTuples[0]
            firstFile = os.path.join(firstDirec, sampleFile)
            fileiter  = multiDirectoryIterator([direc for direc, sampfile in direcTuples],parent.filefilt)

        # Register project handlers
        registerHandlers()

        # If the project is not specified, try to read it from the first file
        validate = True
        if projectName is not None:
            handler = getHandlerByName(projectName,firstFile,Session,validate=validate,offline=offline)
        else:
            handler = getHandler(firstFile, Session, validate=validate)

        parent.handler = handler

    # View the collection of datasets
    tab_name= "Collection %i" % parent.top_ct
    parent.ntk.new_page( parent, tab_name )
def main(argv):
    try:
        args, lastargs = getopt.getopt(argv, "a:ehi:o:p:", ['dataset=', 'dataset-tech-notes=', 'dataset-tech-notes-title=',\
            'filter=', 'help', 'max-threads=', 'offline', 'output=', 'project=', 'property=', 'read-directories', 'read-files',\
            'service=', 'use-version-dir', 'version='])
    except getopt.error:
        print sys.exc_value
        return

    if len(lastargs)==0:
        print 'No directory specified'
        return

    appendMap = None
    datasetName = None
    datasetTechNotesURL = None
    datasetTechNotesTitle = None
    filefilt = '.*\.nc$'
    init_file = None
    offline = False
    output = sys.stdout
    projectName = None
    properties = {}
    readFiles = False
    service = None
    max_threads = 4
    version_dir = False
    use_version = None
    
    for flag, arg in args:
        if flag=='-a':
            if os.path.exists(arg):
                appendMap = readDatasetMap(arg)
            else:
                appendMap = {}
            output = open(arg, 'a')
        elif flag=='--dataset':
            datasetName = arg
        elif flag=='--dataset-tech-notes':
            datasetTechNotesURL = arg
        elif flag=='--dataset-tech-notes-title':
            datasetTechNotesTitle = arg
        elif flag=='--filter':
            filefilt = arg
        elif flag in ['-h', '--help']:
            print usage
            sys.exit(0)
        elif flag=='-i':
            init_file = arg
        elif flag=='--max-threads':
            max_threads = int(arg)
        elif flag in ['-o', '--output']:
            output = open(arg, 'w')
        elif flag=='--offline':
            offline = True
        elif flag=='--project':
            projectName = arg
        elif flag in ['-p', '--property']:
            name, value = arg.split('=')
            properties[name] = value
        elif flag in ['-e', '--read-directories']:
            readFiles = False
        elif flag=='--read-files':
            readFiles = True
        elif flag=='--service':
            service = arg
        elif flag=='--use-version-dir':
            version_dir = True
        elif flag=='--version':
            version_dir = True
            if not re.match('^[0-9]+$', arg[0]): # e.g. 'vYYYYMMDD'
                use_version = arg[1:]
            else:
                use_version = arg
    
    # Load the configuration and set up a database connection
    config = loadConfig(init_file)
    engine = create_engine(config.getdburl('extract'), echo=False, pool_recycle=3600)
    initLogging('extract', override_sa=engine)
    Session = sessionmaker(bind=engine, autoflush=True, autocommit=False)

    # Register project handlers
    registerHandlers()

    if not offline:

        # Determine if checksumming is enabled
        line = config.get('DEFAULT', 'checksum', default=None)
        if line is not None:
            checksumClient, checksumType = splitLine(line)
        else:
            checksumClient = None

        if projectName is not None:
            handler = getHandlerByName(projectName, None, Session)
        else:
            warning("No project name specified!")
            multiIter = multiDirectoryIterator(lastargs, filefilt=filefilt)
            firstFile, size = multiIter.next()
            handler = getHandler(firstFile, Session, validate=True)
            if handler is None:
                raise ESGPublishError("No project found in file %s, specify with --project."%firstFile)
            projectName = handler.name

        if not readFiles:
            datasetMap = handler.generateDirectoryMap(lastargs, filefilt, initContext=properties, datasetName=datasetName, use_version=version_dir)
        else:
            datasetMap = handler.generateDirectoryMapFromFiles(lastargs, filefilt, initContext=properties, datasetName=datasetName)

        # Output the map
        keys = datasetMap.keys()
        keys.sort()

        datasetMapVersion = {}
        if version_dir:
            # check for version directory
            for dataset_id in keys:
                ds_id_version = dataset_id.split('#')
                if len(ds_id_version) == 2:
                    ds_id, ds_version = ds_id_version
                    if not re.match('^[0-9]+$', ds_version):
                        warning("Version must be an integer. Skipping version %s of dataset %s."%(ds_version, ds_id))
                        continue
                    if use_version and ds_version != use_version:
                        continue
                    if ds_id in datasetMapVersion:
                        datasetMapVersion[ds_id].append(ds_version)
                    else:
                        datasetMapVersion[ds_id] = [ds_version]
                else:
                    error("No version directory found. Skipping dataset %s."%dataset_id)

            if datasetMapVersion:
                keys = datasetMapVersion.keys()
                keys.sort()
            else:
                if use_version:
                    error("Version %s not found. No datasets to process."%use_version)
                else:
                    error("No datasets to process.")
                return

        for dataset_id in keys:
            skip_dataset = False
            dataset_id_version = dataset_id
            path_version = None
            # if multiple versions of the same dataset available use latest version
            if version_dir:
                path_version = sorted(datasetMapVersion[dataset_id])[-1]
                if len(datasetMapVersion[dataset_id]) > 1:
                    info("Multiple versions for %s (%s), processing latest (%s)"%(dataset_id, datasetMapVersion[dataset_id], path_version))
                dataset_id_version = '%s#%s'%(dataset_id, path_version)

            direcTuple = datasetMap[dataset_id_version]
            direcTuple.sort()
            mapfile_md = {}

            for nodepath, filepath in direcTuple:

                # If readFiles is not set, generate a map entry for each file in the directory
                # that matches filefilt ...
                if not readFiles:
                    itr = directoryIterator(nodepath, filefilt=filefilt, followSubdirectories=False)
                # ... otherwise if readFiles is set, generate a map entry for each file
                else:
                    itr = fnIterator([filepath])

                for filepath, sizet in itr:
                    size, mtime = sizet

                    mapfile_md[filepath] = [size]
                    mapfile_md[filepath].append("mod_time=%f"%float(mtime))

                    extraStuff = "mod_time=%f"%float(mtime)

                    if datasetTechNotesURL is not None:
                        mapfile_md[filepath].append('dataset_tech_notes=%s'%datasetTechNotesURL)
                        if datasetTechNotesTitle is not None:
                            mapfile_md[filepath].append('dataset_tech_notes_title=%s'%datasetTechNotesTitle)

            if checksumClient is not None:
                pool = ThreadPool(processes=max_threads)
                args = [(filepath, checksumClient) for filepath in mapfile_md]
                checksum_list = pool.map(calc_checksum_wrapper, args)

                for entry in checksum_list:
                    if not entry[1]:
                        error('Calculation of checksum for file %s failed. Skipping dataset %s ...'%(entry[0], dataset_id))
                        skip_dataset = True     # skip entire dataset if we have one file without checksum
                        break
                    mapfile_md[entry[0]].append('checksum=%s'%entry[1])
                    mapfile_md[entry[0]].append('checksum_type=%s'%checksumType)

            for fpath in mapfile_md:
                mapfile_line = '%s | %s | %d'%(dataset_id_version, fpath, mapfile_md[fpath][0])

                for md in mapfile_md[fpath][1:]:
                    mapfile_line+=' | %s'%md

                # Print the map entry if:
                # - Checksum exists for all files of dataset (in case checksumming is enabled)
                # - The map is being created, not appended, or
                # - The existing map does not have the dataset, or
                # - The existing map has the dataset, but not the file.
                if path_version:
                    ds_id = (dataset_id, int(path_version))
                else:
                    ds_id = (dataset_id, -1)
                if not skip_dataset and ( (appendMap is None) or (not appendMap.has_key(ds_id)) or (( fpath, "%d"% mapfile_md[fpath][0]) not in appendMap[ds_id]) ):
                    print >>output, mapfile_line

    else:                               # offline
        if projectName is not None:
            handler = getHandlerByName(projectName, None, Session, offline=True)
        else:
            raise ESGPublishError("Must specify --project for offline datasets.")
        listerSection = getOfflineLister(config, "project:%s"%projectName, service)
        offlineLister = config.get(listerSection, 'offline_lister_executable')
        commandArgs = "--config-section %s "%listerSection
        commandArgs += " ".join(lastargs)
        for dsetName, filepath, sizet in processNodeMatchIterator(offlineLister, commandArgs, handler, filefilt=filefilt, datasetName=datasetName, offline=True):
            size, mtime = sizet
            extrastuff = ""
            if mtime is not None:
                extrastuff = "| mod_time=%f"%float(mtime)
            if (appendMap is None) or (not appendMap.has_key(dsetName)) or ((filepath, "%d"%size) not in appendMap[dsetName]):
                print >>output, "%s | %s | %d %s"%(dsetName, filepath, size, extrastuff)

    if output is not sys.stdout:
        output.close()
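For reference, each map line written above has the form dataset_id[#version] | path | size | mod_time=<seconds>, with | checksum=<value> | checksum_type=<type> appended when checksumming is configured. An illustrative line (all values made up):

cmip5.output1.example#20120101 | /data/cmip5/example/file1.nc | 12345 | mod_time=1335854400.000000 | checksum=9f86d081 | checksum_type=SHA256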
Example #8
def esgpublishWrapper(**kw):

    from esgcet.query import queryDatasetMap

    aggregateDimension = kw.get("aggregateDimension", "time")
    datasetMapfile = kw.get("datasetMapfile", None)
    datasetName = kw.get("datasetName", None)
    directoryList = kw.get("directoryList", None)
    echoSql = kw.get("echoSql", False)
    filefilt = kw.get("filefilt", '.*\.nc$')
    init_file = kw.get("init_file", None)
    initcontext = kw.get("initcontext", {})
    keepVersion = kw.get("keepVersion", False)
    las = kw.get("las", False)
    log_filename = kw.get("log_filename", None)
    masterGateway = kw.get("masterGateway", None)
    message = kw.get("message", None)
    offline = kw.get("offline", False)
    parent = kw.get("parent", None)
    perVariable = kw.get("perVariable", None)
    projectName = kw.get("projectName", None)
    properties = kw.get("properties", {})
    publish = kw.get("publish", False)
    publishOnly = kw.get("publishOnly", False)
    publishOp = kw.get("publishOp", CREATE_OP)
    readFiles = kw.get("readFiles", False)
    readFromCatalog = kw.get("readFromCatalog", False)
    reinitThredds = kw.get("reinitThredds", None)
    rescan = kw.get("rescan", False)
    rescanDatasetName = kw.get("rescanDatasetName", [])
    resultThreddsDictionary = None
    service = kw.get("service", None)
    summarizeErrors = kw.get("summarizeErrors", False)
    testProgress1 = kw.get("testProgress1", None)
    testProgress2 = kw.get("testProgress2", None)
    thredds = kw.get("thredds", False)
    threddsCatalogDictionary = kw.get("threddsCatalogDictionary", None)
    version = kw.get("version", None)

    # If offline, the project must be specified
    if offline and (projectName is None):
        raise ESGPublishError(
            "Must specify project with --project for offline datasets")

    # Must specify version for replications
    if masterGateway is not None and version is None:
        raise ESGPublishError(
            "Must specify version with --new-version for replicated datasets")

    # Load the configuration and set up a database connection
    config, Session = initdb(init_file=init_file,
                             echoSql=echoSql,
                             log_filename=log_filename)

    # Register project handlers
    registerHandlers()

    # If the dataset map is input, just read it ...
    dmap = None
    directoryMap = None
    extraFields = None
    if datasetMapfile is not None:
        dmap, extraFields = readDatasetMap(datasetMapfile,
                                           parse_extra_fields=True)
        datasetNames = dmap.keys()

    elif rescan:
        # Note: No need to get the extra fields, such as mod_time, since
        # they are already in the database, and will be used for file comparison if necessary.
        dmap, offline = queryDatasetMap(rescanDatasetName, Session)
        datasetNames = dmap.keys()

    # ... otherwise generate the directory map.
    else:
        # Online dataset(s)
        if not offline:

            if projectName is not None:
                handler = getHandlerByName(projectName, None, Session)
            else:
                multiIter = multiDirectoryIterator(directoryList,
                                                   filefilt=filefilt)
                firstFile, size = multiIter.next()
                listIter = list(multiIter)
                handler = getHandler(firstFile, Session, validate=True)
                if handler is None:
                    raise ESGPublishError(
                        "No project found in file %s, specify with --project."
                        % firstFile)
                projectName = handler.name

            props = properties.copy()
            props.update(initcontext)
            if not readFiles:
                directoryMap = handler.generateDirectoryMap(
                    directoryList,
                    filefilt,
                    initContext=props,
                    datasetName=datasetName)
            else:
                directoryMap = handler.generateDirectoryMapFromFiles(
                    directoryList,
                    filefilt,
                    initContext=props,
                    datasetName=datasetName)

            datasetNames = [(item, -1) for item in directoryMap.keys()]

        # Offline dataset. Format the spec as a dataset map : dataset_name => [(path, size), (path, size), ...]
        else:
            handler = getHandlerByName(projectName,
                                       None,
                                       Session,
                                       offline=True)
            dmap = {}
            listerSection = getOfflineLister(config,
                                             "project:%s" % projectName,
                                             service)
            offlineLister = config.get(listerSection,
                                       'offline_lister_executable')
            commandArgs = "--config-section %s " % listerSection
            commandArgs += " ".join(directoryList)
            for dsetName, filepath, sizet in processNodeMatchIterator(
                    offlineLister,
                    commandArgs,
                    handler,
                    filefilt=filefilt,
                    datasetName=datasetName,
                    offline=True):
                size, mtime = sizet
                if dmap.has_key((dsetName, -1)):
                    dmap[(dsetName, -1)].append((filepath, str(size)))
                else:
                    dmap[(dsetName, -1)] = [(filepath, str(size))]

            datasetNames = dmap.keys()

    datasetNames.sort()
    if len(datasetNames) == 0:
        warning("No datasets found.")

    # Iterate over datasets
    if not publishOnly:
        datasets = iterateOverDatasets(projectName,
                                       dmap,
                                       directoryMap,
                                       datasetNames,
                                       Session,
                                       aggregateDimension,
                                       publishOp,
                                       filefilt,
                                       initcontext,
                                       offline,
                                       properties,
                                       keepVersion=keepVersion,
                                       newVersion=version,
                                       extraFields=extraFields,
                                       masterGateway=masterGateway,
                                       comment=message,
                                       readFiles=readFiles)

    result = publishDatasetList(
        datasetNames,
        Session,
        publish=publish,
        thredds=thredds,
        las=las,
        parentId=parent,
        service=service,
        perVariable=perVariable,
        threddsCatalogDictionary=threddsCatalogDictionary,
        reinitThredds=reinitThredds,
        readFromCatalog=readFromCatalog)

    return result
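A hedged call sketch for the wrapper, assuming an installed esg.ini and a pre-generated dataset map file; the map file name and project are placeholders:

# Scan the datasets listed in a map file, write THREDDS catalogs, and publish to the gateway.
status = esgpublishWrapper(
    datasetMapfile='datasets.map',   # placeholder path
    projectName='cmip5',             # placeholder project; a handler must be registered for it
    thredds=True,
    publish=True,
)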
Example #9
def esgscanWrapper(directoryList, **kw):

    if len(directoryList) == 0:
        raise ESGPublishError('No directory specified')

    output = sys.stdout
    appendMap = None
    appendPath = kw.get("appendPath", None)
    if appendPath is not None:
        if os.path.exists(appendPath):
            appendMap = readDatasetMap(appendPath)
        else:
            appendMap = {}
        output = open(appendPath, 'a')
    datasetName = kw.get("datasetName", None)
    filefilt = kw.get("fileFilt", '.*\.nc$')
    init_file = kw.get("initFile", None)
    offline = kw.get("offline", False)
    outputPath = kw.get("outputPath", None)
    if outputPath is not None:
        output = open(outputPath, 'w')
    projectName = kw.get("projectName", None)
    readFiles = kw.get("readFiles", False)
    service = kw.get("service", None)

    # Load the configuration and set up a database connection
    config, Session = initdb(init_file=init_file)

    # Register project handlers
    registerHandlers()

    if not offline:

        # Determine if checksumming is enabled
        line = config.get('DEFAULT', 'checksum', default=None)
        if line is not None:
            checksumClient, checksumType = splitLine(line)
        else:
            checksumClient = None

        if projectName is not None:
            handler = getHandlerByName(projectName, None, Session)
        else:
            multiIter = multiDirectoryIterator(directoryList,
                                               filefilt=filefilt)
            firstFile, size = multiIter.next()
            handler = getHandler(firstFile, Session, validate=True)
            if handler is None:
                raise ESGPublishError(
                    "No project found in file %s, specify with --project." %
                    firstFile)
            projectName = handler.name

        if not readFiles:
            datasetMap = handler.generateDirectoryMap(directoryList,
                                                      filefilt,
                                                      datasetName=datasetName)
        else:
            datasetMap = handler.generateDirectoryMapFromFiles(
                directoryList, filefilt, datasetName=datasetName)

        # Output the map
        keys = datasetMap.keys()
        keys.sort()
        for datasetId in keys:
            direcTuple = datasetMap[datasetId]
            direcTuple.sort()
            for nodepath, filepath in direcTuple:

                # If readFiles is not set, generate a map entry for each file in the directory
                # that matches filefilt ...
                if not readFiles:
                    itr = directoryIterator(nodepath,
                                            filefilt=filefilt,
                                            followSubdirectories=False)
                # ... otherwise if readFiles is set, generate a map entry for each file
                else:
                    itr = fnIterator([filepath])

                for filepath, sizet in itr:
                    size, mtime = sizet
                    extraStuff = "mod_time=%f" % float(mtime)

                    if checksumClient is not None:
                        csum = checksum(filepath, checksumClient)
                        extraStuff += " | checksum=%s | checksum_type=%s" % (
                            csum, checksumType)

                    # Print the map entry if:
                    # - The map is being created, not appended, or
                    # - The existing map does not have the dataset, or
                    # - The existing map has the dataset, but not the file.
                    if (appendMap is
                            None) or (not appendMap.has_key(datasetId)) or (
                                (filepath, "%d" % size)
                                not in appendMap[datasetId]):
                        print >> output, "%s | %s | %d | %s" % (
                            datasetId, filepath, size, extraStuff)
    else:  # offline
        if projectName is not None:
            handler = getHandlerByName(projectName,
                                       None,
                                       Session,
                                       offline=True)
        else:
            raise ESGPublishError(
                "Must specify --project for offline datasets.")
        listerSection = getOfflineLister(config, "project:%s" % projectName,
                                         service)
        offlineLister = config.get(listerSection, 'offline_lister_executable')
        commandArgs = "--config-section %s " % listerSection
        commandArgs += " ".join(directoryList)
        for dsetName, filepath, sizet in processNodeMatchIterator(
                offlineLister,
                commandArgs,
                handler,
                filefilt=filefilt,
                datasetName=datasetName,
                offline=True):
            size, mtime = sizet
            extrastuff = ""
            if mtime is not None:
                extrastuff = "| mod_time=%f" % float(mtime)
            if (appendMap is None) or (not appendMap.has_key(dsetName)) or (
                (filepath, "%d" % size) not in appendMap[dsetName]):
                print >> output, "%s | %s | %d %s" % (dsetName, filepath, size,
                                                      extrastuff)

    if output is not sys.stdout:
        output.close()
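A hedged call sketch for the scanner, with placeholder directories and project; ``appendPath`` reuses an existing map so only files not already listed are written:

# Scan two directories for netCDF files and append any new entries to an existing map file.
esgscanWrapper(
    ['/data/cmip5/run1', '/data/cmip5/run2'],
    projectName='cmip5',             # placeholder project
    fileFilt='.*\.nc$',
    appendPath='datasets.map',       # placeholder path; read first, then opened for append
)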
Example #10
def esgpublishWrapper(**kw):

    from esgcet.query import queryDatasetMap

    aggregateDimension = kw.get("aggregateDimension", "time")
    datasetMapfile = kw.get("datasetMapfile", None)
    datasetName = kw.get("datasetName", None)
    directoryList = kw.get("directoryList", None)
    echoSql = kw.get("echoSql", False)
    filefilt = kw.get("filefilt", ".*\.nc$")
    init_file = kw.get("init_file", None)
    initcontext = kw.get("initcontext", {})
    keepVersion = kw.get("keepVersion", False)
    las = kw.get("las", False)
    log_filename = kw.get("log_filename", None)
    masterGateway = kw.get("masterGateway", None)
    message = kw.get("message", None)
    offline = kw.get("offline", False)
    parent = kw.get("parent", None)
    perVariable = kw.get("perVariable", None)
    projectName = kw.get("projectName", None)
    properties = kw.get("properties", {})
    publish = kw.get("publish", False)
    publishOnly = kw.get("publishOnly", False)
    publishOp = kw.get("publishOp", CREATE_OP)
    readFiles = kw.get("readFiles", False)
    readFromCatalog = kw.get("readFromCatalog", False)
    reinitThredds = kw.get("reinitThredds", None)
    rescan = kw.get("rescan", False)
    rescanDatasetName = kw.get("rescanDatasetName", [])
    resultThreddsDictionary = None
    service = kw.get("service", None)
    summarizeErrors = kw.get("summarizeErrors", False)
    testProgress1 = kw.get("testProgress1", None)
    testProgress2 = kw.get("testProgress2", None)
    thredds = kw.get("thredds", False)
    threddsCatalogDictionary = kw.get("threddsCatalogDictionary", None)
    version = kw.get("version", None)

    # If offline, the project must be specified
    if offline and (projectName is None):
        raise ESGPublishError("Must specify project with --project for offline datasets")

    # Must specify version for replications
    if masterGateway is not None and version is None:
        raise ESGPublishError("Must specify version with --new-version for replicated datasets")

    # Load the configuration and set up a database connection
    config, Session = initdb(init_file=init_file, echoSql=echoSql, log_filename=log_filename)

    # Register project handlers
    registerHandlers()

    # If the dataset map is input, just read it ...
    dmap = None
    directoryMap = None
    extraFields = None
    if datasetMapfile is not None:
        dmap, extraFields = readDatasetMap(datasetMapfile, parse_extra_fields=True)
        datasetNames = dmap.keys()

    elif rescan:
        # Note: No need to get the extra fields, such as mod_time, since
        # they are already in the database, and will be used for file comparison if necessary.
        dmap, offline = queryDatasetMap(rescanDatasetName, Session)
        datasetNames = dmap.keys()

    # ... otherwise generate the directory map.
    else:
        # Online dataset(s)
        if not offline:

            if projectName is not None:
                handler = getHandlerByName(projectName, None, Session)
            else:
                multiIter = multiDirectoryIterator(directoryList, filefilt=filefilt)
                firstFile, size = multiIter.next()
                listIter = list(multiIter)
                handler = getHandler(firstFile, Session, validate=True)
                if handler is None:
                    raise ESGPublishError("No project found in file %s, specify with --project." % firstFile)
                projectName = handler.name

            props = properties.copy()
            props.update(initcontext)
            if not readFiles:
                directoryMap = handler.generateDirectoryMap(
                    directoryList, filefilt, initContext=props, datasetName=datasetName
                )
            else:
                directoryMap = handler.generateDirectoryMapFromFiles(
                    directoryList, filefilt, initContext=props, datasetName=datasetName
                )

            datasetNames = [(item, -1) for item in directoryMap.keys()]

        # Offline dataset. Format the spec as a dataset map : dataset_name => [(path, size), (path, size), ...]
        else:
            handler = getHandlerByName(projectName, None, Session, offline=True)
            dmap = {}
            listerSection = getOfflineLister(config, "project:%s" % projectName, service)
            offlineLister = config.get(listerSection, "offline_lister_executable")
            commandArgs = "--config-section %s " % listerSection
            commandArgs += " ".join(directoryList)
            for dsetName, filepath, sizet in processNodeMatchIterator(
                offlineLister, commandArgs, handler, filefilt=filefilt, datasetName=datasetName, offline=True
            ):
                size, mtime = sizet
                if dmap.has_key((dsetName, -1)):
                    dmap[(dsetName, -1)].append((filepath, str(size)))
                else:
                    dmap[(dsetName, -1)] = [(filepath, str(size))]

            datasetNames = dmap.keys()

    datasetNames.sort()
    if len(datasetNames) == 0:
        warning("No datasets found.")

    # Iterate over datasets
    if not publishOnly:
        datasets = iterateOverDatasets(
            projectName,
            dmap,
            directoryMap,
            datasetNames,
            Session,
            aggregateDimension,
            publishOp,
            filefilt,
            initcontext,
            offline,
            properties,
            keepVersion=keepVersion,
            newVersion=version,
            extraFields=extraFields,
            masterGateway=masterGateway,
            comment=message,
            readFiles=readFiles,
        )

    result = publishDatasetList(
        datasetNames,
        Session,
        publish=publish,
        thredds=thredds,
        las=las,
        parentId=parent,
        service=service,
        perVariable=perVariable,
        threddsCatalogDictionary=threddsCatalogDictionary,
        reinitThredds=reinitThredds,
        readFromCatalog=readFromCatalog,
    )

    return result
Example #11
def esgscanWrapper(directoryList, **kw):

    if len(directoryList) == 0:
        raise ESGPublishError("No directory specified")

    output = sys.stdout
    appendMap = None
    appendPath = kw.get("appendPath", None)
    if appendPath is not None:
        if os.path.exists(appendPath):
            appendMap = readDatasetMap(appendPath)
        else:
            appendMap = {}
        output = open(appendPath, "a")
    datasetName = kw.get("datasetName", None)
    filefilt = kw.get("fileFilt", ".*\.nc$")
    init_file = kw.get("initFile", None)
    offline = kw.get("offline", False)
    outputPath = kw.get("outputPath", None)
    if outputPath is not None:
        output = open(outputPath, "w")
    projectName = kw.get("projectName", None)
    readFiles = kw.get("readFiles", False)
    service = kw.get("service", None)

    # Load the configuration and set up a database connection
    config, Session = initdb(init_file=init_file)

    # Register project handlers
    registerHandlers()

    if not offline:

        # Determine if checksumming is enabled
        line = config.get("DEFAULT", "checksum", default=None)
        if line is not None:
            checksumClient, checksumType = splitLine(line)
        else:
            checksumClient = None

        if projectName is not None:
            handler = getHandlerByName(projectName, None, Session)
        else:
            multiIter = multiDirectoryIterator(directoryList, filefilt=filefilt)
            firstFile, size = multiIter.next()
            handler = getHandler(firstFile, Session, validate=True)
            if handler is None:
                raise ESGPublishError("No project found in file %s, specify with --project." % firstFile)
            projectName = handler.name

        if not readFiles:
            datasetMap = handler.generateDirectoryMap(directoryList, filefilt, datasetName=datasetName)
        else:
            datasetMap = handler.generateDirectoryMapFromFiles(directoryList, filefilt, datasetName=datasetName)

        # Output the map
        keys = datasetMap.keys()
        keys.sort()
        for datasetId in keys:
            direcTuple = datasetMap[datasetId]
            direcTuple.sort()
            for nodepath, filepath in direcTuple:

                # If readFiles is not set, generate a map entry for each file in the directory
                # that matches filefilt ...
                if not readFiles:
                    itr = directoryIterator(nodepath, filefilt=filefilt, followSubdirectories=False)
                # ... otherwise if readFiles is set, generate a map entry for each file
                else:
                    itr = fnIterator([filepath])

                for filepath, sizet in itr:
                    size, mtime = sizet
                    extraStuff = "mod_time=%f" % float(mtime)

                    if checksumClient is not None:
                        csum = checksum(filepath, checksumClient)
                        extraStuff += " | checksum=%s | checksum_type=%s" % (csum, checksumType)

                    # Print the map entry if:
                    # - The map is being created, not appended, or
                    # - The existing map does not have the dataset, or
                    # - The existing map has the dataset, but not the file.
                    if (
                        (appendMap is None)
                        or (not appendMap.has_key(datasetId))
                        or ((filepath, "%d" % size) not in appendMap[datasetId])
                    ):
                        print >> output, "%s | %s | %d | %s" % (datasetId, filepath, size, extraStuff)
    else:  # offline
        if projectName is not None:
            handler = getHandlerByName(projectName, None, Session, offline=True)
        else:
            raise ESGPublishError("Must specify --project for offline datasets.")
        listerSection = getOfflineLister(config, "project:%s" % projectName, service)
        offlineLister = config.get(listerSection, "offline_lister_executable")
        commandArgs = "--config-section %s " % listerSection
        commandArgs += " ".join(directoryList)
        for dsetName, filepath, sizet in processNodeMatchIterator(
            offlineLister, commandArgs, handler, filefilt=filefilt, datasetName=datasetName, offline=True
        ):
            size, mtime = sizet
            extrastuff = ""
            if mtime is not None:
                extrastuff = "| mod_time=%f" % float(mtime)
            if (
                (appendMap is None)
                or (not appendMap.has_key(dsetName))
                or ((filepath, "%d" % size) not in appendMap[dsetName])
            ):
                print >> output, "%s | %s | %d %s" % (dsetName, filepath, size, extrastuff)

    if output is not sys.stdout:
        output.close()
Example #12
def main(argv):

    try:
        args, lastargs = getopt.getopt(argv, "hi:", [
            'database-delete', 'database-only', 'echo-sql', 'map=',
            'no-republish', 'no-thredds-reinit', 'skip-gateway', 'skip-index',
            'las', 'log=', 'rest-api', 'skip-thredds', 'sync-thredds',
            'use-list='
        ])
    except getopt.error:
        print sys.exc_value
        return

    deleteAll = False
    datasetMap = None
    deleteDset = False
    unpublishOnGateway = False
    echoSql = False
    init_file = None
    gatewayOp = DELETE
    las = False
    log_filename = None
    republish = True
    restApi = None
    thredds = True
    syncThredds = False
    useList = False
    threddsReinit = True
    for flag, arg in args:
        if flag == '--database-delete':
            deleteDset = True
        elif flag == '--database-only':
            gatewayOp = NO_OPERATION
            thredds = False
            deleteDset = True
        elif flag == '--echo-sql':
            echoSql = True
        elif flag in ['-h', '--help']:
            return
        elif flag == '-i':
            init_file = arg
        elif flag == '--map':
            datasetMap = readDatasetMap(arg)
        elif flag == '--skip-gateway':
            gatewayOp = NO_OPERATION
        elif flag == '--skip-index':
            gatewayOp = NO_OPERATION
        elif flag == '--las':
            las = True
        elif flag == '--log':
            log_filename = arg
        elif flag == '--no-republish':
            republish = False
        elif flag == '--no-thredds-reinit':
            threddsReinit = False
        elif flag == '--rest-api':
            restApi = True
        elif flag == '--skip-thredds':
            thredds = False
        elif flag == '--sync-thredds':
            syncThredds = True
        elif flag == '--use-list':
            useList = True
            useListPath = arg

    if gatewayOp != NO_OPERATION and unpublishOnGateway:
        gatewayOp = UNPUBLISH

    # Load the configuration and set up a database connection
    config = loadConfig(init_file)
    engine = create_engine(config.getdburl('extract'),
                           echo=echoSql,
                           pool_recycle=3600)
    initLogging('extract', override_sa=engine, log_filename=log_filename)
    Session = sessionmaker(bind=engine, autoflush=True, autocommit=False)

    if config is None:
        raise ESGPublishError("No configuration file found.")
    threddsRoot = config.get('DEFAULT', 'thredds_root')

    # Get the default publication interface (REST or Hessian)
    if restApi is None:
        restApi = config.getboolean('DEFAULT', 'use_rest_api', default=False)

    if datasetMap is None:
        if not useList:
            datasetNames = [parseDatasetVersionId(item) for item in lastargs]
        else:
            if useListPath == '-':
                namelist = sys.stdin
            else:
                namelist = open(useListPath)
            datasetNames = []
            for line in namelist.readlines():
                versionId = parseDatasetVersionId(line.strip())
                datasetNames.append(versionId)
    else:
        datasetNames = datasetMap.keys()
        datasetNames.sort()
    result = deleteDatasetList(datasetNames,
                               Session,
                               gatewayOp,
                               thredds,
                               las,
                               deleteDset,
                               deleteAll=deleteAll,
                               republish=republish,
                               reinitThredds=threddsReinit,
                               restInterface=restApi)

    # Republish previous versions as needed. This will happen if the latest version
    # was deleted from the database, and is not
    # the only version. In this case the previous version will be rescanned to generate the aggregations.
    if republish:
        statusDict, republishList = result
        if len(republishList) > 0:

            # Register project handlers.
            registerHandlers()

            info("Republishing modified datasets:")
            republishDatasetNames = [
                generateDatasetVersionId(dsetTuple)
                for dsetTuple in republishList
            ]
            dmap, offline = queryDatasetMap(republishDatasetNames, Session)
            datasetNames = dmap.keys()
            datasets = iterateOverDatasets(None,
                                           dmap,
                                           None,
                                           republishList,
                                           Session,
                                           "time",
                                           UPDATE_OP,
                                           None, {},
                                           offline, {},
                                           forceAggregate=True)
            republishOp = (gatewayOp != NO_OPERATION
                           )  # Don't republish if skipping the gateway op
            result = publishDatasetList(datasetNames,
                                        Session,
                                        publish=republishOp,
                                        thredds=thredds)

    # Synchronize database and THREDDS catalogs
    if syncThredds:
        threddsRoot = config.get('DEFAULT', 'thredds_root')

        # Make a dictionary of catalogs from the database
        session = Session()
        subcatalogs = session.query(Catalog).select_from(
            join(Catalog, Dataset,
                 Catalog.dataset_name == Dataset.name)).all()
        catdict = {}
        for catalog in subcatalogs:
            location = os.path.join(threddsRoot, catalog.location)
            catdict[location] = 1
        session.close()

        # Scan all XML files in the threddsroot
        os.path.walk(threddsRoot, cleanupCatalogs, catdict)
def main(argv):

    try:
        args, lastargs = getopt.getopt(argv, "a:cdehi:m:p:ru", ['append', 'create', 'dataset=', 'delete-files', 'echo-sql', 'experiment=', 'filter=', 'help', 'keep-version', 'log=', 'map=', 'message=', 'model=', 'offline',  'parent=', 'per-time', 'per-variable', 'project=', 'property=', 'publish', 'new-version=', 'no-thredds-reinit', 'noscan', 'read-directories', 'read-files', 'rename-files', 'replace', 'replica=', 'rest-api', 'service=', 'set-replica', 'summarize-errors', 'thredds', 'thredds-reinit', 'update', 'use-existing=', 'use-list=', 'validate=', 'version-list=', 'nodbwrite'])
    except getopt.error:
        print sys.exc_value
        return

    aggregateDimension = "time"
    datasetMapfile = None
    datasetName = None
    echoSql = False
    filefilt = '.*\.nc$'
    init_file = None
    initcontext = {}
    keepVersion = False
    las = False
    log_filename = None
    masterGateway = None
    message = None
    offline = False
    parent = None
    perVariable = None
    projectName = None
    properties = {}
    publish = False
    publishOnly = False
    publishOp = CREATE_OP
    readFiles = False
    rescan = False
    rescanDatasetName = []
    restApi = None
    schema = None
    service = None
    summarizeErrors = False
    testProgress1 = testProgress2 = None
    thredds = False
    threddsReinit = None
    version = None
    versionList = None
    nodbwrite = False

    for flag, arg in args:
        if flag=='-a':
            aggregateDimension = arg
        elif flag=='--append':
            publishOp = UPDATE_OP
        elif flag in ['-c', '--create']:
            publishOp = CREATE_OP
        elif flag=='--dataset':
            datasetName = arg
        elif flag in ['-d', '--delete-files']:
            publishOp = DELETE_OP
        elif flag=='--echo-sql':
            echoSql = True
        elif flag=='--experiment':
            initcontext['experiment'] = arg
        elif flag=='--filter':
            filefilt = arg
        elif flag in ['-h', '--help']:
            print usage
            sys.exit(0)
        elif flag=='-i':
            init_file = arg
        elif flag=='--keep-version':
            keepVersion = True
        elif flag=='--log':
            log_filename = arg
        elif flag=='--map':
            datasetMapfile = arg
        elif flag in ['-m', '--message']:
            message = arg
        elif flag=='--model':
            initcontext['model'] = arg
        elif flag=='--nodbwrite':
            nodbwrite = True
        elif flag=='--new-version':
            try:
                version = string.atoi(arg)
                if version <=0:
                    raise ValueError
            except ValueError:
                raise ESGPublishError("Version number must be a positive integer: %s"%arg)
        elif flag=='--no-thredds-reinit':
            threddsReinit = False
        elif flag=='--noscan':
            publishOnly = True
        elif flag=='--offline':
            offline = True
        elif flag=='--parent':
            parent = arg
        elif flag=='--per-time':
            perVariable = False
        elif flag=='--per-variable':
            perVariable = True
        elif flag=='--project':
            projectName = arg
        elif flag in ['-p', '--property']:
            name, value = arg.split('=')
            properties[name] = value
        elif flag=='--publish':
            publish = True
        elif flag in ['-e', '--read-directories']:
            readFiles = False
        elif flag=='--read-files':
            readFiles = True
        elif flag=='--rename-files':
            publishOp = RENAME_OP
        elif flag in ['-r', '--replace']:
            publishOp = REPLACE_OP
        elif flag=='--replica':
            masterGateway = arg
            warning("The --replica option is deprecated. Use --set-replica instead")
        elif flag=='--rest-api':
            restApi = True
        elif flag=='--service':
            service = arg
        elif flag=='--set-replica':
            masterGateway = 'DEFAULT'
        elif flag=='--summarize-errors':
            summarizeErrors = True
        elif flag=='--thredds':
            thredds = True
        elif flag=='--thredds-reinit':
            threddsReinit = True
        elif flag in ['-u', '--update']:
            publishOp = UPDATE_OP
        elif flag=='--use-existing':
            rescan = True
            rescanDatasetName.append(arg)
        elif flag=='--use-list':
            rescan = True
            if arg=='-':
                namelist=sys.stdin
            else:
                namelist = open(arg)
            for line in namelist.readlines():
                line = line.strip()
                if line[0]!='#':
                    rescanDatasetName.append(line)
        elif flag=='--validate':
            schema = arg
            restApi = True
        elif flag=='--version-list':
            versionList = arg

    # If offline, the project must be specified
    if offline and (projectName is None):
        raise ESGPublishError("Must specify project with --project for offline datasets")

    if version is not None and versionList is not None:
        raise ESGPublishError("Cannot specify both --new-version and --version-list")

    if versionList is not None:
        version = {}
        f = open(versionList)
        lines = f.readlines()
        f.close()
        for line in lines:
            line = line.strip()
            dsid, vers = line.split('|')
            dsid = dsid.strip()
            vers = int(vers.strip())
            version[dsid] = vers

    # Load the configuration and set up a database connection
    config = loadConfig(init_file)
    engine = create_engine(config.getdburl('extract'), echo=echoSql, pool_recycle=3600)
    initLogging('extract', override_sa=engine, log_filename=log_filename)
    Session = sessionmaker(bind=engine, autoflush=True, autocommit=False)

    # Register project handlers
    registerHandlers()

    # Get the default publication interface (REST or Hessian)
    if restApi is None:
        restApi = config.getboolean('DEFAULT', 'use_rest_api', default=False)

    # If the dataset map is input, just read it ...
    dmap = None
    directoryMap = None
    extraFields = None
    if datasetMapfile is not None:
        dmap, extraFields = readDatasetMap(datasetMapfile, parse_extra_fields=True)
        datasetNames = dmap.keys()

    elif rescan:
        # Note: No need to get the extra fields, such as mod_time, since
        # they are already in the database, and will be used for file comparison if necessary.
        dmap, offline = queryDatasetMap(rescanDatasetName, Session)
        datasetNames = dmap.keys()

    # ... otherwise generate the directory map.
    else:
        # Online dataset(s)
        if not offline:
            if len(lastargs)==0:
                print "No directories specified."
                return

            if projectName is not None:
                handler = getHandlerByName(projectName, None, Session)
            else:
                multiIter = multiDirectoryIterator(lastargs, filefilt=filefilt)
                firstFile, size = multiIter.next()
                listIter = list(multiIter)
                handler = getHandler(firstFile, Session, validate=True)
                if handler is None:
                    raise ESGPublishError("No project found in file %s, specify with --project."%firstFile)
                projectName = handler.name

            props = properties.copy()
            props.update(initcontext)
            if not readFiles:
                directoryMap = handler.generateDirectoryMap(lastargs, filefilt, initContext=props, datasetName=datasetName)
            else:
                directoryMap = handler.generateDirectoryMapFromFiles(lastargs, filefilt, initContext=props, datasetName=datasetName)
            datasetNames = [(item,-1) for item in directoryMap.keys()]

        # Offline dataset. Format the spec as a dataset map : dataset_name => [(path, size), (path, size), ...]
        else:
            handler = getHandlerByName(projectName, None, Session, offline=True)
            dmap = {}
            listerSection = getOfflineLister(config, "project:%s"%projectName, service)
            offlineLister = config.get(listerSection, 'offline_lister_executable')
            commandArgs = "--config-section %s "%listerSection
            commandArgs += " ".join(lastargs)
            for dsetName, filepath, sizet in processNodeMatchIterator(offlineLister, commandArgs, handler, filefilt=filefilt, datasetName=datasetName, offline=True):
                size, mtime = sizet
                if dmap.has_key((dsetName,-1)):
                    dmap[(dsetName,-1)].append((filepath, str(size)))
                else:
                    dmap[(dsetName,-1)] = [(filepath, str(size))]

            datasetNames = dmap.keys()

    datasetNames.sort()
    if len(datasetNames)==0:
        warning("No datasets found.")
        min_version = -1
    else:
        min_version = sorted(datasetNames, key=lambda x: x[1])[0][1]

    # Must specify version for replications
    if min_version == -1 and masterGateway is not None and version is None and versionList is None:
        raise ESGPublishError("Must specify version with --new-version (or --version-list) for replicated datasets")
    
    # Iterate over datasets
    if not publishOnly:

#        pdb.set_trace()

        datasets = iterateOverDatasets(projectName, dmap, directoryMap, datasetNames, Session, aggregateDimension, publishOp, filefilt, initcontext, offline, properties, keepVersion=keepVersion, newVersion=version, extraFields=extraFields, masterGateway=masterGateway, comment=message, readFiles=readFiles, nodbwrite=nodbwrite)


    if (not nodbwrite):
        result = publishDatasetList(datasetNames, Session, publish=publish, thredds=thredds, las=las, parentId=parent, service=service, perVariable=perVariable, reinitThredds=threddsReinit, restInterface=restApi, schema=schema)
    # print `result`

    if summarizeErrors:
        print 'Summary of errors:'
        for name,versionno in datasetNames:
            dset = Dataset.lookup(name, Session)
            print dset.get_name(Session), dset.get_project(Session), dset.get_model(Session), dset.get_experiment(Session), dset.get_run_name(Session)
            if dset.has_warnings(Session):
                print '=== Dataset: %s ==='%dset.name
                for line in dset.get_warnings(Session):
                    print line