Esempio n. 1
0
def prepare(package_id, force=False):
    """Process every active resource of a package and store the outcome.

    The stored workspace record for ``package_id`` is looked up (a fresh one
    is started when none exists).  Each active resource is then handled:
    resources whose extension is ``zip`` go through ``extractZip``, all
    others through ``importer.processFile``.  The collected results are
    written back to the ``package`` collection with an upsert.

    Args:
        package_id: identifier of the package to prepare.
        force: when true, run even if the record is already flagged as
            prepared.

    Returns:
        A ``success``/``message`` dict when the package was already prepared
        and ``force`` is not set; otherwise the workspace record that was
        written, including ``runInfo``, ``lastTouched`` and ``prepared``.

    Raises:
        Exception: if the package returned by ``ckanPackageQuery.get`` has
            state ``'deleted'``.
    """
    workspace = collections.get("package").find_one(
        {"packageId": package_id},
        {"_id": 0}
    )

    # first time this package is seen: start from a bare workspace record
    if workspace is None:
        workspace = {"packageId": package_id}

    # nothing to do when the work was already done and no re-run was requested
    if not force:
        if workspace.get("prepared") == True:
            return {
                "success": True,
                "message": "already prepared, use force flag to force prepare"
            }

    # refuse to prepare a package that has been deleted
    ckan_package = ckanPackageQuery.get(package_id)
    if ckan_package.get('state') == 'deleted':
        raise Exception('Package has been deleted')

    run_log = []
    for res in ckanResourceQuery.active(package_id):
        res_id = res.get('id')
        res_name = res.get('name')

        # location of the file on disk plus the extension taken from its name
        disk_path = resourceUtil.get_path(res_id)
        extension = _getFileExtension(res_name)

        if extension == "zip":
            # TODO: we should be checking a zip hash before we go unzipping every time
            # a zip yields one result per extracted entry
            run_log.extend(extractZip(package_id, res_id, disk_path, res_name))
        else:
            # any non-zip file yields exactly one result
            run_log.append(
                importer.processFile(disk_path, package_id, res_id, resource=res)
            )

    # record what was (or was not) done during this run
    workspace.update({
        "runInfo": run_log,
        "lastTouched": datetime.utcnow(),
        "prepared": True
    })

    collections.get("package").update(
        {"packageId": package_id},
        workspace,
        upsert=True
    )
    return workspace
Esempio n. 2
0
def get(package_id):
    """Gather workspace, CKAN and per-sheet information for a package.

    Args:
        package_id: identifier of the package to describe.

    Returns:
        A dict with four keys:
          * ``package``   - the stored workspace record without ``runInfo``
            and ``_id`` (an empty dict when there is none),
          * ``resources`` - the entries returned by ``getResource`` for each
            active resource, minus those flagged ``excel`` or ``isZip``,
          * ``ckan``      - the CKAN package and its active resources; each
            resource gets a ``file_size`` key added (0 when the file is not
            on disk),
          * ``pushed``    - the result of ``isPushed(package_id)``.
    """
    # the active resources are used both in the response and in the loop below
    active_resources = ckanResourceQuery.active(package_id)

    workspace_package = collections.get("package").find_one(
        {"packageId": package_id},
        {"runInfo": 0, "_id": 0}
    )
    ckan_package = ckanPackageQuery.get(package_id)
    pushed = isPushed(package_id)

    # fall back to an empty record when the package has no workspace entry
    if workspace_package is None:
        workspace_package = {}

    sheet_entries = []
    for res in active_resources:
        sheets = getResource(res.get('id'))

        # annotate the resource with its size on disk, 0 if the file is missing
        disk_path = uploader.ResourceUpload(res).get_path(res['id'])
        res['file_size'] = os.path.getsize(disk_path) if os.path.exists(disk_path) else 0

        # root excel files and zip entries are skipped; only the sheets are reported
        sheet_entries.extend(
            entry for entry in sheets
            if not (entry.get('excel') == True or entry.get('isZip') == True)
        )

    return {
        "package": workspace_package,
        "resources": sheet_entries,
        "ckan": {
            "package": ckan_package,
            "resources": active_resources
        },
        "pushed": pushed
    }
Esempio n. 3
0
def get(packageId="", resourceId=None, sheetId=None, index=0, showProcessInfo=False, must_be_valid=False, clean_wavelengths=True):
    """Fetch one spectra of a package and decorate it with joined metadata.

    The spectra is selected from the ``spectra`` collection by position
    (``index``, ordered by the ascending ``index`` field), optionally
    restricted to a single resource and/or sheet.  It is then passed through
    the helper steps below (wavelength move, metadata join, name mapping,
    USDA code lookup, controlled vocabulary enforcement, ecosis namespace,
    sort, location and photo) before being returned.

    Args:
        packageId: package the spectra belongs to.
        resourceId: optional resource id to restrict the lookup to.
        sheetId: optional sheet id to restrict the lookup to.
        index: number of matching spectra to skip before taking one.
        showProcessInfo: when true, the attribute process info gathered
            during the join/mapping steps is handed to
            ``addEcosisNamespace`` as ``processInfo``.
        must_be_valid: when true, return ``{}`` for a spectra that has no
            ``datapoints`` key or whose ``datapoints`` is empty.
        clean_wavelengths: forwarded to ``moveWavelengths``.

    Returns:
        The processed spectra dict, or ``{}`` when ``must_be_valid`` is set
        and the spectra has no datapoints.

    Raises:
        Exception: if no spectra matches the query at ``index``.
    """
    # build out query
    query = {
        "type": "data",
        "packageId": packageId
    }

    # the lookup can optionally be limited by resource and sheet id
    if resourceId is not None:
        query["resourceId"] = resourceId
    if sheetId is not None:
        query["sheetId"] = sheetId

    # get spectra at index
    main = collections.get('spectra').find_one(query, skip=index, sort=[("index", pymongo.ASCENDING)])

    if main is None:
        raise Exception('Unable to get spectra from package_id: %s at index %s' % (packageId, index))

    # the stored document also carries config information; only the
    # 'spectra' attribute is processed further
    spectra = main.get('spectra')

    # moves measurement wavelength keys to the 'datapoints' object
    # (this also replaces , with .)
    moveWavelengths(spectra, clean_wavelengths)

    if must_be_valid:
        if 'datapoints' not in spectra or len(spectra['datapoints']) == 0:
            return {}

    # get information for the sheet this spectra came from
    sheetInfo = collections.get('resource').find_one({
        "packageId": packageId,
        "resourceId": main.get("resourceId"),
        "sheetId": main.get("sheetId")
    })

    # get package information for the package this spectra came from
    package = ckanPackageQuery.get(packageId)

    # filled in by the join / mapping / usda steps below
    attributeProcessInfo = []

    # join together metadata to this spectra
    join(packageId, spectra, attributeProcessInfo)

    config = collections.get('package').find_one({"packageId": packageId})
    if config is None:
        config = {}

    # set the spectra attribute aliases
    mapNames(spectra, config, attributeProcessInfo, package)

    # lookup any usda code given
    usda.setCodes(spectra, info=attributeProcessInfo)

    # strip controlled vocab fields.  Remove any values that are not part of
    # the controlled vocabulary
    controlledVocab.enforce(spectra)

    # add 'spectra.ecosis' attribute with package and sheet info; the process
    # info is only passed along when the caller asked for it, so the helper's
    # own default applies otherwise
    namespace_kwargs = {}
    if showProcessInfo:
        namespace_kwargs["processInfo"] = attributeProcessInfo
    addEcosisNamespace(spectra, package, main, sheetInfo, **namespace_kwargs)

    # set the sort information.  This data needs to be of the correct type
    # (string, number, date) for proper sorting in mongodb
    setSort(spectra, config, package)

    # set the location information.  Needs to be proper geojson if it's going
    # to be used
    setLocation(spectra)

    # set photo
    setPhoto(packageId, spectra)

    return spectra