Ejemplo n.º 1
0
    def build_hierarchy(self, geonames_id, fcodes=None):
        """
        Accepts a geonames id and fetches a hierarchy of features from
        the API, returning them as a list of geoname items.

        Arguments:
            geonames_id - GeoNames identifier (int or str)
            fcodes - optional list of feature codes; when given, only
                     features whose "fcode" is in the list are returned.
                     None returns every feature.
        """
        hierarchy = []
        params = {
            "geonameId": str(geonames_id),
            "username": module_config().get("geonames_username"),
            "token": module_config().get("geonames_token")
        }
        url = DplaGeonamesGeocoder.base_uri + \
              "hierarchyJSON?%s" % urlencode(params)
        # Fetch and cache the result only on a cache miss.
        if (url not in DplaGeonamesGeocoder.resultCache):
            result = DplaGeonamesGeocoder._get_result(url)
            if result.get('geonames'):
                DplaGeonamesGeocoder.resultCache[url] = result["geonames"]
            else:
                return hierarchy
        # Return only the requested fcodes. The None test must come first:
        # `feature["fcode"] in None` raises TypeError, so the original
        # operand order crashed whenever the default fcodes=None was used.
        for feature in DplaGeonamesGeocoder.resultCache.get(url):
            if (fcodes is None
                    or ("fcode" in feature and feature["fcode"] in fcodes)):
                hierarchy.append(feature)

        return hierarchy
Ejemplo n.º 2
0
    def reverse_geocode(self, lat, lng):
        """
        Resolve a latitude/longitude pair to the nearest GeoNames place.

        Returns the first geoname match (memoized per request URL), or
        None when the service returns no results.
        """
        query = urlencode({
            "lat": lat,
            "lng": lng,
            "username": module_config().get("geonames_username"),
            "token": module_config().get("geonames_token")
        })
        url = DplaGeonamesGeocoder.base_uri + "findNearbyJSON?%s" % query
        cache = DplaGeonamesGeocoder.resultCache
        if url not in cache:
            result = DplaGeonamesGeocoder._get_result(url)
            matches = result.get("geonames")
            if matches:
                cache[url] = matches[0]
            else:
                logger.error("Could not reverse geocode (%s, %s)" % (
                    lat,
                    lng,
                ))
                return None

        return cache[url]
Ejemplo n.º 3
0
    def _name_search(self, name, params=None):
        """
        Search GeoNames for features matching a name.

        Arguments:
            name - place name to search for
            params - optional dict of extra query parameters; entries
                     override the defaults below

        Returns a list of geoname dicts (empty when nothing matched).
        """
        # params=None avoids a mutable default argument; merging with
        # dict.update() replaces the Python-2-only
        # dict(defaults.items() + params.items()) idiom.
        merged = { "q": name.encode("utf8"),
                   "maxRows": 15,
                   "username": module_config().get("geonames_username"),
                   "token": module_config().get("geonames_token") }
        merged.update(params or {})

        url = DplaGeonamesGeocoder.base_uri + "searchJSON?%s" % \
              urlencode(merged)
        if (url not in DplaGeonamesGeocoder.resultCache):
            result = DplaGeonamesGeocoder._get_result(url)
            if result.get('geonames'):
                DplaGeonamesGeocoder.resultCache[url] = result["geonames"]
            else:
                return []
        return DplaGeonamesGeocoder.resultCache[url]
Ejemplo n.º 4
0
def update_document(document, filepath, mime, status):
    """
    Updates the document with the filepath, MIME type, and status of a
    downloaded thumbnail.

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath to insert as the object "@id"
        mime string - MIME type stored as the object "format"
        status string - value for the "admin/object_status" property

    Returns:
        The same document, updated in place.
    """
    if filepath:
        # The thumbnail URL is the configured root joined with the path.
        base_url = module_config().get('thumbs_root_url')
        document["object"]["@id"] = base_url + filepath
    if filepath or mime:
        # The original set "format" once in the filepath branch and again
        # in the mime branch; this single assignment is equivalent (a
        # truthy filepath with a falsy mime still stores that falsy mime,
        # exactly as before).
        document["object"]["format"] = mime
    if status:
        setprop(document, "admin/object_status", status)

    return document
Ejemplo n.º 5
0
    def reverse_geocode(self, lat, lng):
        # Resolve a lat/lng pair to the nearest GeoNames place, caching
        # the first match per request URL; returns None on no match.
        query = urlencode({ "lat": lat,
                            "lng": lng,
                            "username": module_config().get("geonames_username"),
                            "token": module_config().get("geonames_token") })
        url = "http://ws.geonames.net/findNearbyJSON?%s" % query
        cache = DplaGeonamesGeocoder.resultCache
        if url not in cache:
            payload = json.loads(util.decode_page(urlopen(url)))
            places = payload.get("geonames")
            if places:
                cache[url] = places[0]
            else:
                logger.error("Could not reverse geocode (%s, %s)" % (lat, lng,))
                return None

        return cache[url]
Ejemplo n.º 6
0
def update_document(document, filepath, mime, status):
    """
    Updates the document with the filepath, MIME type, and status of a
    downloaded thumbnail.

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath to insert as the object "@id"
        mime string - MIME type stored as the object "format"
        status string - value for the "admin/object_status" property

    Returns:
        The same document, updated in place.
    """
    if filepath:
        # The thumbnail URL is the configured root joined with the path.
        base_url = module_config().get('thumbs_root_url')
        document["object"]["@id"] = base_url + filepath
    if filepath or mime:
        # The original set "format" once in the filepath branch and again
        # in the mime branch; this single assignment is equivalent (a
        # truthy filepath with a falsy mime still stores that falsy mime,
        # exactly as before).
        document["object"]["format"] = mime
    if status:
        setprop(document, "admin/object_status", status)

    return document
Ejemplo n.º 7
0
 def __init__(self):
     """Configure the Twofishes geocoder endpoint and default query."""
     config = module_config()
     self.base_url = config.get('twofishes_base_url')
     # Shared query parameters sent with every request.
     self.params = {
         'lang': 'en',
         'responseIncludes': 'PARENTS,DISPLAY_NAME',
         'maxInterpretations': 1
     }
Ejemplo n.º 8
0
    def _name_search(self, name, params=None):
        """
        Search GeoNames for features matching a name.

        Arguments:
            name - place name to search for
            params - optional dict of extra query parameters; entries
                     override the defaults below

        Returns a list of geoname dicts (empty when nothing matched).
        """
        # params=None avoids a mutable default argument; merging with
        # dict.update() replaces the Python-2-only
        # dict(defaults.items() + params.items()) idiom.
        merged = {
            "q": name.encode("utf8"),
            "maxRows": 15,
            "username": module_config().get("geonames_username"),
            "token": module_config().get("geonames_token")
        }
        merged.update(params or {})

        url = DplaGeonamesGeocoder.base_uri + "searchJSON?%s" % \
              urlencode(merged)
        if (url not in DplaGeonamesGeocoder.resultCache):
            result = DplaGeonamesGeocoder._get_result(url)
            if result.get('geonames'):
                DplaGeonamesGeocoder.resultCache[url] = result["geonames"]
            else:
                return []
        return DplaGeonamesGeocoder.resultCache[url]
Ejemplo n.º 9
0
    def reverse_geocode_hierarchy(self, lat, lng, fcodes=None):
        """
        Reverse geocode a point and return its GeoNames place hierarchy.

        Arguments:
            lat, lng - coordinates to reverse geocode
            fcodes - optional list of feature codes used to filter the
                     hierarchy; None returns every parent place

        Returns a (possibly empty) list of geoname place dicts.
        """
        hierarchy = []

        geonames_item = self.reverse_geocode(lat, lng)
        if (geonames_item):
            params = { "geonameId": geonames_item["geonameId"],
                       "username": module_config().get("geonames_username"),
                       "token": module_config().get("geonames_token") }
            url = "http://ws.geonames.net/hierarchyJSON?%s" % urlencode(params)
            if (url not in DplaGeonamesGeocoder.resultCache):
                result = json.loads(util.decode_page(urlopen(url)))
                DplaGeonamesGeocoder.resultCache[url] = result["geonames"]

            # Return only the requested fcodes. The None test must come
            # first: `place["fcode"] in None` raises TypeError, so the
            # original operand order crashed for the default fcodes=None.
            for place in DplaGeonamesGeocoder.resultCache[url]:
                if (fcodes is None
                        or ("fcode" in place and place["fcode"] in fcodes)):
                    hierarchy.append(place)

        return hierarchy
Ejemplo n.º 10
0
def test_environment():
    # Sanity-check the Akara configuration API: raw_config exists and
    # module_config supports [], .get (with default), and .require.
    assert akara.raw_config is not None
    assert akara.module_config("Akara") is not None
    assert akara.module_config("Akara")["Listen"] is not None
    assert akara.module_config("Akara").get("Listen") == akara.module_config("Akara")["Listen"]
    assert akara.module_config("Akara").get("XYZListen", 123) == 123
    assert akara.module_config("Akara").require("Listen", "SHRDLU")
    try:
        akara.module_config("Akara").require("XYZListen", "SHRDLU")
        # If require() failed to raise, this AssertionError is caught
        # below and its message (lacking "SHRDLU") fails the assert.
        raise AssertionError
    except Exception as err:
        # "except Exception, err" is Python-2-only syntax; the "as" form
        # is valid on Python 2.6+ and Python 3.
        assert "SHRDLU" in str(err)
Ejemplo n.º 11
0
    def reverse_geocode(self, lat, lng):
        # Find the nearest GeoNames place for a coordinate pair; results
        # are memoized per request URL. Returns None when no place matches.
        query = urlencode({
            "lat": lat,
            "lng": lng,
            "username": module_config().get("geonames_username"),
            "token": module_config().get("geonames_token")
        })
        url = "http://ws.geonames.net/findNearbyJSON?%s" % query
        cache = DplaGeonamesGeocoder.resultCache
        if url not in cache:
            payload = json.loads(util.decode_page(urlopen(url)))
            places = payload.get("geonames")
            if places:
                cache[url] = places[0]
            else:
                logger.error("geocode: Could not reverse geocode (%s, %s)" % (
                    lat,
                    lng,
                ))
                return None

        return cache[url]
Ejemplo n.º 12
0
    def reverse_geocode(self, lat, lng):
        """
        Accepts latitude and longitude values and returns a geonames
        place that matches their value, or None when the service has no
        result. Successful lookups are cached per request URL.
        """
        query = urlencode({ "lat": lat,
                            "lng": lng,
                            "username": module_config().get("geonames_username"),
                            "token": module_config().get("geonames_token") })
        url = DplaGeonamesGeocoder.base_uri + "findNearbyJSON?%s" % query
        cache = DplaGeonamesGeocoder.resultCache
        if url not in cache:
            result = DplaGeonamesGeocoder._get_result(url)
            matches = result.get("geonames")
            if matches:
                cache[url] = matches[0]
            else:
                logger.error("Could not reverse geocode (%s, %s)" %
                             (lat, lng,))
                return None

        return cache[url]
Ejemplo n.º 13
0
def find_conversion_dictionary(mapping_key):
    """Look up the module-level dictionary used for value conversion.

    Args:
        mapping_key (Str): Name of conversion key read from Akara.conf

    Returns:
        Dictionary used for converting values.
    """
    # The key-to-name mapping lives in akara.conf; the named dictionary
    # itself is a module-level global.
    lookup = module_config().get('lookup_mapping')
    return globals()[lookup[mapping_key].upper()]
Ejemplo n.º 14
0
    def reverse_geocode_hierarchy(self, lat, lng, fcodes=None):
        """
        Reverse geocode a point and return its GeoNames place hierarchy.

        Arguments:
            lat, lng - coordinates to reverse geocode
            fcodes - optional list of feature codes used to filter the
                     hierarchy; None returns every parent place

        Returns a (possibly empty) list of geoname place dicts.
        """
        hierarchy = []

        geonames_item = self.reverse_geocode(lat, lng)
        if (geonames_item):
            params = {
                "geonameId": geonames_item["geonameId"],
                "username": module_config().get("geonames_username"),
                "token": module_config().get("geonames_token")
            }
            url = "http://ws.geonames.net/hierarchyJSON?%s" % urlencode(params)
            if (url not in DplaGeonamesGeocoder.resultCache):
                result = json.loads(util.decode_page(urlopen(url)))
                DplaGeonamesGeocoder.resultCache[url] = result["geonames"]

            # Return only the requested fcodes. The None test must come
            # first: `place["fcode"] in None` raises TypeError, so the
            # original operand order crashed for the default fcodes=None.
            for place in DplaGeonamesGeocoder.resultCache[url]:
                if (fcodes is None
                        or ("fcode" in place and place["fcode"] in fcodes)):
                    hierarchy.append(place)

        return hierarchy
Ejemplo n.º 15
0
def find_conversion_dictionary(mapping_key):
    """Finds the dictionary with values to use for conversion.

    Args:
        mapping_key (Str): Name of conversion key read from Akara.conf

    Returns:
        Dictionary used for converting values.
    """
    # The mapping in akara.conf names a module-level dictionary, which
    # is fetched from this module's globals.
    lookup = module_config().get("lookup_mapping")
    return globals()[lookup[mapping_key].upper()]
Ejemplo n.º 16
0
def find_conversion_dictionary(mapping_key):
    """Finds the dictionary with values to use for conversion.

    Args:
        mapping_key (Str): Name of conversion key read from Akara.conf

    Returns:
        Dictionary used for converting values.
    """
    # The mapping in akara.conf names a module-level dictionary.
    lookup = module_config().get('lookup_mapping')
    logger.debug("Looking for mapping using key [%s]" % mapping_key)
    target = lookup[mapping_key].upper()
    logger.debug("Found substitution dict [%s] for key mapping [%s]" % (target, mapping_key,))
    return globals()[target]
Ejemplo n.º 17
0
    def build_hierarchy(self, geonames_id, fcodes=None):
        """
        Accepts a geonames id and fetches a hierarchy of features from
        the API, returning them as a list of geoname items.

        When fcodes is given, only features whose "fcode" appears in it
        are returned; fcodes=None returns every feature.
        """
        hierarchy = []
        params = { "geonameId": str(geonames_id),
                   "username": module_config().get("geonames_username"),
                   "token": module_config().get("geonames_token") }
        url = DplaGeonamesGeocoder.base_uri + \
              "hierarchyJSON?%s" % urlencode(params)
        if (url not in DplaGeonamesGeocoder.resultCache):
            result = DplaGeonamesGeocoder._get_result(url)
            if result.get('geonames'):
                DplaGeonamesGeocoder.resultCache[url] = result["geonames"]
            else:
                return hierarchy
        # Return only the requested fcodes. The None test must come first:
        # `feature["fcode"] in None` raises TypeError, so the original
        # operand order crashed whenever the default fcodes=None was used.
        for feature in DplaGeonamesGeocoder.resultCache.get(url):
            if (fcodes is None or
                    ("fcode" in feature and feature["fcode"] in fcodes)):
                hierarchy.append(feature)

        return hierarchy
Ejemplo n.º 18
0
def find_conversion_dictionary(mapping_key):
    """Finds the dictionary with values to use for conversion.

    Args:
        mapping_key (Str): Name of conversion key read from Akara.conf

    Returns:
        Dictionary used for converting values.
    """
    # The mapping in akara.conf names a module-level dictionary.
    lookup = module_config().get('lookup_mapping')
    logger.debug("Looking for mapping using key [%s]" % mapping_key)
    target = lookup[mapping_key].upper()
    logger.debug("Found substitution dict [%s] for key mapping [%s]" % (
        target,
        mapping_key,
    ))
    return globals()[target]
Ejemplo n.º 19
0
from akara import logger, response, module_config
from akara.services import simple_service
from amara.thirdparty import json
import dplaingestion.itemtype as itemtype

# Keyword-to-type lookup tables read from the 'enrich_type' section of
# the module config: one keyed on original "type" keywords, one on
# physical-format keywords.
type_for_type_keyword = \
        module_config('enrich_type').get('type_for_ot_keyword')
type_for_format_keyword = \
        module_config('enrich_type').get('type_for_phys_keyword')


@simple_service('POST', 'http://purl.org/la/dp/enrich-type', 'enrich-type',
                'application/json')
def enrichtype(body,
               ctype,
               action="enrich-type",
               prop="sourceResource/type",
               format_field="sourceResource/format",
               default=None,
               send_rejects_to_format=False):
    """
    Service that accepts a JSON document and enriches the "type" field of
    that document.

    By default works on the 'type' field, but can be overridden by passing
    the name of the field to use as a parameter.

    A default type, if none can be determined, may be specified with the
    "default" querystring parameter.  If no default is given, the type
    field will be unmodified, or not added, in the result.

    NOTE(review): only the signature and docstring are visible in this
    chunk; the implementation appears to be truncated here.
    """
Ejemplo n.º 20
0
'''

import amara
from amara.xslt import transform
from amara.xpath.util import simplify
from amara.bindery import html
from amara.lib import irihelpers, inputsource

import akara
from akara.services import simple_service
from akara import response

XSLT_SERVICE_ID = 'http://purl.org/akara/services/demo/xslt'
XPATH_SERVICE_ID = 'http://purl.org/akara/services/demo/xpath'

DEFAULT_TRANSFORM = akara.module_config().get('default_transform')
URI_SPACE = akara.module_config().get('uri_space',
                                      'http://github.com/zepheira').split()

#FIXME! The core URI auth code is tested, but not the use in this module
# Bug fix: URI_SPACE is a *list* (result of .split()), so the original
# `URI_SPACE == '*'` comparison against a string was always False and the
# allow-all branch was unreachable. Test for membership instead.
if '*' in URI_SPACE:
    #Allow all URI access
    ALLOWED = [(True, True)]
else:
    ALLOWED = []
    for baseuri in URI_SPACE:
        #Add a rule that permits URIs starting with this URISPACE item
        #FIXME: Technically should normalize uri and base, but this will work for most cases
        ALLOWED.append((lambda uri, base=baseuri: uri.startswith(base), True))
Ejemplo n.º 21
0
from amara.lib.iri import split_fragment, split_uri_ref, unsplit_uri_ref, split_authority
#from amara import inputsource

# Akara Imports
from akara import module_config, logger, response
from akara.util import multipart_post_handler, wsgibase, http_method_handler, copy_headers_to_dict
from akara.services import method_dispatcher
from akara.util import status_response, read_http_body_to_temp
from akara.util import BadTargetError, HTTPAuthorizationError, MoinAuthorizationError, UnexpectedResponseError, MoinMustAuthenticateError, MoinNotFoundError, ContentLengthRequiredError, GenericClientError, ConflictError
import akara.util.moin as moin

# ======================================================================
#                         Module Configruation
# ======================================================================

# Map of target wiki names to their URLs, from the "targets" config
# setting (see the opener-building comment below).
TARGET_WIKIS = module_config().get("targets", {})
# Per-wiki opener objects; populated elsewhere from TARGET_WIKIS.
TARGET_WIKI_OPENERS = {}
# Fallback opener: cookie-aware and able to send multipart POST bodies.
DEFAULT_OPENER = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(),
    multipart_post_handler.MultipartPostHandler)

# Specifies the default max-age of Moin pages
CACHE_MAX_AGE = module_config().get("CACHE_MAX_AGE", None)

# Specifies a Wiki path (currently only one, FIXME) under which no caching will occur
NO_CACHE_PATHS = module_config().get("NO_CACHE_PATHS", None)

# Look at each Wiki URL and build an appropriate opener object for retrieving
# pages.   If the URL includes HTTP authentication information such as
# http://user:[email protected]/mywiki, the opener is built with
# basic authentication enabled.   For details, see:
Ejemplo n.º 22
0
from akara.services import simple_service
from akara import request, response
from akara import module_config, logger
from akara.util import copy_headers_to_dict
from amara.thirdparty import json, httplib2
from amara.lib.iri import join
from urllib import quote
import datetime
import uuid
import base64

# CouchDB connection settings from the module config.
COUCH_DATABASE = module_config().get('couch_database')
COUCH_DATABASE_USERNAME = module_config().get('couch_database_username')
COUCH_DATABASE_PASSWORD = module_config().get('couch_database_password')

# Build "source--localname" document ids.
COUCH_ID_BUILDER = lambda src, lname: "--".join((src,lname))
# Set id to value of the first identifier, disambiguated w source. Not sure if
# an OAI handle is guaranteed or on what scale it's unique.
# FIXME it's looking like an id builder needs to be part of the profile. Or UUID as fallback?
COUCH_REC_ID_BUILDER = lambda src, rec: COUCH_ID_BUILDER(src,rec.get(u'id','no-id').strip().replace(" ","__"))

# Bug fix: base64.encodestring() inserts newlines (always a trailing one,
# plus one every 76 chars), which corrupts the Authorization header value;
# strip them out of the encoded credentials.
COUCH_AUTH_HEADER = { 'Authorization' : 'Basic ' + base64.encodestring(COUCH_DATABASE_USERNAME+":"+COUCH_DATABASE_PASSWORD).replace("\n", "") }

# FIXME: this should be JSON-LD, but CouchDB doesn't support +json yet
CT_JSON = {'Content-Type': 'application/json'}


H = httplib2.Http()
H.force_exception_as_status_code = True

# FIXME: should support changing media type in a pipeline
Ejemplo n.º 23
0
from akara import logger, response, module_config
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import delprop, getprop, setprop, exists
import dplaingestion.itemtype as itemtype
import re

# Keyword-to-type lookup tables read from the 'enrich_type' section of
# the module config: one keyed on original "type" keywords, one on
# physical-format keywords.
type_for_type_keyword = \
        module_config('enrich_type').get('type_for_ot_keyword')
type_for_format_keyword = \
        module_config('enrich_type').get('type_for_phys_keyword')

@simple_service('POST', 'http://purl.org/la/dp/enrich-type', 'enrich-type',
                'application/json')
def enrichtype(body, ctype,
               action="enrich-type",
               prop="sourceResource/type",
               format_field="sourceResource/format",
               default=None,
               send_rejects_to_format=False):
    """
    Service that accepts a JSON document and enriches the "type" field of
    that document.

    By default works on the 'type' field, but can be overridden by passing
    the name of the field to use as a parameter.

    A default type, if none can be determined, may be specified with the
    "default" querystring parameter.  If no default is given, the type
    field will be unmodified, or not added, in the result.

    NOTE(review): only the signature and docstring are visible in this
    chunk; the implementation appears to be truncated here.
    """
Ejemplo n.º 24
0
        # This is a very simple implementation of conditional GET with
        # the Last-Modified header. It makes media files a bit speedier
        # because the files are only read off disk for the first request
        # (assuming the browser/client supports conditional GET).
        mtime = formatdate(os.stat(filename).st_mtime, usegmt=True)
        headers = [('Last-Modified', mtime)]
        if environ.get('HTTP_IF_MODIFIED_SINCE', None) == mtime:
            status = '304 Not Modified'
            output = ()
        else:
            status = '200 OK'
            mime_type = mimetypes.guess_type(filename)[0]
            if mime_type:
                headers.append(('Content-Type', mime_type))
            output = [fp.read()]
            fp.close()
        start_response(status, headers)
        return output


import akara

# Warn loudly if this module has no config section; the "paths" map below
# would then be empty and no media services would be registered.
if not akara.module_config():
    akara.logger.warn("No configuration section found for %r" % (__name__, ))

# Map of URL path -> root argument for MediaHandler (presumably a
# filesystem directory — confirm against MediaHandler's definition).
paths = akara.module_config().get("paths", {})

# Register one MediaHandler service per configured path.
for path, root in paths.items():
    handler = MediaHandler(root)
    registry.register_service(SERVICE_ID, path, handler)
Ejemplo n.º 25
0
from itertools import dropwhile

import amara
from amara import bindery
from amara.tools import atomtools
from amara.thirdparty import httplib2
from amara.lib.util import first_item
from amara.thirdparty import json

from akara.services import simple_service
from akara import request, response
from akara import logger, module_config


# These come from the akara.demos.atomtools section of the Akara configuration file
# NOTE(review): module_config().warn(key, default, description) appears to
# read a setting with a default and a description — confirm against the
# akara configuration API.
ENTRIES = module_config().warn("entries", "/path/to/entry/files/*.atom",
                               "glob path to Atom entries")

FEED_ENVELOPE = module_config().warn("feed_envelope",
'''<feed xmlns="http://www.w3.org/2005/Atom">
<title>This is my feed</title><id>http://example.com/my_feed</id>
</feed>''', "XML envelope around the Atom entries")


#text/uri-list from RFC 2483
SERVICE_ID = 'http://purl.org/akara/services/demo/atom.json'
@simple_service('GET', SERVICE_ID, 'akara.atom.json', 'application/json')
def atom_json(url):
    '''
    Convert Atom syntax to Exhibit JSON
    (see: http://www.ibm.com/developerworks/web/library/wa-realweb6/ ; this is based on listing 3)
    
Ejemplo n.º 26
0
# Conversion artifact type identifiers used by this service.
VALUE_SET_TYPE = 'value_set'
VARIABLE_LABELS_TYPE = 'variable_labels'
VALUE_LABELS_TYPE = 'value_labels'

#R_SCRIPT = '''library(foreign)
#mydata <- read.spss(file='%s')
#write.csv2(mydata)
#'''

# R program template: load an SPSS file with Hmisc and dump it as CSV
# (the %s placeholder receives the input file path).
R_SCRIPT = '''library(Hmisc)
mydata <- spss.get(file='%s')
write.csv2(mydata)
'''

# Executable used to run R, overridable via the 'r_command' setting.
R_FILE_CMD = akara.module_config(__name__).get('r_command', 'r')

POR_REQUIRED = _("The 'POR' POST parameter is mandatory.")

SERVICE_ID = 'http://purl.org/akara/services/demo/spss.json'
@simple_service('POST', SERVICE_ID, 'spss.json', 'application/json')
def spss2json(body, ctype, **params):
    '''
    Uses GNU R to convert SPSS to JSON
    Optionally tries to guess long labels from an original .SPS file
    
    Requires POST body of multipart/form-data
    
    Sample request:
    curl -F "[email protected]" http://localhost:8880/spss.json
    curl -F "[email protected]" -F "[email protected]" http://localhost:8880/spss.json
Ejemplo n.º 27
0
"""

__author__ = 'aleksey'

import re

from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from akara import module_config

from dplaingestion import selector


# Values read from module config. NOTE(review): their use is not visible
# in this chunk — confirm semantics against the rest of the module.
IGNORE = module_config().get('IGNORE')
PENDING = module_config().get('PENDING')

@simple_service('POST', 'http://purl.org/la/dp/artstor_identify_object',
                'artstor_identify_object', 'application/json')
def artstor_identify_object(body, ctype, download="True"):

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    original_document_key = u"originalRecord"
    original_sources_key = u"handle"
Ejemplo n.º 28
0
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming in as follows:
        1. Attempt to get a lat/lng coordinate from the property. We use Bing to
           lookup lat/lng from a string as it is much better than Geonames.
        2. For the given lat/lng coordinate, attempt to determine its parent
           features (county, state, country). We use Geonames to reverse geocode
           the lat/lng point and retrieve the location hierarchy.

    Returns the (possibly updated) record serialized as JSON, or a plain
    error string (with a 500 response code) when the body is not JSON.
    '''
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        # No spatial property; pass the record through untouched.
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        for v in iterify(value):
            # Hoisted the duplicate coordinate() call out of the original
            # `if coordinate(...): result = coordinate(...)` pattern.
            result = coordinate(v["name"])
            if not result:
                # Attempt to find this item's lat/lng coordinates
                result = DplaBingGeocoder(api_key=module_config().get(
                    "bing_api_key")).geocode_spatial(v)

            if (result):
                lat, lng = result
                v[newprop] = "%s, %s" % (
                    lat,
                    lng,
                )

                # Reverse-geocode this location to find country, state, and county parent places
                hierarchy = DplaGeonamesGeocoder().reverse_geocode_hierarchy(
                    lat,
                    lng,
                    [
                        "PCLI",  # Country
                        "ADM1",  # State
                        "ADM2"
                    ])  # County
                for place in hierarchy:
                    # (Removed the unused local `fcode = place["fcode"]`;
                    # the comparisons below read place["fcode"] directly.)
                    if ("PCLI" == place["fcode"]):
                        v["country"] = place["toponymName"]
                    elif ("ADM1" == place["fcode"]):
                        v["state"] = place["toponymName"]
                    elif ("ADM2" == place["fcode"]):
                        v["county"] = place["toponymName"]

                    # Determine how close we are to the original coordinates, to see if this is the
                    # place that was geocoded and we should stop adding more specificity (e.g. if
                    # the record specified "South Carolina", we don't want to include the county
                    # that is located at the coordinates of South Carolina. We use increasing
                    # tolerance levels to account for differences between Bing and Geonames
                    # coordinates.
                    d = haversine((lat, lng), (place["lat"], place["lng"]))
                    if (("PCLI" == place["fcode"] and d < 50
                         )  # Country tolerance (Bing/Geonames 49.9km off) \
                            or ("ADM1" == place["fcode"]
                                and d < 15)):  # State tolerance
                        break
            else:
                logger.debug("geocode: No result found for %s" % v)

    return json.dumps(data)
Ejemplo n.º 29
0
from zen import spreadsheet as spreadsheetlib
from zen.temporal import smart_parse_date
from zen.csvhelper import readcsv
from zen.mods import mods2json#, MODS_NAMESPACE
#from zen.akamod import geolookup_service
from zen.whatfile import guess_imt
from zen.feeds import webfeed
from zen.exhibit import UNSUPPORTED_IN_EXHIBITKEY
from zen import ejsonify

from . import __version__ as VERSION

CHUNKCOUNT = 10

# External BibTeX-to-MODS command and diagnostics flag, both
# overridable through the module config.
BIBTEX2MODS = module_config().get('bib2xml_command', 'bib2xml')
DIAGNOSTICS = module_config().get('dataload_diagnostics', False)

# Internet media types recognized as BibTeX input.
BIBTEX_IMT = ["application/x-bibtex", "text/x-bibtex"]

GENERIC_BINARY_IMT = 'application/octet-stream'
UNKNOWN_IMT = 'application/unknown'
UNKNOWN_TEXT_IMT = 'text/unknown'
# Media types treated as Excel/Office uploads (note GENERIC_BINARY_IMT
# is deliberately included in the list).
EXCEL_IMTS = ['application/vnd.ms-excel', 'application/vnd.ms-office', 'application/msword', GENERIC_BINARY_IMT]

#FIXME: Will grow monotonically.  Switch to LRU algo
#CACHE = {}
# Property names reserved by Exhibit — presumably must not be clobbered
# by imported data; confirm against the exhibit module.
EXHIBIT_RESERVED = ['label', 'id', 'type']

MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
Ejemplo n.º 30
0
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming as follows:

    1. If the coordinates property does not exist, attempt to extract it from
       name.
    2. Run GeoNames enrichment, reverse encoding coordinate values to identify,
       parent features, or (if none exist) searching for name values. Put
       parent features in appropriate state/country values.
    3. If we still haven't identified the place, use Bing to get lat/long
       values. If one is found, pass the coordinates through Geonames again
       to identify parent features.
    4. Add any non-existing features to the spatial dictionary.

    Returns the record serialized as JSON, or a plain error string (with
    a 500 response code) when the body is not JSON.
    '''
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        # No spatial property; return the record unchanged.
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        places = []
        for v in iterify(value):
            bing_geocode = True
            # Each spatial entry must be a dict; anything else is logged
            # and skipped.
            if not isinstance(v, dict):
                logger.error("Spatial value must be a dictionary; record %s" %
                             data["_id"])
                continue

            place = Place(v)

            # If the "name" actually parses as coordinates, move it into
            # the coordinates slot and rebuild the name.
            if place.name:
                coords = get_coordinates(place.name)
                if coords:
                    place.coordinates = coords
                    place.name = None
                    place.set_name()

            # Run Geonames enrichment to do initial search
            place.enrich_geodata(DplaGeonamesGeocoder())

            # Don't enrich with geodata if place is 'United States'
            pattern = ur" *(United States(?!-)|États-Unis|USA)"
            if (place.name and re.search(pattern, place.name)):
                bing_geocode = False

            if bing_geocode:
                # Attempt to find this item's lat/lng coordinates
                if not place.coordinates:
                    api_key = module_config().get("bing_api_key")
                    place.enrich_geodata(DplaBingGeocoder(api_key=api_key))
                    # rerun geonames enrichment with new coordinates
                    place.enrich_geodata(DplaGeonamesGeocoder())

            # A place that still fails validation gets one last attempt
            # at deriving a name; otherwise the failure is logged.
            if not place.validate():
                if not place.set_name():
                    logger.error("Spatial dictionary must have a " +
                                 "'name' property. Could not enhance input " +
                                 "data to include a name property; " +
                                 "record %s" % data["_id"])

            places.append(place)

        # Merge related places and write the enriched values back.
        values = map(lambda x: x.to_map_json(), Place.merge_related(places))
        setprop(data, prop, values)

    return json.dumps(data)
Ejemplo n.º 31
0
# Make sure the environment is set up
@simple_service("GET", "http://example.com/test")
def test_environment():
    # Sanity-check the Akara configuration API: raw_config exists and
    # module_config supports [], .get (with default), and .require.
    assert akara.raw_config is not None
    assert akara.module_config("Akara") is not None
    assert akara.module_config("Akara")["Listen"] is not None
    assert akara.module_config("Akara").get("Listen") == akara.module_config("Akara")["Listen"]
    assert akara.module_config("Akara").get("XYZListen", 123) == 123
    assert akara.module_config("Akara").require("Listen", "SHRDLU")
    try:
        akara.module_config("Akara").require("XYZListen", "SHRDLU")
        # If require() failed to raise, this AssertionError is caught
        # below and its message (lacking "SHRDLU") fails the assert.
        raise AssertionError
    except Exception as err:
        # "except Exception, err" is Python-2-only syntax; the "as" form
        # is valid on Python 2.6+ and Python 3.
        assert "SHRDLU" in str(err)
    assert not akara.module_config("XYZAkara")  # should return False
    assert akara.module_config("Akara")  # should return True

    # simple services can access the WSGI environ this way
    assert request.environ is not None

    # Here's how to override the response fields
    assert response.code.startswith("200 ")
    assert response.headers == []
    return "Good!"


# Make sure the simple_service can specify the path
# (If not given, uses the function's name)
@simple_service("GET", "http://example.com/test", path="test.new.path")
def test_set_path():
Ejemplo n.º 32
0
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming as follows:

    1. If the coordinates property does not exist, attempt to extract it from
       name.
    2. Run GeoNames enrichment, reverse encoding coordinate values to identify,
       parent features, or (if none exist) searching for name values. Put
       parent features in appropriate state/country values.
    3. If we still haven't identified the place, use Bing to get lat/long
       values. If one is found, pass the coordinates through Geonames again
       to identify parent features.
    4. Add any non-existing features to the spatial dictionary.

    Arguments:
        body - JSON-encoded record (the POST body)
        ctype - content type of the request (unused here)
        prop - path of the spatial property to enrich
        newprop - unused in this implementation; presumably kept for
                  interface compatibility with older versions -- TODO confirm

    Returns the (possibly enriched) record, JSON-encoded; on a JSON parse
    failure, sets a 500 response code and returns a plain-text message.
    '''
    try:
        data = json.loads(body)
    except:
        # Any parse failure yields a 500 with a plain-text message.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        # No spatial property -- nothing to do; return the record unchanged.
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        places = []
        for v in iterify(value):
            bing_geocode = True
            # Skip (but log) malformed spatial entries.
            if not isinstance(v, dict):
                logger.error("Spatial value must be a dictionary; record %s" %
                             data["_id"])
                continue

            place = Place(v)

            # If the name itself encodes a lat/lng pair, promote it to the
            # coordinates field and rebuild the name from the other fields.
            if place.name:
                coords = get_coordinates(place.name)
                if coords:
                    place.coordinates = coords
                    place.name = None
                    place.set_name()

            # Run Geonames enrichment to do initial search
            place.enrich_geodata(DplaGeonamesGeocoder())

            # Don't enrich with geodata if place is 'United States'
            pattern = ur" *(United States(?!-)|États-Unis|USA)"
            if (place.name and re.search(pattern, place.name)):
                bing_geocode = False

            if bing_geocode:
                # Attempt to find this item's lat/lng coordinates
                if not place.coordinates:
                    api_key = module_config().get("bing_api_key")
                    place.enrich_geodata(DplaBingGeocoder(api_key=api_key))
                    # rerun geonames enrichment with new coordinates
                    place.enrich_geodata(DplaGeonamesGeocoder())

            # Invalid places are still appended; the problem is only logged.
            if not place.validate():
                if not place.set_name():
                    logger.error("Spatial dictionary must have a " +
                                 "'name' property. Could not enhance input " +
                                 "data to include a name property; " +
                                 "record %s" % data["_id"])

            places.append(place)

        # Merge related/duplicate places before serializing back onto the
        # record.
        values = map(lambda x: x.to_map_json(), Place.merge_related(places))
        setprop(data, prop, values)

    return json.dumps(data)
Ejemplo n.º 33
0
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming in as follows:
        1. Attempt to get a lat/lng coordinate from the property. We use Bing to
           lookup lat/lng from a string as it is much better than Geonames.
        2. For the given lat/lng coordinate, attempt to determine its parent
           features (county, state, country). We use Geonames to reverse geocode
           the lat/lng point and retrieve the location hierarchy.

    Arguments:
        body - JSON-encoded record (the POST body)
        ctype - content type of the request (unused here)
        prop - path of the spatial property to read place names from
        newprop - key under which the "lat, lng" string is stored in each
                  spatial dictionary

    Returns the enriched record as a JSON string; on a JSON parse failure,
    sets a 500 response code and returns a plain-text message.
    '''
    try:
        data = json.loads(body)
    except:
        # Any parse failure yields a 500 with a plain-text message.
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)): 
        pass 
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        for v in iterify(value):
            if "name" not in v:
                continue

            # Parse the name as a literal lat/lng pair exactly once (the
            # original called coordinate() twice for the same string); fall
            # back to Bing when the name is not already a coordinate.
            result = coordinate(v["name"])
            if not result:
                # Attempt to find this item's lat/lng coordinates
                result = DplaBingGeocoder(api_key=module_config().get("bing_api_key")).geocode_spatial(v)

            if (result):
                lat, lng = result
                v[newprop] = "%s, %s" % (lat, lng,)

                # Reverse-geocode this location to find country, state, and county parent places
                hierarchy = DplaGeonamesGeocoder().reverse_geocode_hierarchy(lat, lng, ["PCLI", # Country
                                                                                        "ADM1", # State
                                                                                        "ADM2"]) # County
                for place in hierarchy:
                    # Use the fcode local consistently (it was previously
                    # assigned but never read).
                    fcode = place["fcode"]
                    if ("PCLI" == fcode):
                        v["country"] = place["toponymName"]
                    elif ("ADM1" == fcode):
                        v["state"] = place["toponymName"]
                    elif ("ADM2" == fcode):
                        v["county"] = place["toponymName"]

                    # Determine how close we are to the original coordinates, to see if this is the
                    # place that was geocoded and we should stop adding more specificity (e.g. if
                    # the record specified "South Carolina", we don't want to include the county
                    # that is located at the coordinates of South Carolina. We use increasing
                    # tolerance levels to account for differences between Bing and Geonames
                    # coordinates.
                    d = haversine((lat, lng), (place["lat"], place["lng"]))
                    if (("PCLI" == fcode and d < 50) # Country tolerance (Bing/Geonames 49.9km off) \
                        or ("ADM1" == fcode and d < 15)): # State tolerance
                        break
            else:
                logger.debug("No geocode result found for %s" % v)

    return json.dumps(data)
Ejemplo n.º 34
0
from akara.caching import cache, make_named_cache

from zen import spreadsheet as spreadsheetlib
from zen.temporal import smart_parse_date
from zen.csvhelper import readcsv
from zen.mods import mods2json#, MODS_NAMESPACE
from zen.geo import geolookup
from zen.whatfile import guess_imt
from zen.feeds import webfeed
from zen.exhibit import UNSUPPORTED_IN_EXHIBITKEY

from . import __version__ as VERSION

# Number of items processed per chunk during data load.
CHUNKCOUNT = 10

# External command used to convert BibTeX to MODS XML (deployment config).
BIBTEX2MODS = module_config().get('bib2xml_command', 'bib2xml')
# When truthy, extra diagnostic output is produced during data load.
DIAGNOSTICS = module_config().get('dataload_diagnostics', False)

# Internet media types treated as BibTeX input.
BIBTEX_IMT = ["application/x-bibtex", "text/x-bibtex"]

GENERIC_BINARY_IMT = 'application/octet-stream'
UNKNOWN_IMT = 'application/unknown'
UNKNOWN_TEXT_IMT = 'text/unknown'
# Media types accepted as spreadsheet/Excel input (some clients mislabel
# Excel files with generic Office or binary types).
EXCEL_IMTS = ['application/vnd.ms-excel', 'application/vnd.ms-office', 'application/msword', GENERIC_BINARY_IMT]

#FIXME: Will grow monotonically.  Switch to LRU algo
#CACHE = {}
# Exhibit property names that imported data must not clobber.
EXHIBIT_RESERVED = ['label', 'id', 'type']

MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
Ejemplo n.º 35
0
import urllib, urllib2, urlparse
from subprocess import *
import cgi
from cStringIO import StringIO
from itertools import *
from contextlib import closing

from amara import _
from amara.lib.util import *

import akara
from akara.util import copy_auth
from akara.services import simple_service

# Error message for a missing mandatory 'q' parameter.
Q_REQUIRED = _("The 'q' POST parameter is mandatory.")
# Shell command templates for committing/adding a file to Subversion;
# %(msg)s and %(fpath)s are interpolated per request (deployment config).
SVN_COMMIT_CMD = akara.module_config().get('svn_commit', 'svn commit -m "%(msg)s" %(fpath)s')
SVN_ADD_CMD = akara.module_config().get('svn_add', 'svn add %(fpath)s')

SERVICE_ID = 'http://purl.org/akara/services/demo/svncommit'
@simple_service('POST', SERVICE_ID, 'akara.svncommit', 'text/plain')
def svncommit(body, ctype, **params):
    '''Commit a file. Can optionally populate the file contents from a given URL.

    The form parameters are:
      fpath - the name of the file to commit to SVN
      msg - the commit message
      q (optional) - fetch the given URL and save it to the specified file before commmiting
    
    The form must be POSTed as multipart/form-data. If the request includes
    the 'q' parameter then the new fetch will contain authentication 
    forward 
Ejemplo n.º 36
0
# Symbolic names for the kinds of metadata the SPSS converter emits.
VALUE_SET_TYPE = 'value_set'
VARIABLE_LABELS_TYPE = 'variable_labels'
VALUE_LABELS_TYPE = 'value_labels'

#R_SCRIPT = '''library(foreign)
#mydata <- read.spss(file='%s')
#write.csv2(mydata)
#'''

# GNU R program template: reads an SPSS file (path interpolated at %s)
# and writes it back out as CSV.
# NOTE(review): the path is interpolated directly into R source text --
# confirm callers never pass untrusted filenames.
R_SCRIPT = '''library(Hmisc)
mydata <- spss.get(file='%s')
write.csv2(mydata)
'''

# External command used to run R (deployment config).
R_FILE_CMD = akara.module_config(__name__).get('r_command', 'r')

# Error message for a missing mandatory 'POR' parameter.
POR_REQUIRED = _("The 'POR' POST parameter is mandatory.")

SERVICE_ID = 'http://purl.org/akara/services/demo/spss.json'


@simple_service('POST', SERVICE_ID, 'spss.json', 'application/json')
def spss2json(body, ctype, **params):
    '''
    Uses GNU R to convert SPSS to JSON
    Optionally tries to guess long labels from an original .SPS file
    
    Requires POST body of multipart/form-data
    
    Sample request:
Ejemplo n.º 37
0
from contextlib import closing

from amara.thirdparty import json, httplib2

from akara.services import simple_service
from akara import logger
from akara import response
from akara.caching import cache, make_named_cache
from akara import module_config

from zen.services import service_proxy
from zen import augmentation
from zen.akamod import geolookup_service
#from zen.geo import local_geonames, US_STATE_FIRST

# Path to the geonames database file (deployment config).
GEONAMES_PLUS_DBFILE = module_config().get('geonames_dbfile')

# Number of records handled per chunk.
CHUNKCOUNT = 10

# Matches characters that may not appear in an Exhibit key.
UNSUPPORTED_IN_EXHIBITKEY = re.compile('\W')

# Exhibit property names that imported data must not clobber.
EXHIBIT_RESERVED = ['label', 'id', 'type']

#GEOLOOKUP_CACHE = cache('http://purl.org/com/zepheira/services/geolookup.json', expires=24*60*60)


def post(body, sink):
    """POST *body* (a JSON string) to the *sink* URL.

    Returns the (response, content) pair from httplib2 unmodified.
    """
    json_headers = {'Content-type': 'application/json'}
    http = httplib2.Http()
    return http.request(sink, "POST", body=body, headers=json_headers)
Ejemplo n.º 38
0
import os
from dplaingestion.mappers.oac_dc_mapper import OAC_DCMapper
from dplaingestion.selector import exists, getprop
from dplaingestion.utilities import iterify
from akara import module_config

URL_OAC_CONTENT_BASE = module_config().get(
                        'url_oac_content',
                        os.environ.get('URL_OAC_CONTENT_BASE',
                                        'http://content.cdlib.org')
                        )

class OAC_DCMapperSuppressPublisher(OAC_DCMapper):
    '''Mapper for OAC xml feed with bogus publisher fields'''
    # sourceResource mapping
    def map_publisher(self):
        # Intentionally a no-op: overrides the parent's publisher mapping so
        # the unreliable publisher values in this feed are never mapped.
        pass
Ejemplo n.º 39
0
Archivo: z.py Proyecto: dpla/zen
from akara.util import find_peer_service, extract_auth, read_http_body_to_temp, copy_headers_to_dict
from akara.util import status_response
from akara.util.moin import ORIG_BASE_HEADER, DOCBOOK_IMT, RDF_IMT, HTML_IMT, XML_IMT
#from akara.services import simple_service
from akara.services import method_dispatcher
from akara import request, logger, module_config
from akara.opensearch import apply_template

from zen.util import requested_imt, requested_lang
from zen import ZEN_SERVICE_ID

#import zenlib.moinmodel
#from zenlib.moinmodel import node, rulesheet, moinrest_resolver, parse_moin_xml, zenuri_to_moinrest, MOINREST_SERVICE_ID
#from zenlib.util import find_peer_service

# Shared secret used to authorize rulesheets (empty string when unset).
SECRET = module_config().get('RULESHEET_SECRET', '')
# The mandatory 'SPACES' config entry is a callable; calling it yields the
# space definitions.
SPACESDEF = module_config()['SPACES']()
SPACES = {}

# Matches any character that is unsafe to use in a filename.
UNSUPPORTED_IN_FILENAME = re.compile('\W')

DEFAULT_MOUNT = 'zen'

# Shared HTTP client with an on-disk cache.
H = httplib2.Http('/tmp/.cache')

FIRST_REQUEST_FLAG = False
#def module_load():

FIND_PEER_SERVICE_KEY = 'akara.FIND_PEER_SERVICE'

def setup_request(environ):
import os
from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists
from akara import module_config

IGNORE = module_config().get("IGNORE")
PENDING = module_config().get("PENDING")


@simple_service(
    "POST", "http://purl.org/la/dp/kentucky_identify_object", "kentucky_identify_object", "application/json"
)
def kentucky_identify_object(body, ctype, download="True"):
    """
    Responsible for: adding a field to a document with the URL where we
    should expect to the find the thumbnail
    """
    data = {}
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header("content-type", "text/plain")
        return "Unable to parse body as JSON"

    relation_field = "sourceResource/relation"
    if exists(data, relation_field):
        url = getprop(data, relation_field)
Ejemplo n.º 41
0
import amara
from amara.thirdparty import httplib2

import akara
from akara.services import simple_service
from akara import response
from akara import logger
from akara.util import normalize_http_header_name

import calendar
import email
import email.Utils
import time

# Maximum cache ages, with an optional None key supplying the default.
# Fall back to an empty mapping when the 'maxlen' option is absent: the
# original crashed at import time with "TypeError: argument of type
# 'NoneType' is not iterable" on the membership test below.
MAXLEN = akara.module_config().get('maxlen') or {}
if None in MAXLEN:
    DEFAULT_MAXLEN = MAXLEN[None]
    del MAXLEN[None]
else:
    DEFAULT_MAXLEN = 3600

# Override for serving stale entries (0 disables) -- units presumably
# seconds; confirm against the cache-proxy handler.
OVERRIDE_STALE = akara.module_config().get('override_stale',0)

CACHE_PROXY_SERVICE_ID = 'http://purl.org/xml3k/akara/services/demo/cache-proxy'

# Builds a Cache-Control header tuple for the given max-age value.
MAXAGE_HEADER = lambda age: ('Cache-Control','max-age={0}'.format(age))

#FIXME: recycle after N uses
H = httplib2.Http()
Ejemplo n.º 42
0
None
'''

import amara
#from amara.bindery import html
from amara.thirdparty import json, httplib2
from amara.lib import irihelpers, inputsource

from akara.services import simple_service
from akara import request, response, logger, module_config

TOCOUCH_SERVICE_ID = 'http://purl.org/akara/services/demo/tocouch'

H = httplib2.Http('/tmp/.cache')

COUCHBASE = module_config().get('couchbase', 'http://sforza.ogbuji.net:5984/famulus/')

@simple_service('GET', TOCOUCH_SERVICE_ID, 'tocouch', 'text/html')
def tocouch(**params):
    '''
    @xslt - URL to the XSLT transform to be applied
    all other query parameters are passed ot the XSLT processor as top-level params
    
    Sample request:
    curl --request POST --data-binary "@foo.xml" --header "Content-Type: application/xml" "http://*****:*****@xslt=http://hg.akara.info/amara/trunk/raw-file/tip/demo/data/identity.xslt"
    
    You can check after the fact by visiting http://sforza.ogbuji.net:5984/test1/_all_docs
    
    Then get the id and surf there
    
    http://sforza.ogbuji.net:5984/test1/b10d978ced600227e663d6503b1abec4
Ejemplo n.º 43
0
import amara
from amara.thirdparty import httplib2

import akara
from akara.services import simple_service
from akara import response
from akara import logger
from akara.util import normalize_http_header_name

import calendar
import email
import email.Utils
import time

# Maximum cache ages, with an optional None key supplying the default.
# NOTE(review): if the 'maxlen' option is absent this is None and the
# "None in MAXLEN" test below raises TypeError at import time -- confirm
# the option is mandatory in deployment config.
MAXLEN = akara.module_config().get('maxlen')
if None in MAXLEN:
    DEFAULT_MAXLEN = MAXLEN[None]
    del MAXLEN[None]
else:
    DEFAULT_MAXLEN = 3600

# Override for serving stale entries (0 disables) -- units presumably
# seconds; confirm against the cache-proxy handler.
OVERRIDE_STALE = akara.module_config().get('override_stale', 0)

CACHE_PROXY_SERVICE_ID = 'http://purl.org/xml3k/akara/services/demo/cache-proxy'

# Builds a Cache-Control header tuple for the given max-age value.
MAXAGE_HEADER = lambda age: ('Cache-Control', 'max-age={0}'.format(age))

#FIXME: recycle after N uses
H = httplib2.Http()
Ejemplo n.º 44
0
from akara.services import simple_service
from akara import request, response
from akara import module_config, logger
from akara.util import copy_headers_to_dict
from amara.thirdparty import json, httplib2
from amara.lib.iri import join
import uuid

COUCH_DATABASE = module_config().get('couch_database')

# FIXME: this should be JSON-LD, but CouchDB doesn't support +json yet
CT_JSON = {'Content-Type': 'application/json'}

H = httplib2.Http()
H.force_exception_as_status_code = True

# FIXME: should support changing media type in a pipeline
def pipe(content, ctype, enrichments, wsgi_header):
    """Run *content* through each enrichment service in order.

    Serializes content to JSON and POSTs it to every URI in *enrichments*
    (blank entries are skipped), feeding each service's output into the
    next. A non-2xx response is logged and that stage's output discarded.
    Returns the final body.
    """
    body = json.dumps(content)
    for uri in enrichments:
        if len(uri) < 1:
            # in case there's no pipeline
            continue
        headers = copy_headers_to_dict(request.environ, exclude=[wsgi_header])
        headers['content-type'] = ctype
        resp, cont = H.request(uri, 'POST', body=body, headers=headers)
        if str(resp.status).startswith('2'):
            body = cont
        else:
            logger.debug("Error in enrichment pipeline at %s: %s"%(uri,repr(resp)))
    return body
from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists
from akara import module_config
from amara.lib.iri import is_absolute

IGNORE = module_config().get('IGNORE')
PENDING = module_config().get('PENDING')


@simple_service('POST', 'http://purl.org/la/dp/contentdm_identify_object',
                'contentdm_identify_object', 'application/json')
def contentdm_identify_object(body, ctype, download="True"):
    """
    Responsible for: adding a field to a document with the URL where we
    should expect to the find the thumbnail.

    There are two methods of creating the thumbnail URL:
    1. Replacing "cdm/ref" with "utils/getthumbail" in the handle field
       Example:
           handle: http://test.provider/cdm/ref/collection/1/id/1
           thumbnail: http://test.provider/utils/getthumbnail/collection/1/id/1

    2. Splitting the handle field on "u?" and using the parts to compose the
       thumbnail URL.
       Example:
            handle: http://test.provider/u?/ctm,101
            thumbnail: http://test.provider/cgi-bin/thumbnail.exe?CISOROOT=/ctm&CISOPTR=101"
    """
Ejemplo n.º 46
0
from amara.lib.iri import split_fragment, split_uri_ref, unsplit_uri_ref, split_authority
#from amara import inputsource

# Akara Imports
from akara import module_config, logger, response
from akara.util import multipart_post_handler, wsgibase, http_method_handler, copy_headers_to_dict
from akara.services import method_dispatcher
from akara.util import status_response, read_http_body_to_temp
from akara.util import BadTargetError, HTTPAuthorizationError, MoinAuthorizationError, UnexpectedResponseError, MoinMustAuthenticateError, MoinNotFoundError, ContentLengthRequiredError, GenericClientError, ConflictError
import akara.util.moin as moin

# ======================================================================
#                         Module Configruation
# ======================================================================

# Mapping of wiki name -> target wiki URL (deployment config; URLs may
# embed HTTP basic-auth credentials per the note below).
TARGET_WIKIS = module_config().get("targets", {})
# Populated later with one opener per target wiki.
TARGET_WIKI_OPENERS = {}
# Opener used when no special authentication applies; handles cookies and
# multipart POSTs.
DEFAULT_OPENER = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(), multipart_post_handler.MultipartPostHandler)

# Specifies the default max-age of Moin pages
CACHE_MAX_AGE = module_config().get("CACHE_MAX_AGE", None)

# Specifies a Wiki path (currently only one, FIXME) under which no caching will occur
NO_CACHE_PATHS = module_config().get("NO_CACHE_PATHS", None)

# Look at each Wiki URL and build an appropriate opener object for retrieving
# pages.   If the URL includes HTTP authentication information such as
# http://user:[email protected]/mywiki, the opener is built with
# basic authentication enabled.   For details, see:
#
Ejemplo n.º 47
0
from StringIO import StringIO
from akara import module_config
import pprint
import sys
import re
import os
import os.path
import urllib
from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists

# The main directory where the images will be saved.
THUMBS_ROOT_PATH = module_config().get('thumbs_root_path')

# The dictionary containing mapping of MIME type to file extension.
# What's more, only those MIME types will be saved.
MIME_TYPES = module_config().get('mime_to_type')


def update_document(document, filepath, mime, status):
    """
    Updates the document with a filepath of downloaded thumbnail..

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath to insert

    Returns:
Ejemplo n.º 48
0
Archivo: static.py Proyecto: dpla/akara
        # This is a very simple implementation of conditional GET with
        # the Last-Modified header. It makes media files a bit speedier
        # because the files are only read off disk for the first request
        # (assuming the browser/client supports conditional GET).
        mtime = formatdate(os.stat(filename).st_mtime, usegmt=True)
        headers = [('Last-Modified', mtime)]
        if environ.get('HTTP_IF_MODIFIED_SINCE', None) == mtime:
            status = '304 Not Modified'
            output = ()
        else:
            status = '200 OK'
            mime_type = mimetypes.guess_type(filename)[0]
            if mime_type:
                headers.append(('Content-Type', mime_type))
            output = [fp.read()]
            fp.close()
        start_response(status, headers)
        return output

import akara

# Warn (but continue) when the deployment config has no section for this
# module; 'paths' then defaults to an empty mapping below.
if not akara.module_config():
    akara.logger.warn("No configuration section found for %r" % (__name__,))
    
paths = akara.module_config().get("paths", {})

# Register one static-media handler per configured (URL path -> disk root).
for path, root in paths.items():
    handler = MediaHandler(root)
    registry.register_service(SERVICE_ID, path, handler)
Ejemplo n.º 49
0
from akara import module_config
import pprint
import sys
import re
import os
import os.path
import urllib
from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists


# The main directory where the images will be saved.
THUMBS_ROOT_PATH = module_config().get('thumbs_root_path')


# The dictionary containing mapping of MIME type to file extension.
# What's more, only those MIME types will be saved.
MIME_TYPES = module_config().get('mime_to_type')


def update_document(document, filepath, mime, status):
    """
    Updates the document with a filepath of downloaded thumbnail..

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath to insert
Ejemplo n.º 50
0
from amara.thirdparty import json, httplib2

from akara.services import simple_service
from akara import logger
from akara import response
from akara.caching import cache, make_named_cache
from akara import module_config

from zen.services import service_proxy
from zen import augmentation
from zen.akamod import geolookup_service

# from zen.geo import local_geonames, US_STATE_FIRST


GEONAMES_PLUS_DBFILE = module_config().get("geonames_dbfile")

CHUNKCOUNT = 10

UNSUPPORTED_IN_EXHIBITKEY = re.compile("\W")

EXHIBIT_RESERVED = ["label", "id", "type"]

# GEOLOOKUP_CACHE = cache('http://purl.org/com/zepheira/services/geolookup.json', expires=24*60*60)


def post(body, sink):
    """POST *body* (a JSON string) to the *sink* URL.

    Returns the httplib2 (response, content) pair unmodified.
    """
    headers = {"Content-type": "application/json"}
    h = httplib2.Http()
    resp, content = h.request(sink, "POST", body=body, headers=headers)
    return resp, content
Ejemplo n.º 51
0
from amara import bindery
from amara.thirdparty import json

from akara.services import simple_service
from akara.util import status_response
from akara import response
from akara import logger
from akara import module_config

#from zen.latlong import latlong
from zen.geo import local_geonames, geonames_service

LOCAL_GEONAMES = 'http://purl.org/com/zepheira/services/geocoders/local-geonames'
GEONAMES_SERVICE = 'http://purl.org/com/zepheira/services/geocoders/geonames-service'

# Which geocoder backend to use; defaults to the local-geonames backend.
GEOCODER = module_config().get('geocoder', LOCAL_GEONAMES)

GEONAMES_PLUS_DBFILE = module_config().get('geonames_dbfile')
GEONAMES_SERVICE_USER = module_config().get('geonames_service_user')

# Specifies the default max-age of across-the-board lookups
# NOTE(review): str() of a missing option yields the literal string
# "None" -- confirm downstream consumers handle that.
CACHE_MAX_AGE = str(module_config().get('cache_max_age'))

# Resolve the configured backend to a callable; remains None if GEOCODER
# names neither known backend.
geocoder_func = None
if GEOCODER == LOCAL_GEONAMES:
    geocoder_func = local_geonames(GEONAMES_PLUS_DBFILE, logger=logger)

if GEOCODER == GEONAMES_SERVICE:
    geocoder_func = geonames_service(user=GEONAMES_SERVICE_USER, logger=logger)

Ejemplo n.º 52
0
from dplaingestion.oai import oaiservice
from couchdb.client import Server

from akara.services import simple_service
from akara import request, response
from akara import module_config, logger
from akara.util import copy_headers_to_dict
from amara.thirdparty import json, httplib2
from amara.lib.iri import join
from urllib import quote
import datetime
import uuid
import base64

# Configuration for accessing the database.
COUCH_DATABASE = module_config().get('couch_database')
COUCH_DATABASE_USERNAME = module_config().get('couch_database_username')
COUCH_DATABASE_PASSWORD = module_config().get('couch_database_password')

# Pre-computed HTTP Basic auth header. Use b64encode rather than the
# legacy encodestring: encodestring appends a trailing newline (and wraps
# long input every 76 chars), producing a malformed header value.
COUCH_AUTH_HEADER = { 'Authorization' : 'Basic ' + base64.b64encode(COUCH_DATABASE_USERNAME+":"+COUCH_DATABASE_PASSWORD) }
CT_JSON = {'Content-Type': 'application/json'}

# The app name used for accessing the views.
VIEW_APP = "thumbnails"

# The view name for accessing the documents which need getting the thumbnail.
VIEW_NAME = "all_for_downloading"

UPDATE_SERVICE_ID = 'http://purl.org/la.dp/dpla-thumbs-update-doc'
LISTRECORDS_SERVICE_ID = 'http://purl.org/la.dp/dpla-thumbs-list-for-downloading'