Example #1
def test_load_session(self):
    from lingcod.common.utils import load_session
    request = TestRequest()
    load_session(request, '0')
    self.assertEquals(request.session, None)
    load_session(request, md5('blah').hexdigest())
    self.assertEquals(request.session.__class__.__name__, 'SessionStore')
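These test snippets (this one and the near-identical Example #5 below) assume an `md5` callable is already in scope, typically `from hashlib import md5` or Django's old `md5_constructor` alias shown in Example #14. Note that `md5('blah')` only works on Python 2; a minimal Python 3 sketch of the same call:

from hashlib import md5

# Python 2 accepted a str here; Python 3 requires bytes.
session_key = md5(b'blah').hexdigest()
print(session_key)  # a 32-character hex digest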
Example #2
def qrcode(req, width):
    import qr

    url = req.GET.get('url')
    if url is None:
        raise Http404

    try:
        data = url.encode('ascii')
    except UnicodeError:
        # only properly URL-encoded (ASCII) URLs are supported
        raise Http404

    if width == "480":
        magnify = 8
    else:
        magnify = 4

    buf = StringIO()
    try:
        qr.qrcode(data, buf, format=qr.GIF, magnify=magnify)
    except ValueError:
        # qr module wasn't compiled with the GD library
        raise Http404

    content = buf.getvalue()

    CACHE_TIMEOUT = 86400
    res = HttpResponse(content, content_type='image/gif')
    res['Content-Length'] = str(len(content))
    res['ETag'] = '"%s"' % md5(content).hexdigest()
    res['Last-Modified'] = http_date()
    res['Expires'] = http_date(time.time() + CACHE_TIMEOUT)
    patch_cache_control(res, max_age=CACHE_TIMEOUT)
    return res
Example #3
def qrcode(req, width):
    import qr

    url = req.GET.get('url')
    if url is None:
        raise Http404

    try:
        data = url.encode('ascii')
    except UnicodeError:
        # only properly URL-encoded (ASCII) URLs are supported
        raise Http404

    if width == "480":
        magnify = 8
    else:
        magnify = 4

    buf = StringIO()
    try:
        qr.qrcode(data, buf, format=qr.GIF, magnify=magnify)
    except ValueError:
        # qr module wasn't compiled with the GD library
        raise Http404

    content = buf.getvalue()

    CACHE_TIMEOUT = 86400
    res = HttpResponse(content, content_type='image/gif')
    res['Content-Length'] = str(len(content))
    res['ETag']           = '"%s"' % md5(content).hexdigest()
    res['Last-Modified']  = http_date()
    res['Expires'] = http_date(time.time() + CACHE_TIMEOUT)
    patch_cache_control(res, max_age=CACHE_TIMEOUT)
    return res
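Examples #2 and #3 compute an ETag but never compare it against the client's If-None-Match header, so repeat visitors always receive the full GIF. A hedged sketch of the missing conditional-GET step (the helper name is mine, not part of the original views):

from hashlib import md5

from django.http import HttpResponse, HttpResponseNotModified

def respond_with_etag(req, content, content_type='image/gif'):
    # Hypothetical helper: answer 304 when the client already holds this body.
    etag = '"%s"' % md5(content).hexdigest()
    if req.META.get('HTTP_IF_NONE_MATCH') == etag:
        return HttpResponseNotModified()
    res = HttpResponse(content, content_type=content_type)
    res['ETag'] = etag
    res['Content-Length'] = str(len(content))
    return res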
Example #4
        def wrapper(self, *args, **kwargs):

            current_class = self.__class__

            blocking_id = \
                    getmodule(current_class).__name__ + \
                    '.' + \
                    current_class.__name__

            method = u""

            for param in register_params:
                method += unicode(param) + u"=" + unicode(kwargs[param])

            digest = md5(method).hexdigest()

            lock_id = "%s-lock-%s" % (blocking_id, digest)

            acquire_lock = lambda: cache.add(lock_id, 'true', expires)

            release_lock = lambda: cache.delete(lock_id)

            if acquire_lock():
                value = cache.get(lock_id)
                try:
                    value = view_func(self, *args, **kwargs)
                except Exception, e:
                    # swallow errors from the wrapped view and fall back to
                    # the cached value
                    pass
                finally:
                    release_lock()
                return value
Example #5
def test_load_session(self):
    from madrona.common.utils import load_session
    request = TestRequest()
    load_session(request, '0')
    self.assertEquals(request.session, None)
    load_session(request, md5('blah').hexdigest())
    self.assertEquals(request.session.__class__.__name__, 'SessionStore')
Example #6
def border(req, style, rgb):
    import gd
    rgb = tuple(map(lambda x: int(x, 16), (rgb[0:2], rgb[2:4], rgb[4:6])))

    try:
        width = int(req.GET.get('w', 228))
    except (ValueError, TypeError):
        width = 228

    try:
        height = int(req.GET.get('h', 1))
    except (ValueError, TypeError):
        height = 1

    if width < 1 or height < 1:
        raise Http404

    if rgb == (0, 0, 0):
        # if the line is black, use white (#FFFFFF) as the background color
        backcolor = (255, 255, 255)
    else:
        backcolor = (0, 0, 0)

    # TODO
    # check display width
    img = gd.image((width, height))

    back = img.colorAllocate(backcolor)
    img.colorTransparent(back)

    color = img.colorAllocate(rgb)

    if style == 'dotted':
        pattern = (color, color, back, back)
    elif style == 'dashed':
        pattern = (color, color, color, back, back, back)
    else:
        # solid
        pattern = (color, )

    img.setStyle(pattern)
    for y in xrange(height):
        img.line((0, y), (width, y), gd.gdStyled)

    fp = StringIO()
    img.writeGif(fp)
    content = fp.getvalue()
    fp.close()

    CACHE_TIMEOUT = 86400  # one day; presumably module-level in the original (cf. Examples #2 and #12)
    content_type = 'image/gif'
    res = HttpResponse(content, content_type=content_type)
    res['Content-Type'] = content_type
    res['Content-Length'] = str(len(content))
    res['ETag'] = '"%s"' % md5(content).hexdigest()
    res['Last-Modified'] = http_date()
    res['Expires'] = http_date(time.time() + CACHE_TIMEOUT)
    patch_cache_control(res, max_age=CACHE_TIMEOUT)

    return res
Example #7
def border(req, style, rgb):
    import gd
    rgb = tuple(map(lambda x: int(x, 16), (rgb[0:2], rgb[2:4], rgb[4:6])))

    try:
        width = int(req.GET.get('w', 228))
    except (ValueError, TypeError):
        width = 228

    try:
        height = int(req.GET.get('h', 1))
    except (ValueError, TypeError):
        height = 1

    if width < 1 or height < 1:
        raise Http404

    if rgb == (0, 0, 0):
        # if the line is black, use white (#FFFFFF) as the background color
        backcolor = (255, 255, 255)
    else:
        backcolor = (0, 0, 0)

    # TODO
    # check display width
    img = gd.image((width, height))

    back = img.colorAllocate(backcolor)
    img.colorTransparent(back)

    color = img.colorAllocate(rgb)

    if style == 'dotted':
        pattern = (color, color, back, back)
    elif style == 'dashed':
        pattern = (color, color, color, back, back, back)
    else:
        # solid
        pattern = (color,)

    img.setStyle(pattern)
    for y in xrange(height):
        img.line((0, y), (width, y), gd.gdStyled)

    fp = StringIO()
    img.writeGif(fp)
    content = fp.getvalue()
    fp.close()

    CACHE_TIMEOUT = 86400  # one day; presumably module-level in the original (cf. Examples #2 and #12)
    content_type = 'image/gif'
    res = HttpResponse(content, content_type=content_type)
    res['Content-Type']   = content_type
    res['Content-Length'] = str(len(content))
    res['ETag']           = '"%s"' % md5(content).hexdigest()
    res['Last-Modified']  = http_date()
    res['Expires'] = http_date(time.time() + CACHE_TIMEOUT)
    patch_cache_control(res, max_age=CACHE_TIMEOUT)

    return res
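The `gd` binding used by Examples #6 and #7 is long unmaintained. As a rough sketch only, the same dotted/dashed patterns can be drawn with Pillow (the palette layout and function name are my assumptions, not the original code):

from io import BytesIO

from PIL import Image

def border_gif(width, height, rgb, style='solid'):
    # Palette image: index 0 is the transparent background, index 1 the line color.
    img = Image.new('P', (width, height), 0)
    img.putpalette([255, 255, 255] + list(rgb) + [0, 0, 0] * 254)
    pattern = {'dotted': (1, 1, 0, 0), 'dashed': (1, 1, 1, 0, 0, 0)}.get(style, (1,))
    for y in range(height):
        for x in range(width):
            img.putpixel((x, y), pattern[x % len(pattern)])
    buf = BytesIO()
    img.save(buf, 'GIF', transparency=0)
    return buf.getvalue()

border_gif(228, 1, (255, 0, 0), 'dashed') then returns GIF bytes ready to wrap in an HttpResponse.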
Example #8
def update_flickr_users(results, page=1, per_page=1, all_photos=False):
    limit = page * per_page
    offset = limit - per_page

    flickr_users = FlickrUser.objects.order_by("date_create")[offset:limit]
    user_updates = []

    for flickr_user in flickr_users:
        nsid_digest = md5(flickr_user.nsid).hexdigest()
        lock_id = "%s-lock-%s" % ("update_photos", nsid_digest)

        # cache.add fails if the key already exists
        acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)

        if acquire_lock():
            try:
                # First, update the flickr_user
                rsp = flickr.people.getInfo(user_id=flickr_user.nsid, format="json", nojsoncallback="true")
                json = simplejson.loads(rsp)

                if json and json["stat"] == "ok":
                    api_user = json["person"]

                    flickr_user.username = api_user["username"]["_content"]
                    flickr_user.iconserver = api_user["iconserver"]
                    flickr_user.iconfarm = api_user["iconfarm"]
                    flickr_user.count_photos = api_user["photos"]["count"]["_content"]

                    try:
                        flickr_user.realname = api_user["realname"]["_content"]
                    except KeyError:
                        flickr_user.realname = None

                    try:
                        flickr_user.path_alias = api_user["path_alias"]
                    except KeyError:
                        flickr_user.path_alias = None

                    flickr_user.save()

            except URLError, e:
                logger.error(
                    "Problem talking to Flickr when calling people.getInfo from update_flickr_users (URLError), will try again. Reason: %s"
                    % (e.reason)
                )
                return update_photos_for_flickr_user.retry(countdown=5)

            except FlickrError, e:
                logger.error(
                    "Problem talking to Flickr when calling people.getInfo from update_flickr_users (FlickrError), re-scheduling task.\n Error: %s"
                    % (e)
                )
                raise update_photos_for_flickr_user.retry(countdown=5)

            user_updates.append(update_photos_for_flickr_user.subtask((None, flickr_user.nsid, None, all_photos)))
Example #9
        def _caller(*args, **kwargs):
            """Caller."""
            ret_value = None
            have_lock = False
            args_list = u','.join([unicode(arg) for arg in args])
            key = u"{0}-lock-{1}".format(run_func.__name__, md5(args_list.encode('utf-8')).hexdigest())
            lock = REDIS_CLIENT.lock(key, timeout=timeout)
            try:
                have_lock = lock.acquire(blocking=False)
                if have_lock:
                    ret_value = run_func(*args, **kwargs)
            finally:
                if have_lock:
                    lock.release()

            return ret_value
Example #10
        def _caller(*args, **kwargs):
            """Caller."""
            ret_value = None
            have_lock = False
            args_list = u','.join([unicode(arg) for arg in args])
            key = u"{0}-lock-{1}".format(
                run_func.__name__,
                md5(args_list.encode('utf-8')).hexdigest())
            lock = REDIS_CLIENT.lock(key, timeout=timeout)
            try:
                have_lock = lock.acquire(blocking=False)
                if have_lock:
                    ret_value = run_func(*args, **kwargs)
            finally:
                if have_lock:
                    lock.release()

            return ret_value
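Examples #9 and #10 are the same redis-py lock wrapper, just wrapped differently. The underlying primitive in isolation, as a sketch with placeholder connection settings:

import redis

REDIS_CLIENT = redis.Redis(host='localhost', port=6379)  # placeholder connection

lock = REDIS_CLIENT.lock('example-lock', timeout=60)
if lock.acquire(blocking=False):
    try:
        pass  # the work that must not run concurrently
    finally:
        lock.release()
else:
    print('another worker holds the lock; skipping')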
Example #11
    def _wrapped_view(*args, **kwargs):

        method = unicode(args) + unicode(kwargs)
        digest = md5(method).hexdigest()

        lock_id = "%s-lock-%s" % (blocking_id, digest)

        acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE)

        release_lock = lambda: cache.delete(lock_id)

        if acquire_lock():
            value = cache.get(lock_id)
            try:
                value = view_func(*args, **kwargs)
            finally:
                # releasing here and returning afterwards lets exceptions
                # from view_func propagate instead of being swallowed by a
                # return inside finally
                release_lock()
            return value
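Examples #4, #8, and #11 all build on the same trick: `cache.add` is atomic on memcached and succeeds only when the key is absent. A compact sketch of the pattern as a reusable decorator (the decorator name and the argument hashing are mine):

from functools import wraps
from hashlib import md5

from django.core.cache import cache

LOCK_EXPIRE = 60 * 5  # mirrors the examples above

def single_run(view_func):
    @wraps(view_func)
    def _wrapped(*args, **kwargs):
        digest = md5(repr((args, kwargs)).encode('utf-8')).hexdigest()
        lock_id = '%s-lock-%s' % (view_func.__name__, digest)
        if not cache.add(lock_id, 'true', LOCK_EXPIRE):
            return None  # another worker is already running this call
        try:
            return view_func(*args, **kwargs)
        finally:
            cache.delete(lock_id)
    return _wrapped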
Example #12
def image_proxy(req, headers=None, cache=None, timeout=None, proxy_info=None):
    import httplib2
    import gd

    form = ImageProxyForm(req.GET)
    if not form.is_valid():
        return HttpResponseBadRequest()

    url = form.cleaned_data['url']
    width = form.cleaned_data.get('w')
    height = form.cleaned_data.get('h')
    format = form.cleaned_data.get('f')
    timeout = form.cleaned_data.get('t') or timeout
    referrer = form.cleaned_data.get('r')

    headers = headers or {}
    if referrer:
        headers['Referer'] = referrer

    conn = httplib2.Http(cache, timeout, proxy_info)
    result, content = conn.request(url, headers=headers)

    status = int(result['status'])
    if status not in [200, 304]:
        return HttpResponse(status=status)

    try:
        img = Image.open(StringIO(content))
    except IOError:
        raise Http404

    w, h = img.size
    if width and height:
        if (w <= width and h <= height):
            need_resize = False
        else:
            need_resize = True

            w_ratio = width / float(w)
            h_ratio = height / float(h)
            # use smaller ratio
            ratio = min(w_ratio, h_ratio)
            size = (int(w * ratio), int(h * ratio))
    else:
        # either width or height is undefined
        if height is not None and h > height:
            need_resize = True
            ratio = height / float(h)
            size = (int(w * ratio), int(h * ratio))
        elif width is not None and w > width:
            need_resize = True
            ratio = width / float(w)
            size = (int(w * ratio), int(h * ratio))
        else:
            need_resize = False

    format = img.format.lower()
    if format == 'gif':
        content_type = 'image/gif'

        if need_resize:
            newimage = gd.image(size)

            tmp = StringIO()
            img.save(tmp, 'PNG')
            tmp.seek(0)

            gdimage = gd.image(tmp, 'png')

            # resize
            gdimage.copyResizedTo(newimage, (0, 0), (0, 0), size, img.size)

            # get result
            output = StringIO()
            newimage.writeGif(output)
            # override image binary content
            content = output.getvalue()

    else:
        # force output to be JPEG
        content_type = 'image/jpeg'

        if need_resize or format != 'jpeg':  # img.format is e.g. 'jpeg', never a MIME type
            # resize
            if need_resize:
                img = img.resize(size)

            # change color mode to RGB if not
            if img.mode != 'RGB':
                img = img.convert("RGB")

            output = StringIO()
            img.save(output, 'JPEG')
            # override image binary content
            content = output.getvalue()


    CACHE_TIMEOUT = 86400
    res = HttpResponse(content, content_type=content_type)
    res['Content-Length'] = str(len(content))
    res['ETag']           = '"%s"' % md5(content).hexdigest()
    res['Last-Modified']  = http_date()
    res['Expires'] = http_date(time.time() + CACHE_TIMEOUT)
    patch_cache_control(res, max_age=CACHE_TIMEOUT)
    return res
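The resize branch above reduces to "fit the image inside a bounding box using the smaller of the two ratios". The same arithmetic as a standalone helper (the function name is mine); the branching above would collapse to need_resize, size = fit_size(img.size, width, height):

def fit_size(src, max_w=None, max_h=None):
    """Return (need_resize, new_size) for fitting src inside max_w x max_h."""
    w, h = src
    ratios = []
    if max_w is not None and w > max_w:
        ratios.append(max_w / float(w))
    if max_h is not None and h > max_h:
        ratios.append(max_h / float(h))
    if not ratios:
        return False, (w, h)
    ratio = min(ratios)  # the smaller ratio keeps both dimensions in bounds
    return True, (int(w * ratio), int(h * ratio))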
Example #13
def process_flickr_photo(api_photo, nsid):
    logger.info("Processing photo %s for user %s.\n" % (api_photo['id'], nsid))
    
    try:
        # Query Flickr for this photo's Exif data
        exif_rsp = flickr.photos.getExif(photo_id=api_photo['id'], format="json", nojsoncallback="true")
        json = simplejson.loads(exif_rsp)
        
        # If it exists, process it
        if json and json['stat'] == 'ok':
            exif_camera = ""
            raw_exif_make = ""
            exif_make = ""
            raw_exif_model = ""
            exif_model = ""
            exif_software = ""
            
            try:
                exif = json['photo']['exif']
                
                for tag in exif:
                    if tag['label'] == "Make":
                        raw_exif_make = tag['raw']['_content']

                    if tag['label'] == "Model":
                        raw_exif_model = tag['raw']['_content']

                    if tag['label'] == "Software":
                        exif_software = tag['raw']['_content']
                    
                # This is the "name" that Flickr uses; it's usually nice
                # if exif['photo']['camera']:
                #    exif_camera = exif['photo']['camera']
            
                # Create a clean version of the raw Exif make
                exif_make = clean_make(raw_exif_make)
            
                # Create a clean version of the raw Exif model, and remove the make if it's duplicated
                exif_model = clean_model(raw_exif_model, exif_make)
                
                # If there's a model (camera) we'll carry on
                if exif_model:
                
                    # Process the date taken and date upload into nice time objects
                
                    # Date taken is a time string of the local time when the photo was taken,
                    # we don't know the time zone, so we'll store it as UTC and always display it as UTC
                    naive = parse_datetime(api_photo['datetaken'])
                    api_date_taken = pytz.timezone("UTC").localize(naive)
                
                    # Date upload is a unix timestamp, so we can store it as UTC and convert to whatever tz we want.
                    api_date_upload = datetime.utcfromtimestamp(float(api_photo['dateupload'])).replace(tzinfo=timezone.utc)
                
                    # Create the camera slug with things that should never change
                    # I would use exif_camera, but I'm afraid those might change on Flickr's side
                    camera_slug = slugify(exif_make + " " + exif_model)
                
                    # Create a name for the camera
                    if exif_make:
                        camera_name = exif_make + " " + exif_model
                    else:
                        camera_name = exif_model
                        
                    # Try to create the camera, or get it if it exists
                    try:
                        camera, created = Camera.objects.get_or_create(
                            slug = camera_slug,
                            defaults = {
                                'name': camera_name,
                                'model': exif_model,
                                'exif_model': raw_exif_model,
                                'exif_make': raw_exif_make,
                                'count': 0,
                                'count_photos': 0,
                            }
                        )
                    
                    except IntegrityError:
                        logger.warning("Camera %s already exists, but we're trying to add it again. Rescheduling task." % (camera_name))
                        raise process_flickr_photo.retry(countdown=5)
                    
                    if created:
                        if exif_make:
                            make_slug = slugify(exif_make)
                        
                            try:
                                make, created = Make.objects.get_or_create(
                                    slug = make_slug,
                                    defaults = {
                                        'name': exif_make,
                                        'count': 1,
                                    }
                                )
                            
                            except IntegrityError:
                                logger.warning("Make %s already exists, but we're trying to add it again. Rescheduling task." % (exif_make))
                                raise process_flickr_photo.retry(countdown=5)
                            
                            if not created:
                                Make.objects.filter(slug=make_slug).update(count=F('count')+1)
                            
                            camera.make = make
                            camera.save()
                
                    # In case we need to create cache keys
                    id_digest = md5(str(camera.id)).hexdigest()
                
                    # A little bonus here: if the camera doesn't have aws info, try to get it.
                    if not camera.amazon_item_response:
                        lock_id = "%s-lock-%s" % ("aws_update", id_digest)
                        acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
                    
                        if acquire_lock():
                            logger.info("Fetching aws info for %s." % (camera.name))
                            add_aws_item_to_camera.delay(camera.id)
                        
                        else:
                            logger.info("AWS item update for %s already scheduled, skipping." % (camera.name))
                            
                    photo, created = Photo.objects.get_or_create(
                        photo_id = api_photo['id'],
                        defaults = {
                            'secret': api_photo['secret'],
                            'server': api_photo['server'],
                            'farm': api_photo['farm'],
                            'license': api_photo['license'],
                            'media': api_photo['media'],
                            'owner_nsid': api_photo['owner'],
                            'owner_name': api_photo['ownername'],
                            'date_taken': api_date_taken,
                            'date_upload': api_date_upload,
                            'camera': camera,
                        }
                    )
                    
                    if created:
                        photo.title = api_photo['title']
                        photo.path_alias = api_photo['pathalias']
                        photo.date_taken = api_date_taken
                        photo.date_upload = api_date_upload
                        photo.comments_count = api_photo['count_comments']
                        photo.faves_count = api_photo['count_faves']
                
                        if camera.make:
                            photo.camera_make = camera.make
                    
                        if (api_photo['latitude'] or api_photo['longitude']) and api_photo['geo_is_public']:
                            photo.has_geo = 1
                            photo.latitude = api_photo['latitude']
                            photo.longitude = api_photo['longitude']
                            photo.accuracy = api_photo['accuracy']
                            photo.context = api_photo['context']
                    
                        else:
                            photo.has_geo = 0
                    
                        # Ok, save the photo.
                        logger.info("Saving photo %s for camera %s.\n" % (photo.photo_id, camera.name))
                        photo.save()
                        
                        Camera.objects.filter(slug=camera_slug).update(count_photos=F('count_photos')+1)
                        return photo.photo_id
                    
                    else:
                        logger.info("We've seen this photo before, moving on.")
                        return False
                    
                # The photo doesn't have camera info
                else:
                    return False
                    
            except KeyError:
                logger.error("KeyError! The photo doesn't have Exif data. (%s)" % (api_photo['id']))
                return False
                
        else:
            logger.info("We probably don't have permission to see the Exif, carry on. %s" % (api_photo['id']))
            return False
            #raise fetch_photos_for_flickr_user.retry(countdown=5)
            
    except URLError:
        logger.error("Problem talking to Flickr in process_photo (URLError), re-scheduling task.")
        raise fetch_photos_for_flickr_user.retry(countdown=5)
        
    except FlickrError, e:
        logger.error("Problem talking to Flickr in process_photo (FlickrError), re-scheduling task.\n Error: %s" % (e))
        raise fetch_photos_for_flickr_user.retry(countdown=5)
Example #14
def transfer_experiment(source):
    """
    Pull public experiments from source into current mytardis.
    """

    #TODO: Cleanup error messages
    #TODO: does not transfer licences, as they are not part of the METS format.
    #NOTE: As this is a pull we trust the data from the other tardis
    # Check identity of the feed
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The cache key consists of the task name and the MD5 digest
    # of the feed URL.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5
    # cache.add fails if the key already exists
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source

    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)
    # Get list of public experiments at sources
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(
        source + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc",
        registry)
    try:
        exps_metadata = [
            meta for (header, meta,
                      extra) in client.listRecords(metadataPrefix='oai_dc')
        ]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]

        found_user = _get_or_create_user(source, user)

        #make sure experiment is publicish
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/" %
                             (source, exp_id))
        except HTTPError as e:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if exp_state not in [
                Experiment.PUBLIC_ACCESS_FULL,
                Experiment.PUBLIC_ACCESS_METADATA
        ]:
            msg = 'cannot ingest private experiment %s' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the experiment
        try:
            xmldata = getURL("%s/apps/reposproducer/acls/%s/" %
                             (source, exp_id))

        except HTTPError as e:
            msg = "Cannot get acl list of experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)
        try:
            acls = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse acl list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        owners = []
        for acl in acls:
            if acl['pluginId'] == 'django_user' and acl['isOwner']:
                user = _get_or_create_user(source, acl['entityId'])
                owners.append(user.username)
            else:
                # FIXME: skips all other types of acl for now
                pass

        # Get the METS for the experiment
        metsxml = ""
        try:
            metsxml = getURL("%s/experiment/metsexport/%s/?force_http_urls" %
                             (source, exp_id))
            #metsxml = getURL("%s/experiment/metsexport/%s/"
            #% (source, exp_id))

        except HTTPError as e:
            msg = "cannot get METS for experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)

        # load schema and parametername for experiment keys
        try:
            key_schema = Schema.objects.get(namespace=settings.KEY_NAMESPACE)
        except Schema.DoesNotExist as e:
            msg = "No ExperimentKeyService Schema found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            key_name = ParameterName.objects.get(name=settings.KEY_NAME)
        except ParameterName.DoesNotExist as e:
            msg = "No ExperimentKeyService ParameterName found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            xmldata = getURL("%s/apps/reposproducer/key/%s/" %
                             (source, exp_id))
        except HTTPError as e:
            msg = "cannot get key of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not xmldata:
            logger.warn(
                "Unable to retrieve experiment %s key.  Will try again later" %
                exp_id)
            return

        try:
            key_value = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse key list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not key_value:
            logger.warn(
                "Unable to retrieve experiment %s key value.  Will try again later"
                % exp_id)
            return

        logger.debug("retrieved key %s from experiment %s" %
                     (key_value, exp_id))
        exps = Experiment.objects.all()

        got_lock = True
        if not acquire_lock():
            logger.warning("another worker has access to consume experiment")
            return

        duplicate_exp = 0
        for exp in exps:
            #logger.warn("exp = %s" % exp.id)
            params = ExperimentParameter.objects.filter(
                name=key_name,
                parameterset__schema=key_schema,
                parameterset__experiment=exp)
            #logger.warn("params.count() = %s" % params.count())
            if params.count() >= 1:
                key = params[0].string_value
                if key == key_value:
                    duplicate_exp = exp.id
                    #logger.warn("found duplicate for %s" % duplicate_exp)
                    break

        if duplicate_exp:
            logger.warn(
                "Found duplicate experiment from %s exp %s to exp %s" %
                (source, exp_id, duplicate_exp))
            if got_lock:
                release_lock()
            return

        # TODO: Need some way of updating an existing experiment.  Problem is
        # that the copy will have a different id from the original, so we need
        # a unique identifier to allow matching

        # We have now pulled everything we need from the producer and are
        # ready to create the experiment.

        # Make placeholder experiment and ready metadata
        e = Experiment(
            title='Placeholder Title',
            approved=True,
            created_by=found_user,
            public_access=exp_state,
            locked=False  # so experiment can then be altered.
        )
        e.save()

        # store the key
        #eps, was_created = ExperimentParameterSet.objects.\
        #    get_or_create(experiment=e, schema=key_schema)
        #if was_created:
        #    logger.warn("was created")
        #ep, was_created = ExperimentParameter.objects.get_or_create(parameterset=eps,
        #    name=key_name,
        #    string_value=key_value)
        #if was_created:
        #    logger.warn("was created again")
        #ep.save()

        if got_lock:
            release_lock()

        local_id = e.id
        filename = path.join(e.get_or_create_directory(), 'mets_upload.xml')
        f = open(filename, 'wb+')
        f.write(metsxml)
        f.close()

        # Ingest this experiment META data and isOwner ACLS
        eid = None
        try:
            eid, sync_path = _registerExperimentDocument(filename=filename,
                                                         created_by=found_user,
                                                         expid=local_id,
                                                         owners=owners)
            logger.info('=== processing experiment %s: DONE' % local_id)
        except:
            # FIXME: what errors can mets return?
            msg = '=== processing experiment %s: FAILED!' \
                % local_id
            logger.error(msg)
            raise MetsParseError(msg)

        # FIXME: if METS parse fails then we should go back and delete the placeholder experiment

        exp = Experiment.objects.get(id=eid)

        # so that tardis does not copy the data
        for datafile in exp.get_datafiles():
            datafile.stay_remote = True
            datafile.save()

        #import nose.tools
        #nose.tools.set_trace()
        # FIXME: reverse lookup of URLs seems quite slow.
        # TODO: put this information into specific metadata schema attached to experiment
        exp.description += get_audit_message(source, exp_id)
        exp.save()

        local_ids.append(local_id)
    return local_ids
Example #15
def transfer_experiment(source):
    """
    Pull public experiments from source into current mytardis.
    """

    #TODO: Cleanup error messages
    #TODO: does not transfer licences, as they are not part of the METS format.
    #NOTE: As this is a pull we trust the data from the other tardis
    # Check identity of the feed
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The cache key consists of the task name and the MD5 digest
    # of the feed URL.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5
    # cache.add fails if the key already exists
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source

    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)
    # Get list of public experiments at sources
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(source
        + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc", registry)
    try:
        exps_metadata = [meta
            for (header, meta, extra)
            in client.listRecords(metadataPrefix='oai_dc')]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]

        found_user = _get_or_create_user(source, user)

        #make sure experiment is publicish
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if exp_state not in [Experiment.PUBLIC_ACCESS_FULL,
                             Experiment.PUBLIC_ACCESS_METADATA]:
            msg = 'cannot ingest private experiment %s' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the experiment
        try:
            xmldata = getURL("%s/apps/reposproducer/acls/%s/"
                             % (source, exp_id))

        except HTTPError as e:
            msg = "Cannot get acl list of experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)
        try:
            acls = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse acl list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        owners = []
        for acl in acls:
            if acl['pluginId'] == 'django_user' and acl['isOwner']:
                user = _get_or_create_user(source, acl['entityId'])
                owners.append(user.username)
            else:
                # FIXME: skips all other types of acl for now
                pass

        # Get the METS for the experiment
        metsxml = ""
        try:
            metsxml = getURL("%s/experiment/metsexport/%s/?force_http_urls"
                             % (source, exp_id))
            #metsxml = getURL("%s/experiment/metsexport/%s/"
            #% (source, exp_id))

        except HTTPError as e:
            msg = "cannot get METS for experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)

        # load schema and parametername for experiment keys
        try:
            key_schema = Schema.objects.get(namespace=settings.KEY_NAMESPACE)
        except Schema.DoesNotExist as e:
            msg = "No ExperimentKeyService Schema found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            key_name = ParameterName.objects.get(name=settings.KEY_NAME)
        except ParameterName.DoesNotExist as e:
            msg = "No ExperimentKeyService ParameterName found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            xmldata = getURL("%s/apps/reposproducer/key/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get key of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not xmldata:
            logger.warn("Unable to retrieve experiment %s key.  Will try again later" % exp_id)
            return

        try:
            key_value = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse key list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not key_value:
            logger.warn("Unable to retrieve experiment %s key value.  Will try again later" % exp_id)
            return

        logger.debug("retrieved key %s from experiment %s" % (key_value, exp_id))
        exps = Experiment.objects.all()

        got_lock = True
        if not acquire_lock():
            logger.warning("another worker has access to consume experiment")
            return

        duplicate_exp = 0
        for exp in exps:
            #logger.warn("exp = %s" % exp.id)
            params = ExperimentParameter.objects.filter(name=key_name,
                                    parameterset__schema=key_schema,
                                    parameterset__experiment=exp)
            #logger.warn("params.count() = %s" % params.count())
            if params.count() >= 1:
                key = params[0].string_value
                if key == key_value:
                    duplicate_exp = exp.id
                    #logger.warn("found duplicate for %s" % duplicate_exp)
                    break

        if duplicate_exp:
            logger.warn("Found duplicate experiment form %s exp %s to  exp %s"
                % (source, exp_id, duplicate_exp))
            if got_lock:
                release_lock()
            return

        # TODO: Need some way of updating an existing experiment.  Problem is
        # that the copy will have a different id from the original, so we need
        # a unique identifier to allow matching

        # We have now pulled everything we need from the producer and are
        # ready to create the experiment.

        # Make placeholder experiment and ready metadata
        e = Experiment(
            title='Placeholder Title',
            approved=True,
            created_by=found_user,
            public_access=exp_state,
            locked=False  # so experiment can then be altered.
            )
        e.save()

        # store the key
        #eps, was_created = ExperimentParameterSet.objects.\
        #    get_or_create(experiment=e, schema=key_schema)
        #if was_created:
        #    logger.warn("was created")
        #ep, was_created = ExperimentParameter.objects.get_or_create(parameterset=eps,
        #    name=key_name,
        #    string_value=key_value)
        #if was_created:
        #    logger.warn("was created again")
        #ep.save()

        if got_lock:
            release_lock()

        local_id = e.id
        filename = path.join(e.get_or_create_directory(),
                             'mets_upload.xml')
        f = open(filename, 'wb+')
        f.write(metsxml)
        f.close()

        # Ingest this experiment META data and isOwner ACLS
        eid = None
        try:
            eid, sync_path = _registerExperimentDocument(filename=filename,
                                               created_by=found_user,
                                               expid=local_id,
                                               owners=owners)
            logger.info('=== processing experiment %s: DONE' % local_id)
        except:
            # FIXME: what errors can mets return?
            msg = '=== processing experiment %s: FAILED!' \
                % local_id
            logger.error(msg)
            raise MetsParseError(msg)

        # FIXME: if METS parse fails then we should go back and delete the placeholder experiment

        exp = Experiment.objects.get(id=eid)

        # so that tardis does not copy the data
        for datafile in exp.get_datafiles():
            datafile.stay_remote = True
            datafile.save()

        #import nose.tools
        #nose.tools.set_trace()
        # FIXME: reverse lookup of URLs seems quite slow.
        # TODO: put this information into specific metadata schema attached to experiment
        exp.description += get_audit_message(source, exp_id)
        exp.save()

        local_ids.append(local_id)
    return local_ids
Example #16
def image_proxy(req, headers=None, cache=None, timeout=None, proxy_info=None):
    import httplib2
    import gd

    form = ImageProxyForm(req.GET)
    if not form.is_valid():
        return HttpResponseBadRequest()

    url = form.cleaned_data['url']
    width = form.cleaned_data.get('w')
    height = form.cleaned_data.get('h')
    format = form.cleaned_data.get('f')
    timeout = form.cleaned_data.get('t') or timeout
    referrer = form.cleaned_data.get('r')

    headers = headers or {}
    if referrer:
        headers['Referer'] = referrer

    conn = httplib2.Http(cache, timeout, proxy_info)
    result, content = conn.request(url, headers=headers)

    status = int(result['status'])
    if status not in [200, 304]:
        return HttpResponse(status=status)

    try:
        img = Image.open(StringIO(content))
    except IOError:
        raise Http404

    w, h = img.size
    if width and height:
        if (w <= width and h <= height):
            need_resize = False
        else:
            need_resize = True

            w_ratio = width / float(w)
            h_ratio = height / float(h)
            # use smaller ratio
            ratio = min(w_ratio, h_ratio)
            size = (int(w * ratio), int(h * ratio))
    else:
        # either width or height is undefined
        if height is not None and h > height:
            need_resize = True
            ratio = height / float(h)
            size = (int(w * ratio), int(h * ratio))
        elif width is not None and w > width:
            need_resize = True
            ratio = width / float(w)
            size = (int(w * ratio), int(h * ratio))
        else:
            need_resize = False

    format = img.format.lower()
    if format == 'gif':
        content_type = 'image/gif'

        if need_resize:
            newimage = gd.image(size)

            tmp = StringIO()
            img.save(tmp, 'PNG')
            tmp.seek(0)

            gdimage = gd.image(tmp, 'png')

            # resize
            gdimage.copyResizedTo(newimage, (0, 0), (0, 0), size, img.size)

            # get result
            output = StringIO()
            newimage.writeGif(output)
            # override image binary content
            content = output.getvalue()

    else:
        # force output to be JPEG
        content_type = 'image/jpeg'

        if need_resize or format != 'jpeg':  # img.format is e.g. 'jpeg', never a MIME type
            # resize
            if need_resize:
                img = img.resize(size)

            # change color mode to RGB if not
            if img.mode != 'RGB':
                img = img.convert("RGB")

            output = StringIO()
            img.save(output, 'JPEG')
            # override image binary content
            content = output.getvalue()

    CACHE_TIMEOUT = 86400
    res = HttpResponse(content, content_type=content_type)
    res['Content-Length'] = str(len(content))
    res['ETag'] = '"%s"' % md5(content).hexdigest()
    res['Last-Modified'] = http_date()
    res['Expires'] = http_date(time.time() + CACHE_TIMEOUT)
    patch_cache_control(res, max_age=CACHE_TIMEOUT)
    return res
Example #17
def fetch_photos_for_flickr_user(results, nsid, page=None):
    from flickr.tasks import update_flickr_user_camera
    from flickr.tasks import flickr_user_fetch_photos_complete
    
    nsid_digest = md5(nsid).hexdigest()
    lock_id = "%s-lock-%s" % ("fetch_photos", nsid_digest)
    
    # cache.add fails if the key already exists
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
     
    if page or acquire_lock():
        flickr_user = FlickrUser.objects.get(nsid = nsid)
        
        if flickr_user.count_photos == 0:
            return flickr_user_fetch_photos_complete.delay(None, flickr_user.nsid)
        
        per_page = 100
        
        if not page:
            page = math.ceil(float(flickr_user.count_photos) / float(per_page))
    
        logger.info("Fetching page %s for %s" % (page, flickr_user.username))
    
        try:
            # Fetch a page of photos
            photos_rsp = flickr.people.getPublicPhotos(
                user_id=flickr_user.nsid,
                per_page=per_page,
                page=page,
                extras="date_taken,date_upload,license,owner_name,media,path_alias,count_comments,count_faves,geo",
                format="json",
                nojsoncallback="true",
            )
            json = simplejson.loads(photos_rsp)
        
            if json and json['stat'] == 'ok':
                pages = json['photos']['pages']
                photo_updates = []
            
                for photo in json['photos']['photo']:
                    if not flickr_user.date_last_photo_update or int(photo['dateupload']) >= int(flickr_user.date_last_photo_update):
                        photo_updates.append(process_flickr_photo.subtask((photo, flickr_user.nsid), link=update_flickr_user_camera.subtask((flickr_user.nsid, ))))
                        photo_update_date = photo['dateupload']
                        
                if page == 1:
                    logger.info("This is the last page (%s) for %s!" % (pages, flickr_user.username))
                    if photo_updates:
                        return chord(photo_updates)(flickr_user_fetch_photos_complete.subtask((flickr_user.nsid, )))
                    else:
                        return flickr_user_fetch_photos_complete.delay(None, flickr_user.nsid)
                
                else:
                    logger.info("Firing tasks for page %s of %s for %s" % (page, pages, flickr_user.username))
                    next_page = page - 1
                
                
                    pct = 100 - ((float(page) / float(pages)) * 100)
                    logger.info("pct should be: %s/%s * 100 = %s" % (page, pages, pct))
                
                    logger.info("Push it.")
                    values = {
                        'secret': settings.PUSHY_SECRET,
                        'user_id': flickr_user.nsid,
                        'message': simplejson.dumps({'type': 'fetch_photos.update_progress_bar', 'data': {'pct': pct}}),
                    }
                    data = urllib.urlencode(values)
                    req = urllib2.Request(settings.PUSHY_URL_LOCAL, data)
                    
                    try:
                        response = urllib2.urlopen(req)
                    except:
                        logger.error("Problem calling pushy from photos fetch.")
                    
                    if photo_updates:
                        flickr_user.date_last_photo_update = photo_update_date
                        flickr_user.save()
                        
                        return chord(photo_updates)(fetch_photos_for_flickr_user.subtask((flickr_user.nsid, next_page, )))
                    else:
                        return fetch_photos_for_flickr_user.delay(None, flickr_user.nsid, next_page)
                
            else:
                logger.error("Flickr api query did not respond OK calling getPublicPhotos for %s in fetch_photos, will try again." % (flickr_user.nsid))
                return fetch_photos_for_flickr_user.retry(countdown=5)
            
        except URLError, e:
            logger.error("Problem talking to Flickr when calling getPublicPhotos for %s in fetch_photos (URLError), will try again. Reason: %s" % (flickr_user.nsid, e.reason))
            return fetch_photos_for_flickr_user.retry(countdown=5)
        
        except FlickrError, e:
            logger.error("Problem talking to Flickr when calling getPublicPhotos for %s in fetch_photos (FlickrError), re-scheduling task.\n Error: %s" % (flickr_user.nsid, e))
            raise fetch_photos_for_flickr_user.retry(countdown=5)
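fetch_photos_for_flickr_user starts at the user's last page and works down to page 1, presumably so that photos uploaded mid-crawl do not shift the pages being walked. The page arithmetic in isolation (the counts are hypothetical):

import math

count_photos = 1234  # hypothetical user
per_page = 100
last_page = int(math.ceil(float(count_photos) / float(per_page)))  # 13
for page in range(last_page, 0, -1):  # 13, 12, ..., 1; page 1 is handled last
    pass  # fetch and process this page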
Example #18
def get_chksum(title, url, pub_date):
    return md5("%s%s%s" % (title, url, pub_date)).hexdigest()
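As written, get_chksum only runs on Python 2, where md5 accepts str. A hedged Python 3 port:

from hashlib import md5

def get_chksum(title, url, pub_date):
    # Python 3 port: md5 requires bytes, so encode the joined fields first.
    return md5(("%s%s%s" % (title, url, pub_date)).encode('utf-8')).hexdigest()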