def valid_url(self, key, opt):
    """
    Check that ``opt`` is a url.

    :param key: name of the option, used only in the error message.
    :param opt: the value to check with ``url.validate``.
    :returns: ``opt`` unchanged when it validates; otherwise a
        ``RecipeSchemaError`` instance describing the mismatch.
    """
    if not url.validate(opt):
        # NOTE(review): the error is returned, not raised -- confirm that
        # callers inspect the return value for ``RecipeSchemaError``.
        message = "{} should be a 'url' field but was passed '{}'.".format(
            key, opt)
        return RecipeSchemaError(message)
    return opt
def valid_url(self, key, opt):
    """
    Validate a url option.

    Hands ``opt`` back untouched when ``url.validate`` accepts it. When it
    does not, a ``RecipeSchemaError`` naming ``key`` and the rejected value
    is *returned* (not raised).
    """
    accepted = url.validate(opt)
    if accepted:
        return opt

    # NOTE(review): returning the exception object instead of raising it
    # looks unusual -- verify against the code that calls this validator.
    template = "{} should be a 'url' field but was passed '{}'."
    return RecipeSchemaError(template.format(key, opt))
def _prepare(obj, requires=None, recipe=None, type='event',
             org_id=None, extract=True):
    """
    Prepare a content item or an event.

    Validates ``obj``, normalizes its url / date / text fields, attaches
    provenance, optionally merges cached extraction results (content items
    only) and sets ``thumbnail`` and ``domain``.

    :param obj: dict describing the event or content item. Mutated in place.
    :param requires: field names handed to ``_check_requires``. ``None``
        (the default) is treated as an empty list.
    :param recipe: passed through to ``_provenance``.
    :param type: ``'event'`` or ``'content_item'``.
    :param org_id: stored under ``obj['org_id']``; any incoming ``id`` /
        ``org_id`` on ``obj`` is discarded first.
    :param extract: when true, a content item with a url is merged with the
        value returned by ``extract_cache.get``.
    :returns: the prepared object.
    :raises RequestError: for an event status that is not in
        ``EVENT_STATUSES`` or is ``'deleted'``, or for a content item type
        that is not in ``CONTENT_ITEM_TYPES`` (including a missing type).
    """
    # a ``None`` default avoids sharing one mutable list between calls.
    if requires is None:
        requires = []

    # check required fields
    _check_requires(obj, requires, type=type)

    # validate status
    if type == 'event' and 'status' in obj:
        status = obj['status']
        if status not in EVENT_STATUSES:
            raise RequestError(
                'Invalid event status: {status}'.format(status=status))
        if status == 'deleted':
            raise RequestError(
                'You cannot create an Event with status of "deleted."')

    # validate type
    if type == 'content_item':
        item_type = obj.get('type', None)
        if item_type not in CONTENT_ITEM_TYPES:
            # format with the looked-up value rather than ``**obj`` so that
            # a missing ``type`` key yields a RequestError, not a KeyError.
            raise RequestError(
                'Invalid content item type: {type}'.format(type=item_type))

    # get rid of ``id`` if it somehow got in here.
    obj.pop('id', None)
    obj.pop('org_id', None)

    # normalize the url (only content item urls are canonicalized)
    if type == 'event':
        obj['url'] = _prepare_url(obj, 'url', expand=True, canonicalize=False)
    elif type == 'content_item':
        obj['url'] = _prepare_url(obj, 'url', expand=True, canonicalize=True)

    # sanitize creation date; drop the key when nothing usable came back.
    obj['created'] = _prepare_date(obj, 'created')
    if not obj['created']:
        obj.pop('created')

    # sanitize text/html fields
    obj['title'] = _prepare_str(obj, 'title', obj['url'])
    obj['description'] = _prepare_str(obj, 'description', obj['url'])
    obj['body'] = _prepare_str(obj, 'body', obj['url'])

    # set org id
    obj['org_id'] = org_id

    # check img url
    if not url.validate(obj.get('img_url', None)):
        obj['img_url'] = None

    # determine provenance.
    obj = _provenance(obj, recipe, type)

    # if type is content items and we're extracting. do it.
    if type == 'content_item' and extract and obj.get('url', None):
        cr = extract_cache.get(obj.get('url'), type=obj.get('type', None))

        if not cr.value:
            # nothing usable came back; invalidate this cache entry.
            extract_cache.invalidate(
                obj.get('url'), type=obj.get('type', None))

        # merge extracted data with object.
        else:
            for k, v in cr.value.items():
                # fill in anything the object is missing.
                if not obj.get(k, None):
                    obj[k] = v

                # preference extracted data
                if k in ['description', 'body']:
                    obj[k] = v

                # merge extracted authors. ``k`` is always present in
                # ``obj`` at this point because of the fill-in above.
                elif k == 'authors':
                    for vv in v:
                        if vv not in obj[k]:
                            obj[k].append(vv)

            # swap bad images.
            tn = _prepare_thumbnail(obj, 'img_url')
            if not tn:
                img = cr.value.get('img_url', None)
                if img:
                    obj['img_url'] = img
                    obj['thumbnail'] = _prepare_thumbnail(obj, 'img_url')
                # NOTE(review): with no thumbnail and no extracted image,
                # ``thumbnail`` is left unset -- confirm this is intended.
            else:
                obj['thumbnail'] = tn
    else:
        obj['thumbnail'] = _prepare_thumbnail(obj, 'img_url')

    # set domain
    obj['domain'] = url.get_domain(obj['url'])

    # return prepped object
    return obj
def _prepare(obj, requires=None, recipe=None, type='event',
             org_id=None, extract=True):
    """
    Prepare a content item or an event.

    Validates ``obj``, normalizes its url / date / text fields, attaches
    provenance, optionally merges cached extraction results (content items
    only) and sets ``thumbnail`` and ``domain``.

    :param obj: dict describing the event or content item. Mutated in place.
    :param requires: field names handed to ``_check_requires``. ``None``
        (the default) is treated as an empty list.
    :param recipe: passed through to ``_provenance``.
    :param type: ``'event'`` or ``'content_item'``.
    :param org_id: stored under ``obj['org_id']``; any incoming ``id`` /
        ``org_id`` on ``obj`` is discarded first.
    :param extract: when true, a content item with a url is merged with the
        value returned by ``extract_cache.get``.
    :returns: the prepared object.
    :raises RequestError: for an event status that is not in
        ``EVENT_STATUSES`` or is ``'deleted'``, or for a content item type
        that is not in ``CONTENT_ITEM_TYPES`` (including a missing type).
    """
    # a ``None`` default avoids sharing one mutable list between calls.
    if requires is None:
        requires = []

    # check required fields
    _check_requires(obj, requires, type=type)

    # validate status
    if type == 'event' and 'status' in obj:
        status = obj['status']
        if status not in EVENT_STATUSES:
            raise RequestError(
                'Invalid event status: {status}'.format(status=status))
        if status == 'deleted':
            raise RequestError(
                'You cannot create an Event with status of "deleted."')

    # validate type
    if type == 'content_item':
        item_type = obj.get('type', None)
        if item_type not in CONTENT_ITEM_TYPES:
            # format with the looked-up value rather than ``**obj`` so that
            # a missing ``type`` key yields a RequestError, not a KeyError.
            raise RequestError(
                'Invalid content item type: {type}'.format(type=item_type))

    # get rid of ``id`` if it somehow got in here.
    obj.pop('id', None)
    obj.pop('org_id', None)

    # normalize the url (only content item urls are canonicalized)
    if type == 'event':
        obj['url'] = _prepare_url(obj, 'url', expand=True, canonicalize=False)
    elif type == 'content_item':
        obj['url'] = _prepare_url(obj, 'url', expand=True, canonicalize=True)

    # sanitize creation date; drop the key when nothing usable came back.
    obj['created'] = _prepare_date(obj, 'created')
    if not obj['created']:
        obj.pop('created')

    # sanitize text/html fields
    obj['title'] = _prepare_str(obj, 'title', obj['url'])
    obj['description'] = _prepare_str(obj, 'description', obj['url'])
    obj['body'] = _prepare_str(obj, 'body', obj['url'])

    # set org id
    obj['org_id'] = org_id

    # check img url
    if not url.validate(obj.get('img_url', None)):
        obj['img_url'] = None

    # determine provenance.
    obj = _provenance(obj, recipe, type)

    # if type is content items and we're extracting. do it.
    if type == 'content_item' and extract and obj.get('url', None):
        cr = extract_cache.get(obj.get('url'), type=obj.get('type', None))

        if not cr.value:
            # nothing usable came back; invalidate this cache entry.
            extract_cache.invalidate(
                obj.get('url'), type=obj.get('type', None))

        # merge extracted data with object.
        else:
            for k, v in cr.value.items():
                # fill in anything the object is missing.
                if not obj.get(k, None):
                    obj[k] = v

                # preference extracted data
                if k in ['description', 'body']:
                    obj[k] = v

                # merge extracted authors. ``k`` is always present in
                # ``obj`` at this point because of the fill-in above.
                elif k == 'authors':
                    for vv in v:
                        if vv not in obj[k]:
                            obj[k].append(vv)

            # swap bad images.
            tn = _prepare_thumbnail(obj, 'img_url')
            if not tn:
                img = cr.value.get('img_url', None)
                if img:
                    obj['img_url'] = img
                    obj['thumbnail'] = _prepare_thumbnail(obj, 'img_url')
                # NOTE(review): with no thumbnail and no extracted image,
                # ``thumbnail`` is left unset -- confirm this is intended.
            else:
                obj['thumbnail'] = tn
    else:
        obj['thumbnail'] = _prepare_thumbnail(obj, 'img_url')

    # set domain
    obj['domain'] = url.get_domain(obj['url'])

    # return prepped object
    return obj