class LockssomaticSpaceSchema(TypeOfSpaceSchema):
    collection_iri = UnicodeString(max=256)
    content_provider_id = UnicodeString(max=32)
    checksum_type = UnicodeString(max=64)
    keep_local = Bool()
    au_size = Int()
    sd_iri = URL(max=256)
    external_domain = URL()
def isurl(s, require_tld=True):
    u = URL(add_http=False, require_tld=require_tld)
    try:
        u.to_python(s)
        return True
    except Invalid:
        # formencode's URL validator rejects hosts without a TLD, so fall
        # back to accepting anything that looks like a localhost URL.
        url_local = re.compile(r'//localhost(:|/)').search(s)
        if url_local is not None:
            return True
        return False
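# Hypothetical usage sketch for isurl(), not part of the original module:
# formencode's URL validator accepts fully qualified URLs, while the regex
# fallback admits localhost URLs that the require_tld check would reject.
assert isurl('http://example.com/feed')
assert isurl('http://localhost:8080/feed')
assert not isurl('this is not a url')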
class ApplnValidator(Schema):
    name = ByteString(not_empty=True)
    email = Email()
    dob = DateConverter(month_style='iso')
    gender = OneOf(['M', 'F'])
    applntype = OneOf(['new', 'renewal'])
    race1_name = ByteString(not_empty=True)
    race1_location = ByteString(not_empty=True)
    race1_date = DateConverter(month_style='iso')
    race1_distance = Number(min=0, max=200)
    race1_units = OneOf(['miles', 'km'])
    race1_time = TimeOptHoursConverter()
    race1_resultslink = URL()
    race2_name = ByteString(not_empty=True)
    race2_location = ByteString(not_empty=True)
    race2_date = DateConverter(month_style='iso')
    race2_distance = Number(min=0, max=200)
    race2_units = OneOf(['miles', 'km'])
    race2_time = TimeOptHoursConverter()
    race2_resultslink = URL()
class UriData(Schema):
    """Validate URI data received from CSV or user input.

    All fields are required, cannot be None, and no extra fields are allowed.
    """
    title = String(not_empty=True)
    URI = URL(not_empty=True)
    notify = OneOf(
        ["always", "valid", "invalid"],
        not_empty=True
    )
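# Hypothetical usage sketch for UriData (the values are invented for
# illustration).  Valid input comes back as a cleaned dict; input that
# violates the schema raises formencode.Invalid with per-field messages.
from formencode import Invalid

clean = UriData().to_python({
    'title': 'Example site',
    'URI': 'http://example.com/',
    'notify': 'always',
})

try:
    UriData().to_python({'title': '', 'URI': 'not-a-url', 'notify': 'sometimes'})
except Invalid as e:
    print(e.unpack_errors())  # error messages keyed by field name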
class GoogleFeedSearchService(object):
    """
    This class provides a simple feed searching service based on google APIs.
    You can query by keywords, urls of web pages linking to feeds or direct
    urls to feeds.

    It is a very fast path for common feeds, but *not comprehensive*. If you
    want to support small-fry feeds that have not been discovered by google,
    consider chaining this together with the HandScrapedFeedSearchService as
    a fallback.
    """

    def __init__(self, service_key=None, http_cache=None):
        self._service_key = service_key
        self._http_cache = http_cache
        self._as_url = URL(add_http=True)
        self._http_client = Http(cache=self._http_cache, **DEFAULT_HTTP_ARGS)

    def find_feeds(self, query, max_results=5):
        try:
            rs = self._find_feeds(query)
            rs = uniquify_results(rs)
            return rs[0:max_results]
        except:
            log.error("Error searching for feeds. query=%s: %s" %
                      (query, traceback.format_exc()))
            return []

    def _find_feeds(self, query):
        # firstly, could this be a url? if so, let's try that...
        try:
            url = self._as_url.to_python(query)
            rs = self._search_url_any(url)
            if len(rs) > 0:
                return rs
        except Invalid:
            # nope, let's move on...
            pass

        # try searching terms...
        try:
            return self._search_terms(query)
        except:
            return []

    def is_feed(self, url):
        """
        test whether there is a feed at the url specified.
        (not referred to in the page specified, actually there)
        """
        return self._load_feed(url) is not None

    def _search_terms(self, query):
        rs = self._query(FIND_FEED, query)
        if rs is not None:
            return [Dibject(url=r["url"],
                            title=r.get('title', ''),
                            link=r.get('link', ''))
                    for r in rs.entries]
        else:
            return []

    def _load_feed(self, url):
        rs = self._query(LOAD_FEED, url)
        if rs is None:
            return None
        return Dibject(url=url,
                       title=rs.feed.get('title', ''),
                       link=rs.feed.get('link', ''))

    def _search_url_any(self, url):
        # okay, well this could be the url of a feed or
        # it could be a web page that has a feed link in it...

        # try searching it as a web page first...
        rs = self._search_terms('site:%s' % url)
        if len(rs) > 0:
            return rs

        # hmm didn't seem to find anything, but it may be the
        # actual url of a feed...
        rs = self._load_feed(url)
        if rs is not None:
            return [rs]
        else:
            return []

    def _query(self, service, q):
        query_url = service
        query_url += urllib.quote_plus(q)
        if self._service_key:
            query_url += "&key=%s" % self._service_key

        log.debug("Issuing query %s" % query_url)
        result = self._http_client.request(query_url, 'GET')
        if result is None:
            log.error("No response to query %s" % query_url)
            return None

        response, content = result
        log.debug("response was: %s, %s" % (response, content))
        if response.status != 200:
            log.error("Error response to %s: (%s, %s)" %
                      (query_url, response, content))
            return None

        rr = json_wake(content)
        if not hasattr(rr, 'responseData'):
            return None
        return rr.responseData
class HandScrapedFeedSearchService(object):
    """
    This is a slow path hand scraper / checker feed search. It does not
    handle keyword searches, only urls which it laboriously fetches and
    inspects. If there is a feed to be found, it will try pretty hard
    (at the expense of time) to prove it.
    """

    def __init__(self, http_cache=None):
        self._http_cache = http_cache
        self._as_url = URL(add_http=True)
        self._http_client = Http(cache=self._http_cache, **DEFAULT_HTTP_ARGS)

    def find_feeds(self, query, max_results=5):
        try:
            rs = self._find_feeds(query)
            rs = uniquify_results(rs)
            return rs[0:max_results]
        except:
            log.error("Error finding feeds in %s, %s" %
                      (query, traceback.format_exc()))
            return []

    def _find_feeds(self, query):
        try:
            url = self._as_url.to_python(query)
        except Invalid:
            return []

        rs = self._search_url(url)
        if len(rs) > 0:
            return rs
        else:
            return []

    def is_feed(self, url):
        try:
            ff, response, content = self._check_for_feed(url)
            return ff is not None
        except:
            log.error("Error determining existence of feed %s: %s" %
                      (url, traceback.format_exc()))

    def _search_url(self, url):
        feeds = []
        try:
            # check to see if the URL points to a feed
            ff, response, content = self._check_for_feed(url)
            if ff is not None:
                return [ff]

            if response is None or content is None:
                return []

            # dig feed links out...
            ct = get_content_type(response.get('content-type', '')).lower()
            if ct in DEFINITE_HTML_CONTENT_TYPES or ct in AMBIGUOUS_XML_CONTENT_TYPES:
                try:
                    feed_urls = self._find_feed_links(content)
                except:
                    log.error("Error scraping feed links from %s: %s" %
                              (url, traceback.format_exc()))
                else:
                    for furl in feed_urls:
                        # mm make sure it's not relative...
                        furl = urlparse.urljoin(url, furl)
                        ff, response, content = self._check_for_feed(furl)
                        if ff is not None:
                            feeds.append(ff)
            return feeds
        except ForbiddenHost, e:
            log.warn(e)
            return []
        except:
            # log and bail out, mirroring the error handling in find_feeds
            log.error("Error searching url %s: %s" %
                      (url, traceback.format_exc()))
            return []
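# A minimal sketch of the fallback chaining suggested in the
# GoogleFeedSearchService docstring.  ChainedFeedSearchService below is
# hypothetical and not part of this module; it simply tries each service
# in order and returns the first non-empty result set.
class ChainedFeedSearchService(object):
    def __init__(self, services):
        self._services = services

    def find_feeds(self, query, max_results=5):
        for service in self._services:
            results = service.find_feeds(query, max_results=max_results)
            if results:
                return results
        return []

searcher = ChainedFeedSearchService([GoogleFeedSearchService(),
                                     HandScrapedFeedSearchService()])
feeds = searcher.find_feeds('http://example.com/')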
class DSpaceSpaceSchema(TypeOfSpaceSchema):
    sd_iri = URL(max=256)
    user = UnicodeString(max=64)
    password = UnicodeString(max=64)
    metadata_policy = UnicodeString()  # JSONField ...
    archive_format = OneOf(_flatten(models.DSpace.ARCHIVE_FORMAT_CHOICES))
class PipelineSchema(Schema):
    api_key = UnicodeString(max=256)
    api_username = UnicodeString(max=256)
    description = UnicodeString(max=256)
    enabled = Bool()
    remote_name = Any(validators=[IPAddress(), URL()])
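# A brief illustration (not from the original code) of how the compound Any
# validator used for remote_name behaves: a value passes validation if either
# the IPAddress or the URL validator accepts it.
from formencode import Invalid
from formencode.compound import Any
from formencode.validators import IPAddress, URL

remote_name = Any(validators=[IPAddress(), URL()])
remote_name.to_python('192.168.1.10')        # passes (valid IP address)
remote_name.to_python('http://example.org')  # passes (valid URL)
try:
    remote_name.to_python('not-a-host')      # rejected by both validators
except Invalid:
    pass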