def write_details_to_csv(csv_file, statistic, data):
    """
        Write the detail rows for a statistic to a CSV file
        (relies on location_list, split and s3_str from the surrounding scope)
    """
    cnt = 0
    writerow = csv_file.writerow
    for (location, details) in data.items():
        loc = location_list[location]
        for key in sorted(details.keys()):
            row = details[key]
            source = row[2]
            source_url = ""
            url = urlparse.urlparse(source)
            if url[0] != "":
                # Source is a URL: keep the full link and use the
                # unescaped file name as the display name
                source_url = source
                (head, tail) = split(url[2])
                source = tail.replace("%20", " ")
            cnt += 1
            writerow([statistic,
                      row[0],
                      loc[0],
                      loc[1],
                      loc[2],
                      loc[3],
                      key,
                      s3_str(source),
                      row[1],
                      source_url,
                      ])
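# The URL handling above keeps the full link as source_url and uses the
# unescaped last path component as the display name. A minimal standalone
# sketch of that step, using only the standard library; the helper name and
# the example URL below are illustrative, not part of the module.
from os.path import split as path_split
try:
    from urllib import parse as urlparse_mod   # Python 3
except ImportError:
    import urlparse as urlparse_mod             # Python 2

def source_name_and_url(source):
    # If the source parses as a URL, keep it as source_url and derive a
    # human-readable name from its (unescaped) file name
    source_url = ""
    url = urlparse_mod.urlparse(source)
    if url[0] != "":
        source_url = source
        (head, tail) = path_split(url[2])
        source = tail.replace("%20", " ")
    return source, source_url

# e.g. ("Situation Report.pdf", "http://example.org/docs/Situation%20Report.pdf")
print(source_name_and_url("http://example.org/docs/Situation%20Report.pdf"))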
def parse_rss_2_cap(message):
    """
        Parse RSS Feeds into the CAP Module
    """

    db = current.db
    s3db = current.s3db
    table = s3db.msg_rss
    message_id = message.message_id
    record = db(table.message_id == message_id).select(table.id,
                                                       table.channel_id,
                                                       table.title,
                                                       table.from_address,
                                                       table.body,
                                                       table.date,
                                                       table.location_id,
                                                       table.author,
                                                       limitby=(0, 1)
                                                       ).first()
    if not record:
        return

    pstable = s3db.msg_parsing_status
    # not adding (pstable.channel_id == record.channel_id) to the query
    # because two channels (http://host.domain/eden/cap/public.rss and
    # http://host.domain/eden/cap/alert.rss) may contain a common url,
    # eg. http://host.domain/eden/cap/public/xx.cap
    pquery = (pstable.message_id == message_id)
    prows = db(pquery).select(pstable.id,
                              pstable.is_parsed)
    for prow in prows:
        if prow.is_parsed:
            return

    alert_table = s3db.cap_alert
    info_table = s3db.cap_info

    # Is this an Update or a Create?
    # @ToDo: Use guid?
    # Use Body
    body = record.body or record.title
    query = (info_table.description == body)
    exists = db(query).select(info_table.id,
                              limitby=(0, 1)
                              ).first()

    author = record.author
    if author:
        ptable = s3db.pr_person
        # https://code.google.com/p/python-nameparser/
        from nameparser import HumanName
        name = HumanName(author)
        first_name = name.first
        middle_name = name.middle
        last_name = name.last
        query = (ptable.first_name == first_name) & \
                (ptable.middle_name == middle_name) & \
                (ptable.last_name == last_name)
        pexists = db(query).select(ptable.id,
                                   limitby=(0, 1)
                                   ).first()
        if pexists:
            person_id = pexists.id
        else:
            person_id = ptable.insert(first_name=first_name,
                                      middle_name=middle_name,
                                      last_name=last_name)
            s3db.update_super(ptable, {"id": person_id})
    else:
        person_id = None

    if exists:
        # @ToDo: Use XSLT
        info_id = exists.id
        db(info_table.id == info_id).update(headline=record.title,
                                            description=body,
                                            created_on=record.date,
                                            #location_id = record.location_id,
                                            #person_id = person_id,
                                            )
    else:
        # Embedded link
        url = record.from_address
        import_xml = s3db.resource("cap_alert").import_xml
        stylesheet = os.path.join(current.request.folder, "static", "formats",
                                  "cap", "import.xsl")
        try:
            file = fetch(url)
        except HTTPError as e:
            import base64
            rss_table = s3db.msg_rss_channel
            query = (rss_table.channel_id == record.channel_id)
            channel = db(query).select(rss_table.date,
                                       rss_table.etag,
                                       rss_table.url,
                                       rss_table.username,
                                       rss_table.password,
                                       limitby=(0, 1)).first()
            username = channel.username
            password = channel.password
            if e.code == 401 and username and password:
                request = urllib2.Request(url)
                base64string = base64.encodestring("%s:%s" %
                                                   (username, password))
                request.add_header("Authorization",
                                   "Basic %s" % base64string)
            else:
                request = None

            try:
                file = urlopen(request).read() if request else fetch(url)
            except HTTPError as e:
                # Check if there are links to look into
                ltable = s3db.msg_rss_link
                query_ = (ltable.rss_id == record.id) & (ltable.deleted != True)
                rows_ = db(query_).select(ltable.type,
                                          ltable.url)
                url_format = "{uri.scheme}://{uri.netloc}/".format
                url_domain = url_format(uri=urlparse.urlparse(url))
                for row_ in rows_:
                    url = row_.url
                    if url and row_.type == "application/cap+xml" and \
                       url_domain == url_format(uri=urlparse.urlparse(url)):
                        # Same domain, so okay to use same username/pwd combination
                        if e.code == 401 and username and password:
                            request = urllib2.Request(url)
                            request.add_header("Authorization",
                                               "Basic %s" % base64string)
                        else:
                            request = None
                        try:
                            file = urlopen(request).read() if request else fetch(url)
                        except HTTPError as e:
                            current.log.error("Getting content from link failed: %s" % e)
                        else:
                            # Import via XSLT
                            import_xml(StringIO(file),
                                       stylesheet=stylesheet,
                                       ignore_errors=True)
            else:
                # Import via XSLT
                import_xml(StringIO(file),
                           stylesheet=stylesheet,
                           ignore_errors=True)
        else:
            # Public Alerts
            # eg. http://host.domain/eden/cap/public/xx.cap
            # Import via XSLT
            import_xml(StringIO(file),
                       stylesheet=stylesheet,
                       ignore_errors=True)

    # No Reply
    return
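# parse_rss_2_cap() retries a failed fetch with HTTP Basic Auth when the
# server answers 401 and the channel has stored credentials. A minimal
# standalone sketch of that retry pattern, assuming only the standard
# library; the function name and its arguments are illustrative.
import base64
try:
    from urllib.request import Request, urlopen      # Python 3
    from urllib.error import HTTPError
except ImportError:
    from urllib2 import Request, urlopen, HTTPError  # Python 2

def fetch_with_basic_auth(url, username=None, password=None):
    # First try anonymously; on a 401, retry once with Basic Auth if
    # credentials are available (mirrors the channel-credential retry above)
    try:
        return urlopen(url).read()
    except HTTPError as e:
        if e.code != 401 or not (username and password):
            raise
        request = Request(url)
        token = base64.b64encode(("%s:%s" % (username, password)).encode("utf-8"))
        request.add_header("Authorization", "Basic %s" % token.decode("ascii"))
        return urlopen(request).read()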
def fetch_cap(cls, entry):
    """
        Fetch and parse the CAP-XML source for an RSS/Atom feed entry

        @param entry: the RSS/Atom feed entry (msg_rss Row), containing:
                      - id
                      - channel_id
                      - from_address

        @returns: tuple (url, tree, version, error)
                  - url = the URL of the CAP-XML source used
                  - tree = ElementTree of the CAP source
                  - version = the detected CAP version
                  - error = error message if unsuccessful, else None
    """

    db = current.db
    s3db = current.s3db

    AlertImporter = s3db.cap_ImportAlert

    # Get the URLs for all <link>s in this entry which are marked as cap+xml
    ltable = s3db.msg_rss_link
    query = (ltable.rss_id == entry.id) & \
            (ltable.type == "application/cap+xml") & \
            (ltable.deleted == False)
    links = db(query).select(ltable.url)
    urls = [link.url for link in links if link.url]

    # Add the main <link> of the entry (=from_address) as fallback
    if entry.from_address:
        urls.append(entry.from_address)

    # Simple domain formatter for URLs
    url_format = "{uri.scheme}://{uri.netloc}/".format

    # Get domain/username/password for the channel
    ctable = s3db.msg_rss_channel
    query = (ctable.channel_id == entry.channel_id) & \
            (ctable.deleted == False)
    channel = db(query).select(ctable.url,
                               ctable.username,
                               ctable.password,
                               limitby=(0, 1),
                               ).first()
    if channel:
        channel_domain = url_format(uri=urlparse.urlparse(channel.url))
        username = channel.username
        password = channel.password
    else:
        channel_domain = None
        username = password = None

    # Iterate over <link> URLs to find the CAP source
    errors = []
    cap_url = version = tree = None
    for url in urls:

        error = None
        current.log.debug("Fetching CAP-XML from %s" % url)

        # If same domain as channel, use channel credentials for auth
        if channel_domain and url_format(uri=urlparse.urlparse(url)) == channel_domain:
            opener = AlertImporter.opener(url,
                                          username=username,
                                          password=password)
        else:
            opener = AlertImporter.opener(url)

        # Fetch the link content
        try:
            content = opener.open(url)
        except HTTPError as e:
            # HTTP status
            error = "HTTP %s: %s" % (e.code, e.read())
        except URLError as e:
            # URL Error (network error)
            error = "CAP source unavailable (%s)" % e.reason
        except Exception:
            # Other error (local error)
            import sys
            error = sys.exc_info()[1]
        else:
            # Try parse
            tree, version, error = AlertImporter.parse_cap(content)

        if tree:
            # XML source found => proceed to import
            cap_url = url
            break
        elif error:
            errors.append(error)
        else:
            errors.append("Not a valid CAP source: %s" % url)

    if errors:
        error = "\n".join(errors)
    else:
        error = None

    return cap_url, tree, version, error
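# fetch_cap() only reuses the channel's username/password when a <link> URL
# is on the same scheme and host as the channel itself. A minimal sketch of
# that same-domain check; the helper name and the example URLs below are
# illustrative.
try:
    from urllib.parse import urlparse   # Python 3
except ImportError:
    from urlparse import urlparse       # Python 2

url_format = "{uri.scheme}://{uri.netloc}/".format

def same_domain(url_a, url_b):
    # True when both URLs share scheme and network location,
    # i.e. when it would be safe to reuse the channel's credentials
    return url_format(uri=urlparse(url_a)) == url_format(uri=urlparse(url_b))

print(same_domain("http://host.domain/eden/cap/alert.rss",
                  "http://host.domain/eden/cap/public/xx.cap"))   # True
print(same_domain("http://host.domain/eden/cap/alert.rss",
                  "http://other.site/cap/xx.cap"))                # False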
def notify(cls, resource_id):
    """
        Asynchronous task to notify a subscriber about updates,
        runs a POST?format=msg request against the subscribed
        controller which extracts the data and renders and sends
        the notification message (see send()).

        @param resource_id: the pr_subscription_resource record ID
    """

    _debug = current.log.debug
    _debug("S3Notifications.notify(resource_id=%s)" % resource_id)

    db = current.db
    s3db = current.s3db

    stable = s3db.pr_subscription
    rtable = db.pr_subscription_resource
    ftable = s3db.pr_filter

    # Extract the subscription data
    join = stable.on(rtable.subscription_id == stable.id)
    left = ftable.on(ftable.id == stable.filter_id)
    # @todo: should not need rtable.resource here
    row = db(rtable.id == resource_id).select(stable.id,
                                              stable.pe_id,
                                              stable.frequency,
                                              stable.notify_on,
                                              stable.method,
                                              stable.email_format,
                                              stable.attachment,
                                              rtable.id,
                                              rtable.resource,
                                              rtable.url,
                                              rtable.last_check_time,
                                              ftable.query,
                                              join=join,
                                              left=left).first()
    if not row:
        return True

    s = getattr(row, "pr_subscription")
    r = getattr(row, "pr_subscription_resource")
    f = getattr(row, "pr_filter")

    # Create a temporary token to authorize the lookup request
    auth_token = str(uuid4())

    # Store the auth_token in the subscription record
    r.update_record(auth_token=auth_token)
    db.commit()

    # Construct the send-URL
    public_url = current.deployment_settings.get_base_public_url()
    lookup_url = "%s/%s/%s" % (public_url,
                               current.request.application,
                               r.url.lstrip("/"))

    # Break up the URL into its components
    purl = list(urlparse.urlparse(lookup_url))

    # Subscription parameters
    # Date (must ensure we pass to REST as tz-aware)
    last_check_time = s3_encode_iso_datetime(r.last_check_time)
    query = {"subscription": auth_token, "format": "msg"}
    if "upd" in s.notify_on:
        query["~.modified_on__ge"] = "%sZ" % last_check_time
    else:
        query["~.created_on__ge"] = "%sZ" % last_check_time

    # Filters
    if f.query:
        from .s3filter import S3FilterString
        resource = s3db.resource(r.resource)
        fstring = S3FilterString(resource, f.query)
        for k, v in fstring.get_vars.items():
            if v is not None:
                if k in query:
                    value = query[k]
                    if type(value) is list:
                        value.append(v)
                    else:
                        query[k] = [value, v]
                else:
                    query[k] = v
        query_nice = s3_unicode(fstring.represent())
    else:
        query_nice = None

    # Add subscription parameters and filters to the URL query, and
    # put the URL back together
    query = urlencode(query)
    if purl[4]:
        query = "&".join((purl[4], query))
    page_url = urlparse.urlunparse([purl[0], # scheme
                                    purl[1], # netloc
                                    purl[2], # path
                                    purl[3], # params
                                    query,   # query
                                    purl[5], # fragment
                                    ])

    # Serialize data for send (avoid second lookup in send)
    data = json.dumps({"pe_id": s.pe_id,
                       "notify_on": s.notify_on,
                       "method": s.method,
                       "email_format": s.email_format,
                       "attachment": s.attachment,
                       "resource": r.resource,
                       "last_check_time": last_check_time,
                       "filter_query": query_nice,
                       "page_url": lookup_url,
                       "item_url": None,
                       })

    # Send the request
    _debug("Requesting %s" % page_url)
    req = urllib2.Request(page_url, data=data.encode("utf-8"))
    req.add_header("Content-Type", "application/json")
    success = False
    try:
        response = json.loads(urlopen(req).read())
        message = response["message"]
        if response["status"] == "success":
            success = True
    except HTTPError as e:
        message = ("HTTP %s: %s" % (e.code, e.read()))
    except:
        exc_info = sys.exc_info()[:2]
        message = ("%s: %s" % (exc_info[0].__name__, exc_info[1]))
    _debug(message)

    # Update time stamps and unlock, invalidate auth token
    intervals = s3db.pr_subscription_check_intervals
    interval = datetime.timedelta(minutes=intervals.get(s.frequency, 0))

    if success:
        last_check_time = datetime.datetime.utcnow()
        next_check_time = last_check_time + interval
        r.update_record(auth_token=None,
                        locked=False,
                        last_check_time=last_check_time,
                        next_check_time=next_check_time)
    else:
        r.update_record(auth_token=None,
                        locked=False)
    db.commit()

    # Done
    return message
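# notify() rebuilds the lookup URL by splitting it with urlparse, merging the
# subscription parameters into any existing query string, and joining it back
# together with urlunparse. A minimal standalone sketch of that step; the
# helper name and the example URL/parameters below are illustrative.
try:
    from urllib.parse import urlparse, urlunparse, urlencode   # Python 3
except ImportError:
    from urlparse import urlparse, urlunparse                  # Python 2
    from urllib import urlencode

def add_query_params(url, params):
    # Split the URL, append the encoded parameters to any existing query
    # string, and put the URL back together
    purl = list(urlparse(url))
    query = urlencode(params)
    if purl[4]:
        query = "&".join((purl[4], query))
    purl[4] = query
    return urlunparse(purl)

# e.g. ".../dvr/case?format=msg&~.modified_on__ge=2024-01-01T00%3A00%3A00Z"
print(add_query_params("http://host.domain/eden/dvr/case",
                       {"format": "msg",
                        "~.modified_on__ge": "2024-01-01T00:00:00Z"}))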