Beispiel #1
0
def _writeXml(f, id, metadata):
    """Write one <record> element describing `id` and its metadata to `f`.

    The record's `identifier` attribute is the XML-escaped `id.identifier`.
    Every metadata item becomes an <element name="..."> child. Values stored
    under the keys "datacite" and "crossref" are embedded after passing
    through util.removeXmlDeclaration (they are not escaped); all other
    values are XML-escaped.

    NOTE(review): only the <element> strings are encoded to UTF-8 before
    being written; the opening and closing record tags are written as-is.
    Confirm that `f` accepts both forms.
    """
    embedded_xml_keys = ["datacite", "crossref"]
    f.write("<record identifier=\"%s\">" % util.xmlEscape(id.identifier))
    for name, value in metadata.items():
        if name in embedded_xml_keys:
            content = util.removeXmlDeclaration(value)
        else:
            content = util.xmlEscape(value)
        element = "<element name=\"%s\">%s</element>" % (
            util.xmlEscape(name), content)
        f.write(element.encode("UTF-8"))
    f.write("</record>")
Beispiel #2
0
def mark_paras(txt):
	"""
	Wrap every non-blank line of `txt` in a <p> element.

	Each line is stripped of surrounding whitespace and passed through
	util.xmlEscape; lines that are empty after stripping are dropped.
	The paragraphs are joined with newlines. Falsy input yields ''.
	The result is not marked as safe in any way; when rendering it in a
	jinja2 template, apply |safe.
	"""
	if not txt:
		return ''
	paragraphs = []
	for raw_line in txt.splitlines():
		stripped = raw_line.strip()
		if stripped:
			paragraphs.append('<p>%s</p>' % util.xmlEscape(stripped))
	return '\n'.join(paragraphs)
Beispiel #3
0
def xml_str(val, key=None):
	"""
	Unicode, xml-safe variant of `val`. `key` is for debug only.

	- None becomes ''.
	- Strings are passed through util.xmlEscape; a byte-string result is
	  decoded as UTF-8 with undecodable bytes replaced.
	- Lists, tuples and sets are converted element by element and rebuilt
	  as the same container type.
	- Integers and floats are converted with str().

	Raises ValueError for any other type; `key` is included in the message.
	"""
	# Python 2 has distinct `unicode` and `long` builtins; on Python 3 those
	# names do not exist and plain `str`/`int` cover the same values.
	try:
		text_types = (str, unicode)
		number_types = (long, int, float)
	except NameError:
		text_types = (str,)
		number_types = (int, float)

	# Exact type matches (not isinstance) are kept from the original, so
	# subclasses such as bool still end up in the error branch.
	if val is None:
		return ''
	elif type(val) in text_types:
		val = util.xmlEscape(val)
		# `bytes` is an alias of `str` on Python 2, so this matches the
		# original check there and is simply skipped for text on Python 3.
		if type(val) == bytes: val = val.decode('utf8', 'replace')
	elif type(val) in (list, tuple, set):
		val = type(val)(xml_str(x, key) for x in val)
	elif type(val) in number_types:
		val = str(val)
	else:
		# Call-form raise is valid on both Python 2 and Python 3.
		raise ValueError("Can't handle type %r for key %r" % (type(val), key,))
	return val
Beispiel #4
0
    def meta(self, refName):
        """Generate an openmensa XML meta feed from the static json file using an XML template.

        Reads the template from `metaTemplateFile`, looks up the canteen whose
        key in `self.canteens` equals `refName`, and fills the template with
        that canteen's fields plus one attribute string per weekday
        (`open="HH:MM-HH:MM"` or `closed="true"`). Every value is passed
        through `xmlEscape` before formatting.

        Opening hours are parsed from `mensa["times"]`: each match is a
        two-letter day, an optional `- <day>` range end, and a
        `H:MM - H:MM Uhr` time span.

        Returns the rendered XML, or an empty <openmensa/> element when
        `refName` is not a known canteen.
        """
        with open(metaTemplateFile) as f:
            template = f.read()

        for reference, mensa in self.canteens.items():
            if refName != reference:
                continue

            data = {
                "name": mensa["name"],
                "address": mensa["address"],
                "city": mensa["city"],
                "phone": mensa['phone'],
                "latitude": mensa["latitude"],
                "longitude": mensa["longitude"],
                "feed": self.urlTemplate.format(metaOrFeed='feed', mensaReference=urllib.parse.quote(reference)),
                "source": mensa["source"],
            }
            openingTimes = {}
            pattern = re.compile(
                r"([A-Z][a-z])(\s*-\s*([A-Z][a-z]))?\s*(\d{1,2}):(\d{2})\s*-\s*(\d{1,2}):(\d{2}) Uhr")
            for fromDay, _, toDay, fromTimeH, fromTimeM, toTimeH, toTimeM in pattern.findall(mensa["times"]):
                hours = "%02d:%02d-%02d:%02d" % (
                    int(fromTimeH), int(fromTimeM), int(toTimeH), int(toTimeM))
                openingTimes[fromDay] = hours
                if toDay:
                    # Walk the week in order and copy the hours to every day
                    # after fromDay up to and including toDay.
                    select = False
                    for short, _longName in weekdays_map:
                        if short == fromDay:
                            select = True
                        elif select:
                            openingTimes[short] = hours
                        if short == toDay:
                            select = False

            # Fill the weekday entries once, after all ranges are parsed.
            # Doing this inside the match loop left the keys unset whenever
            # no range could be parsed from the times string.
            for short, longName in weekdays_map:
                if short in openingTimes:
                    data[longName] = 'open="%s"' % openingTimes[short]
                else:
                    data[longName] = 'closed="true"'

            for key in data:
                data[key] = xmlEscape(data[key])
            return template.format(**data)

        return '<openmensa xmlns="http://openmensa.org/open-mensa-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="2.1" xsi:schemaLocation="http://openmensa.org/open-mensa-v2 http://openmensa.org/open-mensa-v2.xsd"/>'
	"""
	debug('processing %s ...' % articleId)
	conn = cur.connection
	try:
		cleartext = dechrome.parseCleartext(html)
		title = cleartext.split('\n')[0]
	except Exception, e:
		raise ProcessingError(e)

	# Debug: print output to a file for later inspection
	if 0:
		try:
			cur.execute("SELECT url FROM feed_article WHERE id=%s", (articleId,)); url=cur.fetchone()['url']
			debug('Article %s has url %s', articleId, url)
			if cleartext.startswith('\n'): cleartext = '???'+cleartext
			html_out = ('%s: <a href="%s">%s</a>' % (articleId, url, url)) + ('<pre><span style="font-weight:bold; font-size:20px">'+util.xmlEscape(cleartext)+'</pre>').replace('\n','</span>\n',1)
			with open('clean_output.html','a') as f: f.write(html_out.encode('utf8')); f.flush()
		except:
			exception("Error while producing debug output")

	if not cleartext:
		cur.execute("DELETE FROM processed_article WHERE feed_articleid=%s", (articleId,))
		cur.execute("UPDATE feed_article_meta SET is_cleartext='e' WHERE id=%s", (articleId,))
		raise NoTextFoundError
	else:
		try:
			cur.execute("UPDATE processed_article SET content=%s WHERE feed_articleid=%s AND mode='cleartext'", (cleartext, articleId,))
			if cur.rowcount == 0:
				cur.execute("INSERT INTO processed_article (content, feed_articleid, mode) VALUES (%s, %s, 'cleartext')", (cleartext, articleId,))
			cur.execute("UPDATE feed_article_meta SET is_cleartext='1', title=COALESCE(title,%s) WHERE id=%s", (title,articleId,))
		except psycopg2.DataError, e:
Beispiel #6
0
    def meta(self, refName):
        """Generate an openmensa XML meta feed from the static json file using an XML template.

        Reads the template from `metaTemplateFile`, looks up the canteen whose
        key in `self.canteens` equals `refName`, and fills the template with
        that canteen's fields. `phone` and `times` are optional: when present
        they are rendered as a <phone> element and a <times> block, otherwise
        as empty strings.

        Opening hours are parsed from `mensa["times"]`: each match is a
        two-letter day, an optional `- <day>` range end, and a
        `H:MM - H:MM Uhr` time span. Days without a parsed range are
        rendered as `closed="true"`.

        Returns the rendered XML, or an empty <openmensa/> element when
        `refName` is not a known canteen.

        NOTE(review): only `feed` and `source` are passed through `xmlEscape`;
        name, address, city and phone are inserted as-is. Confirm the json
        data is already XML-safe.
        """
        with open(metaTemplateFile) as f:
            template = f.read()

        for ref, mensa in self.canteens.items():
            if refName != ref:
                continue

            data = {
                "name": mensa["name"],
                "address": mensa["address"],
                "city": mensa["city"],
                "latitude": mensa["latitude"],
                "longitude": mensa["longitude"],
                "feed": xmlEscape(
                    self.urlTemplate.format(
                        metaOrFeed='feed',
                        mensaReference=urllib.parse.quote(ref))),
                "source": xmlEscape(self.build_url(refName, baseUrlMeta)),
            }
            if "phone" in mensa:
                data["phone"] = f"<phone>{mensa['phone']}</phone>"
            else:
                data["phone"] = ""

            if "times" in mensa:
                openingTimes = {}
                pattern = re.compile(
                    r"([A-Z][a-z])(\s*-\s*([A-Z][a-z]))?\s*(\d{1,2}):(\d{2})\s*-\s*(\d{1,2}):(\d{2}) Uhr"
                )
                for fromDay, _, toDay, fromTimeH, fromTimeM, toTimeH, toTimeM in pattern.findall(mensa["times"]):
                    hours = "%02d:%02d-%02d:%02d" % (
                        int(fromTimeH), int(fromTimeM),
                        int(toTimeH), int(toTimeM))
                    openingTimes[fromDay] = hours
                    if toDay:
                        # Walk the week in order and copy the hours to every
                        # day after fromDay up to and including toDay.
                        select = False
                        for short, _longName in weekdays_map:
                            if short == fromDay:
                                select = True
                            elif select:
                                openingTimes[short] = hours
                            if short == toDay:
                                select = False

                # Fill the weekday entries once, after all ranges are parsed.
                # Doing this inside the match loop left the keys unset when
                # nothing could be parsed, and the f-string below then raised
                # KeyError.
                for short, longName in weekdays_map:
                    if short in openingTimes:
                        data[longName] = 'open="%s"' % openingTimes[short]
                    else:
                        data[longName] = 'closed="true"'
                data['times'] = f"""
    <times type="opening">
      <monday {data['monday']} />
      <tuesday {data['tuesday']} />
      <wednesday {data['wednesday']} />
      <thursday {data['thursday']} />
      <friday {data['friday']} />
      <saturday {data['saturday']} />
      <sunday {data['sunday']} />
    </times>"""
            else:
                data['times'] = ''

            return template.format(**data)

        return '<openmensa xmlns="http://openmensa.org/open-mensa-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="2.1" xsi:schemaLocation="http://openmensa.org/open-mensa-v2 http://openmensa.org/open-mensa-v2.xsd"/>'
Beispiel #7
0
def _interpolate(template, *args):
    return template % tuple(util.xmlEscape(a) for a in args)
Beispiel #8
0
			bottle.request.environ.get('HTTP_AUTHORIZATION'), username, password, acl_tag)
		traceback.print_exc()
		return bottle.HTTPResponse("You don't have the permission to access this stream", status=401)
	else:
		print 'GRANTED: user %r, acl_tag %r' % (username, acl_tag)
	
	after = bottle.request.GET.get('after','0000-00-00T00-00-00Z')
	fn_template = FILENAME_TEMPLATE.replace('%(acl_tag)s',acl_tag)  # hack: partial string interpolation
	if not fn_template:
		return bottle.HTTPResponse(
			"<h1>404</h1>Unknown stream: %s. Check http://newsfeed.ijs.si/ for possible URLs.",
			status=404)
	
	path = get_cached_file(fn_template=fn_template+'.gz', after=after, reverse_order=(after==None))
	if path is None:
		return bottle.HTTPResponse("<h1>404</h1>No gzips created after %s on stream %r yet." % (util.xmlEscape(after), acl_tag), status=404)
	else:
		dir, fn = os.path.split(path)
		return bottle.static_file(fn, root=dir, download=acl_tag+'-'+fn, mimetype='application/x-gzip')


if __name__=='__main__':
	# Script entry point: create the zmq context, then start the subscriber
	# in a background thread.
	zmqctx = zmq.Context()
	# (the socket is created in its own thread; context should be created in the main thread)

	# Debug only: uncomment either of the two below for a single-threaded run.
	# The trailing `1/0` raises ZeroDivisionError once the call returns,
	# presumably to stop the script before the threaded startup below.
	#zmq_to_files(); 1/0
	#bottle.debug(True); bottle.run(host='0.0.0.0', port=13380); 1/0
	
	# zmq subscriber: run zmq_to_files in its own thread
	threading.Thread(target=zmq_to_files).start()
Beispiel #9
0
        cleartext = dechrome.parseCleartext(html)
        title = cleartext.split('\n')[0]
    except Exception, e:
        raise ProcessingError(e)

    # Debug: print output to a file for later inspection
    if 0:
        try:
            cur.execute("SELECT url FROM feed_article WHERE id=%s",
                        (articleId, ))
            url = cur.fetchone()['url']
            debug('Article %s has url %s', articleId, url)
            if cleartext.startswith('\n'): cleartext = '???' + cleartext
            html_out = ('%s: <a href="%s">%s</a>' % (articleId, url, url)) + (
                '<pre><span style="font-weight:bold; font-size:20px">' +
                util.xmlEscape(cleartext) + '</pre>').replace(
                    '\n', '</span>\n', 1)
            with open('clean_output.html', 'a') as f:
                f.write(html_out.encode('utf8'))
                f.flush()
        except:
            exception("Error while producing debug output")

    if not cleartext:
        cur.execute("DELETE FROM processed_article WHERE feed_articleid=%s",
                    (articleId, ))
        cur.execute(
            "UPDATE feed_article_meta SET is_cleartext='e' WHERE id=%s",
            (articleId, ))
        raise NoTextFoundError
    else: