def feed():
    ''' add or edit items in the newsfeed on the home page

    Dispatches on the posted form: 'edit' updates an existing item,
    'delete' removes one, anything else inserts a new item. Always
    redirects back to the home page afterwards.
    '''
    cur = g.db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    # edit an item in the feed
    if request.form.get('edit') == 'edit':
        item_id = request.form.get('item')
        title = request.form.get('title')
        link = request.form.get('link')
        text = request.form.get('text')
        if title and link and text:
            cur.execute(
                'UPDATE feed_items SET title=%s, link=%s, text=%s WHERE id=%s',
                (title, urlnorm(link), text, item_id))
            g.db.commit()
            flash('Item updated.')
        else:
            # FIX: removed the stray g.db.commit() the original issued here
            # even though nothing had been written in this branch
            flash('Invalid request.')
    # delete an item from the feed
    elif request.form.get('delete') == 'delete':
        item_id = request.form.get('item')
        cur.execute('DELETE FROM feed_items WHERE id=%s', (item_id,))
        g.db.commit()
        flash('Item deleted.')
    # add an item to the feed
    else:
        title = request.form.get('title')
        link = request.form.get('link')
        text = request.form.get('text')
        # FIX: one timestamp for both columns; the original called now()
        # twice, so created and published differed by microseconds
        now = datetime.datetime.now()
        if title and link and text:
            cur.execute(
                '''INSERT INTO feed_items (title, link, text, created, published) VALUES (%s, %s, %s, %s, %s)''',
                (title, urlnorm(link), text, now, now))
            g.db.commit()
            flash('Item added.')
        else:
            flash('Invalid request.')
    return redirect(url_for('home'))
def find_renarrartion():
    """Experimental API to query for re-narrations.

    Accepted query parameters:
        url    -- url of the page for which re-narrations are needed
        lang   -- language of re-narration
        author -- (optional, repeatable) author(s) of the re-narration
    """
    lang = request.args['lang']
    url = urlnorm(request.args['url'])
    posts = g.db['post']
    # build one filter dict; add the author constraint only when supplied
    criteria = {'about': url, 'lang': lang}
    if 'author' in request.args:
        criteria['author'] = {'$in': request.args.getlist('author')}
    results = {}
    for idx, doc in enumerate(posts.find(criteria)):
        # ObjectId is not JSON-serializable; stringify before jsonify
        doc['_id'] = str(doc['_id'])
        results[idx] = doc
    return jsonify(results)
def serve_domain_info():
    """Return the distinct 'about' URLs of all stored re-narrations that
    match the requested url, keyed by a running integer index.

    Responds with {'0': 'empty'} when nothing matches.
    """
    collection = g.db['post']
    url = urlnorm(request.args.get('url'))
    # all re-narrations of the same xpath are grouped
    # NOTE(review): as a regex, url + '*' only makes the final character
    # repeatable; a true prefix match would be re.escape(url) + '.*'.
    # Kept as-is to preserve the existing matching behaviour.
    query = collection.group(
        key=None,
        condition={"about": {'$regex': url + '*'}},
        initial={'narration': []},
        reduce=Code('function(doc,out){out.narration.push(doc["about"]);}'))
    if len(query) == 0:
        return jsonify({'0': 'empty'})
    # FIX: the original tested `i in otherlist`, i.e. membership against the
    # dict *keys* (integers), so the duplicate check never matched and
    # duplicate URLs were returned. Track seen values explicitly.
    otherlist = {}
    seen = set()
    cntr = -1
    for i in query[0]['narration']:
        if i not in seen:
            seen.add(i)
            cntr += 1
            otherlist[cntr] = str(i)
    return jsonify(otherlist)
def serve_domain_info():
    """Group stored re-narrations by their 'about' URL and return the set
    of distinct URLs as {index: url}; {'0': 'empty'} when none match."""
    collection = g.db['post']
    url = urlnorm(request.args.get('url'))
    # all re-narrations of the same xpath are grouped
    # NOTE(review): url + '*' is a dubious regex (it only repeats the last
    # character); left unchanged to keep matching behaviour identical.
    query = collection.group(
        key=None,
        condition={"about": {'$regex': url + '*'}},
        initial={'narration': []},
        reduce=Code('function(doc,out){out.narration.push(doc["about"]);}')
    )
    if len(query) == 0:
        return jsonify({'0': 'empty'})
    # FIX: deduplicate on the URL values themselves. The original checked
    # `i in otherlist`, which looks at the integer keys, so it never
    # filtered anything and duplicates leaked through.
    result = {}
    seen = set()
    index = -1
    for narration_url in query[0]['narration']:
        if narration_url in seen:
            continue
        seen.add(narration_url)
        index += 1
        result[index] = str(narration_url)
    return jsonify(result)
def find_renarrartion():
    """Experimental API to query for re-narrations.

    The API accepts the following params:
        url    -- url of the page for which re-narrations are needed
        lang   -- language of re-narration
        author -- (optional) author(s) of the re-narration
    """
    lang = request.args['lang']
    url = urlnorm(request.args['url'])
    collection = g.db['post']
    if 'author' in request.args:
        cursor = collection.find({
            'about': url,
            'lang': lang,
            'author': {'$in': request.args.getlist('author')},
        })
    else:
        cursor = collection.find({'about': url, 'lang': lang})
    out = {}
    position = 0
    for doc in cursor:
        # stringify the ObjectId so the document is JSON-serializable
        doc['_id'] = str(doc['_id'])
        out[position] = doc
        position += 1
    return jsonify(out)
def start_worker():
    """Worker loop: pull URLs off the shared queue `q`, run exploit() on
    each, and report the outcome on stdout. Runs forever."""
    while True:
        try:
            url = urlnorm(q.get())
            if exploit(url):
                sys.stdout.write(url + ' ===> Uploaded \n')
            else:
                sys.stdout.write(url + '\n')
        except Exception:
            # FIX: narrowed from a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt and made the worker unkillable.
            # A failing URL is still best-effort: skip it and keep going.
            pass
        q.task_done()
def replace():
    """Fetch re-narrations for (url, lang[, author]) and return them as
    JSON under the key 'r', with Mongo ObjectIds stripped out."""
    lang = request.args['lang']
    url = urlnorm(request.args['url'])
    if 'author' in request.args:
        results = query_by_params(url, lang, request.args.get('author'))
    else:
        results = query_by_params(url, lang)
    # ObjectIds are not JSON-serializable; drop them before responding
    for entry in results:
        for narration in entry['narration']:
            del narration['_id']
    return jsonify({'r': results})
def replace():
    """Look up re-narrations by url/lang (optionally author) and serve
    them as {'r': [...]} with the non-serializable '_id' fields removed."""
    lang = request.args['lang']
    url = urlnorm(request.args['url'])
    # collect positional args for query_by_params; author is optional
    params = [url, lang]
    if 'author' in request.args:
        params.append(request.args.get('author'))
    matches = query_by_params(*params)
    for match in matches:
        for item in match['narration']:
            # remove the ObjectId so jsonify can encode the document
            del item['_id']
    payload = {}
    payload['r'] = matches
    return jsonify(payload)
def import_url(): station_name = request.form.get('station_name') print repr("station name:"+station_name) if station_name in mongo.db.collection_names(): sname_obj=mongo.db[station_name] else: sname_obj=mongo.db.create_collection(station_name) content = parse(urlnorm(request.form.get('url'))).getroot() content.make_links_absolute(urlnorm(request.form.get('url')), True) count = 0 for link in content.iterlinks(): if link[0].tag == 'a' and link[0].getparent().tag == 'td' and link[0].text != 'Parent Directory': sname_obj.save({'url': urlnorm(link[2]), 'uploadDate': link[0].getparent() .getnext().text.strip(), 'stationName': station_name}) count += 1 if count > 0: return jsonify({'count': count, 'status': 'success'}) else: return jsonify({'count': count, 'status': 'error'})
def menuForDialog(): if 'option' not in request.args: collection = g.db['post'] c = {} cntr = 0 url = urlnorm(request.args.get('url')) for i in collection.find({"about": url}).distinct('lang'): for j in collection.find({ "about": url, 'lang': i }).distinct('type'): d = {} d['lang'] = i d['type'] = j c[cntr] = d cntr += 1 print c return jsonify(c) else: collection = g.db['post'] #get the ren languages for the received url langForUrl = collection.group( key=Code('function(doc){return {"about" : doc.about}}'), condition={ "about": d['url'], "blog": { '$regex': '/' + d['option'] + '.*/' } }, initial={'lang': []}, reduce=Code('function(doc, out){' + 'if (out.lang.indexOf(doc.lang) == -1)' + 'out.lang.push(doc.lang)}') # here xpath for test ) #send the response if (langForUrl): response = make_response() response.headers['Access-Control-Allow-Origin'] = '*' response.data = json.dumps(langForUrl[0]['lang']) return response else: return "empty"
def publish(): data = json.loads(request.form['data']) collection = g.db['post'] page = {} if type(data) is unicode: # A hack to fix malformed data. FIXME. data = json.loads(data) content = [] for i in data: print i # Create content objects here for posting to blog. DELETEME. if 'comments' in i: page['comments'] = i['comments'] else: # normalize URLs before inserting to DB i['about'] = urlnorm(i['about']) contentobj = {} contentobj['type'] = i['elementtype'] contentobj['attr'] = { "language": i['lang'], "location": i['location'], "about": i['about'], "xpath": i['xpath'] } contentobj['data'] = i['data'] content.append(contentobj) i['bxpath'] = '' collection.insert(i) page['title'] = "Re-narration of " + content[0]['attr']['about'] page['name'] = "About " + content[0]['attr']['about'] page['content'] = content g.response_from_blogger = requests.api.post( conf.CUSTOM_BLOG_POST_URL[0], json.dumps(page), headers={"content-type": "application/json"}) print "response from blogger " + repr(g.response_from_blogger) sweet(data) reply = make_response() return reply
def publish(): data = json.loads(request.form['data']) collection = g.db['post'] page = {} if type(data) is unicode: # A hack to fix malformed data. FIXME. data = json.loads(data) content = [] for i in data: print i # Create content objects here for posting to blog. DELETEME. if 'comments' in i: page['comments'] = i['comments'] else: # normalize URLs before inserting to DB i['about'] = urlnorm(i['about']) contentobj = {} contentobj['type'] = i['elementtype'] contentobj['attr'] = {"language": i['lang'], "location": i['location'], "about": i['about'], "xpath": i['xpath']} contentobj['data'] = i['data'] content.append(contentobj) i['bxpath'] = '' collection.insert(i) page['title'] = "Re-narration of " + content[0]['attr']['about'] page['name'] = "About " + content[0]['attr']['about'] page['content'] = content g.response_from_blogger = requests.api.post(conf.CUSTOM_BLOG_POST_URL[0], json.dumps(page), headers={"content-type": "application/json"}) print "response from blogger " + repr(g.response_from_blogger) sweet(data) reply = make_response() return reply
def menuForDialog(): if 'option' not in request.args: collection = g.db['post'] c = {} cntr = 0 url = urlnorm(request.args.get('url')) for i in collection.find({"about": url}).distinct('lang'): for j in collection.find({"about": url, 'lang': i}).distinct('type'): d = {} d['lang'] = i d['type'] = j c[cntr] = d cntr += 1 print c return jsonify(c) else: collection = g.db['post'] #get the ren languages for the received url langForUrl = collection.group( key=Code('function(doc){return {"about" : doc.about}}'), condition={"about": d['url'], "blog": {'$regex': '/'+d['option']+'.*/'}}, initial={'lang': []}, reduce=Code('function(doc, out){' + 'if (out.lang.indexOf(doc.lang) == -1)' + 'out.lang.push(doc.lang)}') # here xpath for test ) #send the response if (langForUrl): response = make_response() response.headers['Access-Control-Allow-Origin'] = '*' response.data = json.dumps(langForUrl[0]['lang']) return response else: return "empty"
def worker(self, args, expected):
    """Run urlnorm.urlnorm on *args and assert the normalized result
    matches *expected*."""
    actual = urlnorm.urlnorm(*args)
    self.assertEqual(actual, expected)
def page(category_slug, section_slug):
    ''' display all content in a given section

    GET renders the section; POST dispatches on the submitted form fields:
    item+edit updates an entry, item+delete removes one, edit=edit updates
    the section itself, delete=delete removes the whole section, and any
    other POST adds a new entry. All writes require an active user.
    '''
    cur = g.db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    if request.method == 'POST':
        # edit an entry in the section
        if request.form.get('item') and request.form.get('edit'):
            item_id = request.form.get('item')
            title = request.form.get('title')
            link = request.form.get('link')
            # link is optional; normalize it only when supplied
            link = urlnorm(link) if link else None
            text = request.form.get('text')
            if title and text and current_user.is_active():
                cur.execute(
                    '''UPDATE data_items SET title=%s, link=%s, text=%s WHERE id=%s''',
                    (title, link, text, item_id))
                g.db.commit()
                flash('Item updated.')
            else:
                flash('Invalid request.')
        # delete an entry in the section
        elif request.form.get('item') and request.form.get('delete'):
            if current_user.is_active():
                item_id = request.form.get('item')
                # items form a linked list; helper repairs prev/next links
                delete_from_linkedlist('data_items', item_id)
                g.db.commit()
                flash('Item deleted.')
            else:
                flash('Invalid request.')
        # edit the section
        elif request.form.get('edit') == 'edit':
            sec_id = request.form.get('id')
            name = request.form.get('name')
            desc = request.form.get('description')
            # recompute the slug from the new name; the rest of this
            # request (metadata/content queries below) uses the new slug
            section_slug = slugify_section(name, sec_id)
            if sec_id and name and current_user.is_active():
                cur.execute(
                    '''UPDATE sections SET name=%s, description=%s, slug=%s WHERE id=%s''',
                    (name, desc, section_slug, sec_id))
                g.db.commit()
                flash('Section updated.')
            else:
                flash('Invalid request.')
        # delete the section
        elif request.form.get('delete') == 'delete' and request.form.get('id'):
            sec_id = request.form.get('id')
            if current_user.is_active():
                # remove the section and all of its items, then leave the
                # now-nonexistent page for home
                delete_from_linkedlist('sections', sec_id)
                cur.execute('DELETE FROM data_items WHERE section=%s',
                            (sec_id,))
                g.db.commit()
                flash('Section deleted.')
                return redirect(url_for('home'))
            else:
                flash('Invalid request.')
        # add an entry to the section
        else:
            title = request.form.get('title')
            link = request.form.get('link')
            link = urlnorm(link) if link else None
            text = request.form.get('text')
            if title and text and current_user.is_active():
                cur.execute(
                    '''INSERT INTO data_items (title, link, text, section) VALUES (%s, %s, %s, (SELECT id FROM sections WHERE slug=%s))''',
                    (title, link, text, section_slug))
                g.db.commit()
                flash('Item added.')
            else:
                flash('Invalid request.')
    # retrieve the section metadata
    cur.execute(
        '''SELECT sections.id, sections.name, sections.description, categories.id AS cat_id, categories.name AS cat_name, (SELECT COUNT(data_items) FROM data_items WHERE data_items.section=sections.id) as items_count FROM sections JOIN categories ON sections.category=categories.id WHERE categories.slug=%s AND sections.slug=%s''',
        (category_slug, section_slug))
    section = cur.fetchone()
    if not section:
        abort(404)
    # retrieve the individual page of content
    cur.execute(
        '''SELECT data_items.id, data_items.prev_id, data_items.next_id, title, link, text, meta FROM data_items JOIN sections ON section=sections.id JOIN categories ON sections.category=categories.id WHERE categories.slug=%s AND sections.slug=%s ORDER BY data_items.id''',
        (category_slug, section_slug))
    # ordered() walks the prev/next linked list into display order
    content = ordered(cur.fetchall())
    # render the page
    return render_template(
        'content.html', section=section, content=content,
        category_slug=category_slug, section_slug=section_slug)
def testPostPluginHook(self):
    """The post-plugin hook rewrites 'http://d/p' to 'http://d/path'."""
    normalized = urlnorm.urlnorm('http://d/p')
    self.assertEqual(normalized, 'http://d/path')
# One-off maintenance script: for every post in the 'alipi' database,
# print how its 'about' URL would look after normalization. Read-only:
# the actual update is left commented out below as a dry-run safeguard.
import pymongo
from urlnorm import urlnorm

# connects to the default local MongoDB instance (localhost:27017)
db_client = pymongo.MongoClient()
db = db_client['alipi']
posts = db['post']
print "Total posts: %s" % str(posts.count())
for i in posts.find():
    print 'Post:: %s' % str(i['_id'])
    if not i['about']:
        print "No URL. Skipping.."
        continue
    print "Not normalized URL: %s" % i['about']
    print "Normalized URL: %s" % urlnorm(i['about'])
    #print "..Updating about field.."
    #posts.update({'_id': i['_id']}, {'$set': {'about': urlnorm(i['about'])}})
def u_norm(doc_url, url):
    """Resolve *url* against *doc_url* and normalize it.

    Returns the normalized absolute URL, or "invalid:" + url when joining
    or normalization fails.
    """
    try:
        return urlnorm(urljoin(doc_url, url))
    except Exception:
        # FIX: narrowed from a bare `except:`, which also swallowed
        # SystemExit and KeyboardInterrupt
        return "invalid:" + url
def u_norm(doc_url, url):
    """Join *url* onto *doc_url* and return the normalized result, or the
    sentinel string "invalid:" + url when either step fails."""
    try:
        return urlnorm(urljoin(doc_url, url))
    except Exception:
        # FIX: was a bare `except:`; keep the best-effort fallback but stop
        # catching SystemExit/KeyboardInterrupt
        return "invalid:" + url