def open_directory_data_insert():
    """
    Insert data after grabbing it from an open directory.
    Database used => pdfs.pdf
    Fields: {added, keyword, title, url, snippet, thumb_updated, fpath}
    1. download the thumbnail image
    2. insert the data from the form fields
    """
    if request.method == "POST":
        added = datetime.datetime.now()
        title = request.form["title"]
        snippet = request.form["snippet"]
        url = request.form["url"]
        keyword = title
        # insert the five fields above into the database
        pdfdb = c["pdfs"]
        oid = pdfdb.pdf.insert({
            "added": added,
            "title": title,
            "snippet": snippet,
            "url": url,
            "keyword": keyword,
        })
        print "success"
        # build the folder path first; files are stored under year/month/day,
        # e.g. ~/path_to_app/assets/2014/11/09/fname.png
        app_folder = os.getcwd()
        date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
        full_folder_path = os.path.join(app_folder, "assets", date_folder)
        # create the folder if it does not exist
        if not os.path.exists(full_folder_path):
            os.makedirs(full_folder_path)
        img_filename = "%s-%s.png" % (oid, slugify(title))
        full_file_path = os.path.join(full_folder_path, img_filename)
        # download the thumbnail (first page, 100px wide) via the Google Docs viewer
        thumb_url = "http://docs.google.com/viewer?url=%s&a=bi&pagenumber=1&w=100" % url
        imgstr = urllib2.urlopen(thumb_url).read()
        with open(full_file_path, "wb") as f:  # binary mode for image data
            f.write(imgstr)
        # update the record: fpath is the image location, thumb_updated the last
        # update time; the index page only lists docs that have thumb_updated
        fpath = os.path.join(date_folder, img_filename)
        pdfdb.pdf.update({"_id": ObjectId(oid)},
                         {"$set": {"fpath": fpath,
                                   "thumb_updated": datetime.datetime.now()}})
        return "success"
def admin_single_url_inject():
    """
    input: url path to a pdf file
    process: download thumbnail; add title, snippet and url, plus keyword (optional)
    post: insert into the database
    do we need pdfbox here????
    """
    if request.method == "POST":
        added = datetime.datetime.now()
        title = request.form["title"]
        snippet = request.form["snippet"]
        url = request.form["url"]
        keyword = title
        # insert the five fields above into the database
        pdfdb = c["pdfs"]
        oid = pdfdb.pdf.insert({
            "added": added,
            "title": title,
            "snippet": snippet,
            "url": url,
            "keyword": keyword,
        })
        # print "success"
        # build the folder path first; files are stored under year/month/day,
        # e.g. ~/path_to_app/assets/2014/11/09/fname.png
        app_folder = os.getcwd()
        date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
        full_folder_path = os.path.join(app_folder, "assets", date_folder)
        # create the folder if it does not exist
        if not os.path.exists(full_folder_path):
            os.makedirs(full_folder_path)
        img_filename = "%s-%s.png" % (oid, slugify(title))
        full_file_path = os.path.join(full_folder_path, img_filename)
        # download the thumbnail (first page, 100px wide) via the Google Docs viewer
        thumb_url = "http://docs.google.com/viewer?url=%s&a=bi&pagenumber=1&w=100" % url
        imgstr = urllib2.urlopen(thumb_url).read()
        with open(full_file_path, "wb") as f:  # binary mode for image data
            f.write(imgstr)
        # update the record: fpath is the image location, thumb_updated the last
        # update time; the index page only lists docs that have thumb_updated
        fpath = os.path.join(date_folder, img_filename)
        pdfdb.pdf.update({"_id": ObjectId(oid)},
                         {"$set": {"fpath": fpath,
                                   "thumb_updated": datetime.datetime.now()}})
        return "success %s %s %s" % (url, title, snippet)
    return render_template("admin/admin_inject_single_url.html")
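# The folder-building, download and update steps above are copy-pasted
# verbatim in open_directory_data_insert, admin_single_url_inject and
# thumbnailer below. A minimal sketch of a shared helper the handlers could
# call instead; the name save_thumbnail and its signature are assumptions,
# not part of the original code:
def save_thumbnail(collection, oid, title, thumb_url):
    # store under assets/<year>/<month>/<day>/<oid>-<slug>.png
    date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
    full_folder_path = os.path.join(os.getcwd(), "assets", date_folder)
    if not os.path.exists(full_folder_path):
        os.makedirs(full_folder_path)
    img_filename = "%s-%s.png" % (oid, slugify(title))
    with open(os.path.join(full_folder_path, img_filename), "wb") as f:
        f.write(urllib2.urlopen(thumb_url).read())
    # record the relative path and the update time on the document
    fpath = os.path.join(date_folder, img_filename)
    collection.update({"_id": ObjectId(oid)},
                      {"$set": {"fpath": fpath,
                                "thumb_updated": datetime.datetime.now()}})
    return fpath
# usage sketch: save_thumbnail(pdfdb.pdf, oid, title, thumb_url)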
def users_public_view(username):
    """
    Public profile view: lists all the pictures the user has favorited.
    """
    dbuser = c["entities"]
    data = dbuser.users.find_one({"username": session.get("username")})
    try:
        favorites = dbuser.users.find_one({"username": username})["favorited"]
        favorites = list(set(favorites))  # unique entries only
    except (TypeError, KeyError):  # unknown user, or no favorites yet
        favorites = []
    # check whether each favorited book (keyed by oid) has a thumbnail
    pdfdb = c["pdfs"]
    thumbs = []
    images = []
    titles = []
    if favorites:
        for i in favorites:
            datafavorites = pdfdb.pdf.find_one({"_id": ObjectId(i)})
            titles.append(slugify(datafavorites["title"]))
            if "fpath" in datafavorites:
                # fpath present => thumbnail available
                thumbs.append(True)
                images.append(datafavorites["fpath"])
            else:
                thumbs.append(False)
                images.append("")
        # zip the parallel lists into (oid, has_thumb, image, title) tuples
        favorites = zip(favorites, thumbs, images, titles)
    # print favorites
    return render_template("users/users_public_view.html", data=data,
                           favorites=favorites, username=username)
def thumbnailer(term=None):
    docdb = c["pdfs"]
    # randomize the skip so different docs show up each load; the max() guards
    # against collections holding fewer than 10 documents
    skip_number = random.randint(0, max(0, docdb.pdf.find().count() - 10))
    data = docdb.pdf.find({"thumb_updated": {"$exists": False}}).skip(skip_number).limit(10)
    # data = docdb.pdf.find().limit(10)
    if term is not None:
        # search by keyword AND missing thumb_updated; a regex would do, but
        # full-text search is the better fit so the url field is searchable too
        pattern = re.compile(".*" + term + ".*")
        # data = docdb.pdf.find({"$and": [{"thumb_updated": {"$exists": False}}, {"title": pattern}]}).limit(10)
        data = docdb.command("text", "pdf", search=term, limit=15)
        # keep only docs that do not have the thumb_updated field yet
        data = [d["obj"] for d in data["results"] if "thumb_updated" not in d["obj"]]
    if request.method == "POST":
        oid = request.form["oid"]
        url = request.form["url"]
        title = request.form["title"]
        # build the folder path first; files are stored under year/month/day,
        # e.g. ~/path_to_app/assets/2014/11/09/fname.png
        app_folder = os.getcwd()
        date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
        full_folder_path = os.path.join(app_folder, "assets", date_folder)
        # create the folder if it does not exist
        if not os.path.exists(full_folder_path):
            os.makedirs(full_folder_path)
        img_filename = "%s-%s.png" % (oid, slugify(title))
        full_file_path = os.path.join(full_folder_path, img_filename)
        # download the thumbnail
        imgstr = urllib2.urlopen(url).read()
        with open(full_file_path, "wb") as f:  # binary mode for image data
            f.write(imgstr)
        # update the record: fpath is the image location, thumb_updated the last
        # update time; the index page only lists docs that have thumb_updated
        fpath = os.path.join(date_folder, img_filename)
        docdb.pdf.update({"_id": ObjectId(oid)},
                         {"$set": {"added": datetime.datetime.now(),
                                   "fpath": fpath,
                                   "thumb_updated": datetime.datetime.now()}})
        return "success %s %s" % (oid, url)
    return render_template("admin/admin_thumbnailer.html", data=data)
def recent_feed():
    # http://werkzeug.pocoo.org/docs/contrib/atom/
    # required fields: id (the link) and updated
    feed = AtomFeed('Recent Articles', feed_url=request.url, url=request.url_root)
    data = [i[0] for i in col_term.get_range()][:100]
    for d in data:
        feed.add(
            d,
            content_type='text',
            url='http://www.hotoid.com/berita/' + slugify(d),
            updated=datetime.datetime.now(),
        )
    return feed.get_response()
def recent_feed():
    # http://werkzeug.pocoo.org/docs/contrib/atom/
    feed = AtomFeed('Recent Files', feed_url=request.url, url=request.url_root)
    data = db.freewaredata.find().sort("_id", -1).limit(100)
    for d in data:
        try:
            feed.add(
                d['title'], d['description'],
                content_type='text',
                id=d['_id'],
                url='http://www.blankons.com/' + slugify(onlychars(d['title'])) + '/' + unicode(d['_id']),
                updated=datetime.datetime.now(),
            )
        except KeyError:  # document has no description: add the entry without one
            feed.add(
                d['title'],
                content_type='text',
                id=d['_id'],
                url='http://www.blankons.com/' + slugify(onlychars(d['title'])) + '/' + unicode(d['_id']),
                updated=datetime.datetime.now(),
            )
    return feed.get_response()
def slug(s):
    """
    Transform words into a slug.
    Usage: {{ string|slug }}
    """
    return slugify(s)
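# How the filter above is expected to be wired into templates; a minimal
# sketch assuming the usual Flask `app` object (the registration code is not
# shown in the original):
app.jinja_env.filters["slug"] = slug
# after which templates can render {{ string|slug }} as documented above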
def redirect_search():
    q = request.args.get('q')
    return redirect("/" + slugify(q), 301)
def slug(s):
    return slugify(s)
def berita(topik):
    topik = slugify(topik)
    col_term.insert(unicode(topik.replace('-', ' ')),
                    {'added': datetime.datetime.now(), 'hits': 1})
    query = topik.replace('-', '+')
    url1 = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q=' + query + '+site:kompas.com'
    url2 = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q=' + query + '+site:detik.com'
    url3 = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q=' + query + '+site:viva.co.id'
    url4 = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q=' + query + '+site:kaskus.co.id'
    url5 = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q=' + query + '+site:merdeka.com'
    url6 = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q=' + query + '+site:republika.co.id'
    #url6 = 'https://gdata.youtube.com/feeds/api/videos?q='+query+'&v=2&alt=jsonc'
    # for each site: reuse the cached result if present, otherwise fetch it,
    # cache it (one-week TTL, one day for kaskus) and read it back
    try:
        col_topic.get(topik)['kompas']
        data1 = json.loads(col_topic.get(topik)['kompas'])
    except:
        response1 = urllib2.urlopen(url1).read()
        col_topic.insert(topik, {'kompas': response1}, ttl=86400 * 7)
        data1 = json.loads(col_topic.get(topik)['kompas'])
    try:
        col_topic.get(topik)['detik']
        data2 = json.loads(col_topic.get(topik)['detik'])
    except:
        response2 = urllib2.urlopen(url2).read()
        col_topic.insert(topik, {'detik': response2}, ttl=86400 * 7)
        data2 = json.loads(col_topic.get(topik)['detik'])
    try:
        col_topic.get(topik)['vivanews']
        data3 = json.loads(col_topic.get(topik)['vivanews'])
    except:
        response3 = urllib2.urlopen(url3).read()
        col_topic.insert(topik, {'vivanews': response3}, ttl=86400 * 7)
        data3 = json.loads(col_topic.get(topik)['vivanews'])
    try:
        col_topic.get(topik)['kaskus']
        data4 = json.loads(col_topic.get(topik)['kaskus'])
    except:
        response4 = urllib2.urlopen(url4).read()
        col_topic.insert(topik, {'kaskus': response4}, ttl=86400)
        data4 = json.loads(col_topic.get(topik)['kaskus'])
    try:
        col_topic.get(topik)['merdeka']
        data5 = json.loads(col_topic.get(topik)['merdeka'])
    except:
        response5 = urllib2.urlopen(url5).read()
        col_topic.insert(topik, {'merdeka': response5}, ttl=86400 * 7)
        data5 = json.loads(col_topic.get(topik)['merdeka'])
    try:
        col_topic.get(topik)['republika']
        data6 = json.loads(col_topic.get(topik)['republika'])
    except:
        response6 = urllib2.urlopen(url6).read()
        col_topic.insert(topik, {'republika': response6}, ttl=86400 * 7)
        data6 = json.loads(col_topic.get(topik)['republika'])
    return render_template("berita2.html", data1=data1, data2=data2, data3=data3,
                           data4=data4, data5=data5, data6=data6, topik=topik)
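# The six cache-or-fetch blocks above differ only in the cache key and the
# site filter appended to the query. A sketch of the same logic as a single
# loop; the SITES mapping and the cached_search name are assumptions, and
# where the original gives kaskus a one-day TTL this sketch uses one week
# for every site:
SITES = {'kompas': 'kompas.com', 'detik': 'detik.com',
         'vivanews': 'viva.co.id', 'kaskus': 'kaskus.co.id',
         'merdeka': 'merdeka.com', 'republika': 'republika.co.id'}

def cached_search(topik, query, base_url):
    results = {}
    for key, site in SITES.items():
        try:
            # cache hit: the column already holds the raw JSON response
            results[key] = json.loads(col_topic.get(topik)[key])
        except Exception:
            # cache miss: fetch, store for a week, then read back
            response = urllib2.urlopen(base_url + query + '+site:' + site).read()
            col_topic.insert(topik, {key: response}, ttl=86400 * 7)
            results[key] = json.loads(col_topic.get(topik)[key])
    return results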
def cari():
    q = request.args.get('q')
    return redirect("/berita/" + slugify(q), 301)
def music():
    q = request.args.get('q')
    return redirect("/" + slugify(q) + "/", 301)
def suggested_topics(tag):
    """
    Builds a tag page whose contents are search terms taken from
    onkeywords.com, AdWords, Ubersuggest and the like.
    """
    # return redirect("/tag/" + tag)
    pdfdb = c["pdfs"]
    tagsdb = c["pdfterms"]
    terms = c["terms"]
    # prevent keyword injection: only serve terms we already know about
    kwrd = tagsdb.term.find_one({"term": tag.replace("-", " ")})
    if not kwrd:
        return redirect("/", 302)
    tag = tag.replace("-", " ")
    data = pdfdb.command("text", "pdf", search=tag, limit=10)
    results_count = data["stats"]["nscanned"]
    # this should rank results that already have a thumbnail first;
    # how, though? (a sketch follows this function)
    data = [d["obj"] for d in data["results"]]
    # build the related data
    related_data = terms.command("text", "term", search=tag, limit=10)
    related_data = [d["obj"] for d in related_data["results"]]
    ## on-page SEO
    # 1. related tags for the meta description
    meta_desc = ", ".join([d["term"] for d in related_data][:5])
    # 2. meta keywords: take the tag, split it, join it back
    meta_key = ", ".join([t for t in tag.split(" ") if len(t) > 3])
    # 3. meta keyword tags, dropping short words
    meta_key_tags = [t for t in tag.split(" ")[:5] if len(t) > 3]
    # 4. fake category?
    meta_key_cat = tag.split(" ")[0]
    # get tag suggestions to enrich the index and strengthen on-page SEO
    tags = tagsdb.command("text", "term", search=tag)
    tags = [d["obj"] for d in tags["results"]]
    random.shuffle(tags)
    tags = tags[:5]
    # show h1 tags only for Googlebot
    # Googlebot detection
    refresher = False
    print "referrer: %s" % request.headers
    print "====" * 10
    print request.headers.get("Referer")
    # if 'Googlebot' in request.headers['User-Agent']:
    try:
        if 'google.com' in request.referrer:
            refresher = True
            # save the incoming search term ("setem"); to a file for now => setem.log
            from urllib import unquote_plus
            referer = request.headers.get("Referer")
            pattern = re.compile(r"&q=(.*?)&")
            setem = re.search(pattern, referer)
            if setem:  # a search term was found in the referrer
                print setem.group(1)
                # append to the log, skipping terms too short to be useful
                with open(os.path.join(os.getcwd(), "setem.log"), "a") as f:
                    if len(setem.group(1)) > 5:
                        f.write(unquote_plus(setem.group(1)) + "\n")
            ####
            # at this point, also build the tag page, then
            return redirect("/topic/" + slugify(tag))
            # redirect to the real tag page; that page carries JavaScript
            # code to defeat the back button
            ####
    except:  # no referrer at all, or anything unexpected: just render normally
        pass
    # users count
    users_count = dbentity.users.find().count()
    last_user = [i for i in dbentity.users.find()][-1]
    return render_template("topics.html", data=data, tag=tag,
                           results_count=results_count, related_data=related_data,
                           tags=tags, meta_desc=meta_desc, meta_key=meta_key,
                           meta_key_tags=meta_key_tags, meta_key_cat=meta_key_cat,
                           refresher=refresher, users_count=users_count,
                           last_user=last_user)
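# One answer to the "rank thumbnailed results first" question left open in
# suggested_topics: a minimal sketch that stably reorders the text-search
# results so documents carrying an fpath (i.e. with a thumbnail) come first.
# The name prefer_thumbnailed is hypothetical:
def prefer_thumbnailed(docs):
    # sorted() is stable and False sorts before True, so docs with "fpath"
    # keep their relative order and move to the front
    return sorted(docs, key=lambda d: "fpath" not in d)
# usage sketch, right after the results are unpacked:
# data = prefer_thumbnailed(data)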
def music():
    q = request.args.get('q')
    db.terms2.insert({'query': q.replace('-', ' ')})
    return redirect("/" + slugify(q) + "/", 301)