def set_file_vote(self, file_id, user, lang, vote):
    ''' Stores the vote in the collection and updates the corresponding file with the new data '''
    data = {
        "u": user.id,
        "k": 1 if vote == 1 else -1,
        "d": datetime.utcnow(),
        "l": lang,
        }
    # TODO(felipe): remove once the bug is fixed
    if user.id < 0 and user.is_authenticated():
        logging.error("User inconsistency: logged-in vote with a negative id.", extra=locals())
    else:
        if user.is_authenticated():
            data["_id"] = "%s_%s" % (mid2hex(file_id), user.id)
            self.user_conn.users.vote.update({"_id": data["_id"]}, data, upsert=True)
        else:
            data["_id"] = "%s:%s" % (mid2hex(file_id), user.session_ip)
            self.user_conn.users.vote.update({"_id": data["_id"], "u": data["u"]}, data, upsert=True)

    # For each language, store the karma along with the total count and the sum
    map_function = Code('''
        function() {
            emit(this.l, {
                k: this.k,
                c: new Array((this.k>0)?1:0, (this.k<0)?1:0),
                s: new Array((this.k>0)?this.k:0, (this.k<0)?this.k:0)
            })
        }''')
    # Adds everything up and applies 1/(1+e^-x) so the value stays between 0 and 1
    reduce_function = Code('''
        function(lang, vals) {
            var c = new Array(0, 0);
            var s = new Array(0, 0);
            for (var i in vals) {
                c[0] += vals[i].c[0]; c[1] += vals[i].c[1];
                s[0] += vals[i].s[0]; s[1] += vals[i].s[1];
            }
            return {t: 1/(1+Math.exp(-((s[0]*c[0]+s[1]*c[1])/(c[0]+c[1])))), c: c, s: s};
        }''')
    # The third parameter returns the results inline instead of creating a new collection
    votes = self.user_conn.users.vote.map_reduce(
        map_function, reduce_function, {"inline": 1},
        query={"_id": {"$regex": "^%s" % mid2hex(file_id)}})

    # Return a dictionary of the form language: values
    data = {values["_id"]: values["value"] for values in votes["results"]}
    self.user_conn.end_request()
    return data
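# For reference, the reduce step above averages positive and negative karma
# weighted by their counts and squashes the result through the logistic
# function, so each language's rating "t" lands between 0 and 1. A minimal
# Python sketch of the same arithmetic (the helper name and the plain list of
# karma values are illustrative, not part of the codebase):
import math

def vote_rating(karmas):
    # karmas: per-vote karma values, e.g. [1, 1, 1, -1]
    c = (sum(1 for k in karmas if k > 0), sum(1 for k in karmas if k < 0))
    s = (sum(k for k in karmas if k > 0), sum(k for k in karmas if k < 0))
    x = float(s[0] * c[0] + s[1] * c[1]) / (c[0] + c[1])
    return 1.0 / (1.0 + math.exp(-x))  # 1/(1+e^-x), as in the reduce function

# vote_rating([1, 1, 1, -1]) -> ~0.88; all downvotes push the value towards 0.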
def users():
    ''' User administration '''
    searchform = SearchUserForm(request.form, prefix="searchform_")
    if request.method == "POST":
        user_data = None
        mode = searchform.mode.data
        identifier = searchform.identifier.data
        if mode == "username":
            user_data = usersdb.find_username(identifier)
        elif mode == "email":
            user_data = usersdb.find_email(identifier)
        elif mode == "hexid":
            user_data = usersdb.find_userid(identifier)
        elif mode == "oauth":
            user_data = usersdb.find_oauthid(identifier)
        if user_data:
            return redirect(url_for("admin.db_edit", collection="user",
                                    document_id=mid2hex(user_data["_id"])))
        else:
            flash("admin_users_not_found", "error")
    return render_template('admin/users.html',
        page_title=_('admin_users'),
        user_count=usersdb.count_users(),
        blocked=False,
        search_form=searchform)
def get_files(ids):
    ''' Takes a list of 3-tuples (as returned by search's get_ids) and yields
    the files from the corresponding mongo that are not blocked. If a blocked
    file is received, it is skipped and blocked in sphinx.

    @type ids: iterable of tuples of size 3 or greater
    @param ids: list of (mongoid, server id, sphinx id) tuples
    @yield: each of the results for the ids found in the mongos
    '''
    toblock = []
    for f in filesdb.get_files(ids, servers_known=True, bl=None):
        if f["bl"] == 0 or f["bl"] is None:
            yield f
        else:
            toblock.append(mid2hex(f["_id"]))

    # block in sphinx the files that are flagged as blocked
    if toblock:
        cache.cacheme = False
        id_list = {i[0]: i[2] for i in ids}
        block_files(sphinx_ids=[id_list[i] for i in toblock])
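# A hypothetical call site for the generator above, assuming get_ids() yields
# the (mongoid, server id, sphinx id) tuples described in the docstring;
# process() is a placeholder, not a real helper:
for file_data in get_files(get_ids(results)):
    process(file_data)  # only files that are not blocked reach this point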
def search():
    query = request.args.get("q", None)
    if not query:
        flash("write_something")
        return redirect(url_for("index.home"))

    results = {"total_found": 0, "total": 0, "time": 0}
    g.title = "%s - %s" % (query, g.title)

    # crude timing instrumentation
    from time import time
    a = time()

    # fetch the results and build the pagination
    s = searchd.search(query, None, all_langs.get(g.lang))
    print "a", time() - a; a = time()
    stats = s.get_stats()
    print "b", time() - a; a = time()
    results["time"] = 1
    results["total_found"] = int(stats["cs"])
    ids = list(itertools.islice(s.get_results(), 0, 20))
    print "c", time() - a; a = time()
    files_dict = {mid2hex(file_data["_id"]): fill_data(file_data, False, query)
                  for file_data in get_files(ids)}
    print "d", time() - a; a = time()
    files = list(files_dict[file_id[0]] for file_id in ids if file_id[0] in files_dict)
    print "e", time() - a; a = time()

    return render_template('files/search.html',
        results=results,
        search=request.args["q"].split(" "),
        files=files,
        pagination=Pagination(1, 10, min(results["total_found"], 1000)),
        didyoumean=None,
        tags=None)
def users(): """ Administración de usuarios """ # Esta plantilla tiene 2 formularios, filtramos según cuál haya sido enviado searchform = SearchUserForm(request.form, prefix="searchform_") if request.method == "POST": user_data = None mode = searchform.mode.data identifier = searchform.identifier.data if mode == "username": user_data = usersdb.find_username(identifier) elif mode == "email": user_data = usersdb.find_email(identifier) elif mode == "hexid": user_data = usersdb.find_userid(identifier) elif mode == "oauth": user_data = usersdb.find_oauthid(identifier) if user_data: return redirect(url_for("admin.db_edit", collection="user", document_id=mid2hex(user_data["_id"]))) else: flash("admin_users_not_found", "error") return render_template("admin/users.html", page_title=_("admin_users"), blocked=False, search_form=searchform)
def init_data(file_data):
    ''' Initializes the file's data dictionary '''
    file_id = mid2hex(file_data["_id"])
    file_data["id"] = mid2url(file_data['_id'])
    file_data['name'] = file_data['src'][file_id]['url']
    return {"file": file_data, "view": {}}
def get_file_vote(self, file_id, user, lang):
    ''' Retrieves a user's vote for a file '''
    if user.is_authenticated():
        data = self.user_conn.users.vote.find_one({
            "_id": "%s_%s" % (mid2hex(file_id), user.id),
            "l": lang})
    else:
        data = self.user_conn.users.vote.find_one({
            "_id": "%s:%s" % (mid2hex(file_id), user.session_ip),
            "l": lang,
            "u": user.id})
    self.user_conn.end_request()
    return data
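# Both vote functions depend on the same _id key scheme: logged-in votes are
# keyed by file id and user id, anonymous ones by file id and session ip
# (hence the extra "u" check in the anonymous query). A sketch of that
# convention (vote_key is an illustrative helper, not part of the codebase):
def vote_key(file_id, user):
    if user.is_authenticated():
        return "%s_%s" % (mid2hex(file_id), user.id)       # e.g. "<hexid>_1234"
    return "%s:%s" % (mid2hex(file_id), user.session_ip)   # e.g. "<hexid>:1.2.3.4"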
def search():
    ''' Performs a file search '''
    # TODO: sanitize the params

    # if nothing was searched for, send the user back home
    query = request.args.get("q", None)
    if not query:
        flash("write_something")
        return redirect(url_for("index.home"))

    # avoid errors when page is not a number
    page = request.args.get("page", "1")
    if page.isdigit():
        page = int(page)
    else:
        abort(404)

    g.title = "%s - %s" % (query, g.title)

    results = {"total_found": 0, "total": 0, "time": 0}
    didyoumean = None
    tags = None
    if 0 < page < 101:
        # fetch the tags and the did-you-mean
        tags, dym = taming_search(query, request.args.get("type", None))

        # fetch the results and build the pagination
        profiler.checkpoint(opening=["sphinx"])
        results = search_files(query, request.args, page) or results
        ids = get_ids(results)
        profiler.checkpoint(opening=["mongo"], closing=["sphinx"])
        files_dict = {mid2hex(file_data["_id"]): fill_data(file_data, False, query)
                      for file_data in get_files(ids)}
        profiler.checkpoint(opening=["visited"], closing=["mongo"])
        save_visited(files_dict.values())
        profiler.checkpoint(closing=["visited"])
        files = ({"file": files_dict[bin2hex(file_id[0])], "search": file_id}
                 for file_id in ids if bin2hex(file_id[0]) in files_dict)

        # collect the taming results
        try:
            tags = tags.next()
            didyoumean = dym.next()
        except:
            pass
    else:
        files = ()

    return render_template('files/search.html',
        results=results,
        search=request.args["q"].split(" "),
        files=files,
        pagination=Pagination(page, 10, min(results["total_found"], 1000)),
        didyoumean=didyoumean,
        tags=tags)
def search():
    ''' Performs a file search '''
    # TODO: sanitize the params

    # if nothing was searched for, send the user back home
    query = request.args.get("q", None)
    if not query:
        flash("write_something")
        return redirect(url_for("index.home"))

    page = int(request.args.get("page", 1))
    g.title = query + " - " + g.title

    results = {"total_found": 0, "total": 0, "time": 0}
    didyoumean = None
    tags = None
    if 0 < page < 101:
        # fetch the tags and the did-you-mean
        taming = taming_search(current_app.config, query,
                               request.args.get("type", None),
                               contextg=g._get_current_object())

        # fetch the results and build the pagination
        profiler.checkpoint(opening=["sphinx"])
        results = search_files(query, request.args, page) or results
        ids = get_ids(results)
        profiler.checkpoint(opening=["mongo"], closing=["sphinx"])
        files_dict = {mid2hex(file_data["_id"]): fill_data(file_data, False, query)
                      for file_data in get_files(ids)}
        profiler.checkpoint(opening=["visited"], closing=["mongo"])
        save_visited(files_dict.values())
        profiler.checkpoint(closing=["visited"])
        files = (files_dict[bin2hex(file_id[0])]
                 for file_id in ids if bin2hex(file_id[0]) in files_dict)

        # collect the taming results
        try:
            tags = taming.next()
            didyoumean = taming.next()
        except:
            pass
    else:
        files = ()

    return render_template('files/search.html',
        results=results,
        search=request.args["q"].split(" "),
        files=files,
        pagination=Pagination(page, 10, min(results["total_found"], 1000)),
        didyoumean=didyoumean,
        tags=tags)
def block_files(sphinx_ids=(), mongo_ids=(), block=True):
    """ Takes sphinx ids and/or mongodb ObjectIDs of files and blocks them
    in sphinx (sets the bl attribute to 1). """
    sph = sphinxapi2.SphinxClient()
    sph.SetServer(current_app.config["SERVICE_SPHINX"], current_app.config["SERVICE_SPHINX_PORT"])
    sph.SetConnectTimeout(current_app.config["SERVICE_SPHINX_CONNECT_TIMEOUT"])
    sph.SetMatchMode(sphinxapi2.SPH_MATCH_FULLSCAN)
    sph.SetLimits(0, 1, 1, 1)

    sphinx_ids = list(sphinx_ids)
    if mongo_ids:
        # When given mongo ids, run a multi-query to find the sphinx ids,
        # processing them in batches of 32, which is the limit sphinx allows
        for i in xrange(0, len(mongo_ids), 32):
            for mongoid in mongo_ids[i:i + 32]:
                uri1, uri2, uri3 = struct.unpack("III", mid2bin(mongoid))
                sph.ResetFilters()
                sph.SetFilter("uri1", [uri1])
                sph.SetFilter("uri2", [uri2])
                sph.SetFilter("uri3", [uri3])
                sph.AddQuery("", "idx_files", "Searching fileid %s" % mid2hex(mongoid))
            results = sph.RunQueries()
            if results:
                for result in results:
                    if "matches" in result and result["matches"]:
                        sphinx_ids.append(result["matches"][0]["id"])
                    if "warning" in result and result["warning"]:
                        logging.warning(result["warning"])
            else:
                logging.error(sph.GetLastError())

    sph.ResetFilters()
    tr = sph.UpdateAttributes("idx_files", ["bl"], {i: [1 if block else 0] for i in sphinx_ids})
    sph.Close()
    # success means sphinx updated the attribute for every id found
    return tr == len(sphinx_ids)
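# The uri1/uri2/uri3 lookup works because the 12-byte MongoDB ObjectId is
# split into three native-endian uint32 attributes at indexing time. A
# minimal sketch of that unpacking, assuming mid2bin() returns the raw
# 12 bytes of the id (the helper name is illustrative):
import struct

def mid_to_sphinx_attrs(mongoid_bin):
    # "III" consumes exactly 12 bytes and yields three unsigned 32-bit ints
    return struct.unpack("III", mongoid_bin)

uri1, uri2, uri3 = mid_to_sphinx_attrs("\x00" * 12)  # dummy all-zero id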
def get_files(ids, sphinx_search=None):
    ''' Takes a list of tuples of size 3 or greater (as returned by search)
    and yields the files from the corresponding mongo that are not blocked.
    If a blocked file is received, it is skipped and blocked in sphinx.

    @type ids: iterable of tuples of size 3 or greater
    @param ids: list of (mongoid, server id, sphinx id) tuples
    @yield: each of the results for the ids found in the mongos
    '''
    toblock = []
    for f in filesdb.get_files(ids, servers_known=True, bl=None):
        if f["bl"] == 0 or f["bl"] is None:
            yield f
        else:
            toblock.append((mid2hex(f["_id"]), str(f["s"])))

    # block in sphinx the files that are flagged as blocked
    if toblock and sphinx_search:
        cache.cacheme = False
        id_list = {i[0]: i for i in ids}
        sphinx_search.block_files([(id_list[mid][2], server, None, mid, id_list[mid][4])
                                   for mid, server in toblock])
def download(file_id, file_name=""): g.page_type = FILE_PAGE_TYPE if request.referrer: try: posibles_queries = referrer_parser.match(request.referrer) if posibles_queries: query = posibles_queries.group(1) or posibles_queries.group(2) or "" if query: get_query_info(u(urllib2.unquote_plus(query).decode("utf-8"))) except: pass error = None file_data=None if file_id is not None: #si viene un id se comprueba que sea correcto try: #intentar convertir el id que viene de la url a uno interno file_id=url2mid(file_id) except TypeError as e: try: #comprueba si se trate de un ID antiguo possible_file_id = filesdb.get_newid(file_id) if possible_file_id is None: logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id}) error=404 else: logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id}) return {"html": empty_redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":301} except BaseException as e: logging.exception(e) error=503 file_id=None if file_id: try: file_data=get_file_metadata(file_id, file_name.replace("-"," ")) except DatabaseError: error=503 except FileNotExist: error=404 except (FileRemoved, FileFoofindRemoved, FileNoSources): error=410 except FileUnknownBlock: error=404 if error is None and not file_data: #si no ha habido errores ni hay datos, es porque existe y no se ha podido recuperar error=503 if error: abort(error) # completa datos de torrent file_data = torrents_data(file_data, True, g.category) if not file_data: abort(404) if file_data["view"]["category"]: g.category = file_data["view"]["category"] if file_data["view"]["category"].tag=="p**n": g.is_adult_content = True else: g.category = file_data["view"]["category_type"] # no permite acceder ficheros que deberian ser bloqueados prepared_phrase = blacklists.prepare_phrase(file_data['view']['nfn']) if prepared_phrase in blacklists["forbidden"] or (prepared_phrase in blacklists["misconduct"] and prepared_phrase in blacklists["underage"]): g.blacklisted_content = "File" if not g.show_blacklisted_content: abort(404) query = download_search(file_data, file_name, "torrent").replace("-"," ") related = single_search(query, category=None, not_category=(None if g.is_adult_content else "p**n"), title=("Related torrents",3,None), zone="File / Related", last_items=[], limit=30, max_limit=15, ignore_ids=[mid2hex(file_id)], show_order=None) # elige el titulo de la página title = file_data['view']['fn'] # recorta el titulo hasta el proximo separador if len(title)>101: for pos in xrange(101, 30, -1): if title[pos] in SEPPER: title = title[:pos].strip() break else: title = title[:101] g.title = [title] page_description = "" if "description" in file_data["view"]["md"]: page_description = file_data["view"]["md"]["description"].replace("\n", " ") if not page_description: if g.category: page_description = _("download_category_desc", category=singular_filter(g.category.title).lower(), categorys=g.category.title.lower()).capitalize() else: page_description = _("download_desc") if len(page_description)<50: if page_description: page_description += ". " page_description += " ".join(text.capitalize()+"." for text in related[1]["files_text"]) if len(page_description)>180: last_stop = page_description[:180].rindex(".") if "." 
in page_description[:180] else 0 if last_stop<100: last_stop = page_description[:180].rindex(" ") if " " in page_description[:180] else 0 if last_stop<100: last_stop = 180 page_description = page_description[:last_stop]+"." g.page_description = page_description is_canonical_filename = file_data["view"]["seo-fn"]==file_name # registra visita al fichero if g.search_bot: searchd.log_bot_event(g.search_bot, True) else: save_visited([file_data]) if related[0]: g.must_cache = 3600 # last-modified g.last_modified = file_data["file"]["ls"] return render_template('file.html', related_query = query, file_data=file_data, related_files=related, is_canonical_filename=is_canonical_filename, featured=get_featured(related[1]["count"]+len(file_data["view"]["md"]), 1))
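# The description trimming at the end of download() is self-contained enough
# to read as a helper: prefer the last sentence end within 180 characters,
# fall back to the last space, and finally to a hard cut, never cutting
# before character 100. A standalone sketch of the same logic (the helper
# name and parameters are illustrative):
def trim_description(text, limit=180, min_cut=100):
    if len(text) <= limit:
        return text
    head = text[:limit]
    last_stop = head.rindex(".") if "." in head else 0
    if last_stop < min_cut:
        last_stop = head.rindex(" ") if " " in head else 0
    if last_stop < min_cut:
        last_stop = limit
    return text[:last_stop] + "."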
def lock_file(complaint_id=None):
    ''' File information and blocking. Can receive a complaint id, or a list
    of (hex) file ids separated by the letter "g" '''
    page = request.args.get("page", 0, int)
    mode = request.args.get("show", "old", str)
    size = request.args.get("size", 15, int)

    filenames = {}
    bugged = []
    fileids = ()
    if request.method == 'POST':
        if not "fileids" in request.form:
            searchform = BlockFileSearchForm(request.form)
            identifiers = searchform.identifier.data.split()
            if searchform.mode.data == "hexid":
                fileids = [mid2hex(hex2mid(i))
                           for i in identifiers
                           if all(x in "0123456789abcdef" for x in i)]
            elif searchform.mode.data == "b64id":
                fileids = [mid2hex(url2mid(i))
                           for i in identifiers
                           if all(x in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!-" for x in i)
                           and (len(i) * 8) % 6 == 0]
            elif searchform.mode.data == "url":
                filenames.update(
                    (mid2hex(fileurl2mid(i)),
                     u".".join(urllib2.unquote(i.split("/")[-1]).split(".")[:-1]))
                    for i in identifiers
                    if i.startswith("http") and len(i.split("//")[1].split("/")) > 3)
                fileids = filenames.keys()
            if not fileids:
                return redirect(url_for('admin.locks', page=page, mode=mode, size=size))
        else:
            block = request.form.get("block", False, bool)
            unblock = request.form.get("unblock", False, bool)
            if block or unblock:
                # confirm submit
                if complaint_id:
                    pagesdb.update_complaint({"_id": complaint_id, "processed": True})
                fileids = dict(i.split(":") for i in request.form["fileids"].split(","))
                sphinx_block = []
                sphinx_unblock = []
                for fileid, server in fileids.iteritems():
                    (sphinx_block if block and not unblock else sphinx_unblock).append(fileid)
                    req = {"_id": fileid, "bl": int(block and not unblock)}
                    if server:
                        req["s"] = int(server)  # if the server is given, use it for efficiency
                    try:
                        # TODO(felipe): check in which cases this point can be reached without "s"
                        filesdb.update_file(req, direct_connection=True, update_sphinx=False)
                    except:
                        flash("Could not update the file with id %s" % fileid, "error")
                if sphinx_block:
                    block_files_in_sphinx(mongo_ids=sphinx_block, block=True)
                if sphinx_unblock:
                    block_files_in_sphinx(mongo_ids=sphinx_unblock, block=False)
                flash("admin_locks_locked" if block else "admin_locks_unlocked", "success")
            elif request.form.get("cancel", False, bool):
                # cancel submit
                if complaint_id:
                    pagesdb.update_complaint({"_id": complaint_id, "processed": True})
                flash("admin_locks_not_locked", "success")
            return redirect(url_for('admin.locks', page=page, mode=mode, size=size))

    complaint_data = None  # there is one complaint record per form, or none
    files_data = OrderedDict()  # there can be several files per form
    if complaint_id:
        # if there is a complaint, there is only one url: the complaint's
        complaint_data = pagesdb.get_complaint(complaint_id)
        if complaint_data and "urlreported" in complaint_data:
            # extract the id from complaint["urlreported"], from base64 to hexadecimal
            files_data[mid2hex(fileurl2mid(complaint_data["urlreported"]))] = None
    elif fileids:
        # with no complaint, the files come from the url
        files_data.update((i, None) for i in fileids)

    # Decide whether to offer blocking, unblocking, or both options,
    # depending on whether the files are blocked or not.
    # Also fill in the files' information.
    blocked = 0
    unblocked = 0
    for fileid in files_data.iterkeys():
        data = filesdb.get_file(fileid, bl=None)
        if data is None and fileid in filenames:
            bugged.append(fileid)
            sid = get_id_server_from_search(fileid, filenames[fileid])
            if sid:
                data = filesdb.get_file(fileid, sid=sid, bl=None)
        files_data[fileid] = data or {}
        if not "bl" in files_data[fileid] or files_data[fileid]["bl"] == 0:
            unblocked += 1
        else:
            blocked += 1

    return render_template('admin/lock_file.html',
        page_title=_('admin_locks_fileinfo'),
        complaint_data=complaint_data,
        files_data=files_data,
        filenames=filenames,
        bugged=bugged,
        fileids=",".join("%s:%s" % (fileid, prop["s"] if "s" in prop else "")
                         for fileid, prop in files_data.iteritems()),
        blocked=None if blocked and unblocked else blocked > 0,
        list_mode=mode,
        page=page,
        title=admin_title('admin_locks_fileinfo'))
def save_visited(self, files):
    try:
        self.searchd.get_redis_connection().publish(
            VISITED_LINKS_CHANNEL,
            msgpack.packb([mid2hex(f["file"]["_id"]) for f in files if f]))
    except BaseException:
        logging.exception("Can't log visited files.")
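# On the other end of VISITED_LINKS_CHANNEL, a subscriber can unpack the hex
# file ids with the same msgpack encoding. A minimal sketch with redis-py
# (the connection settings and handle_visit are illustrative assumptions):
import msgpack
import redis

r = redis.StrictRedis()  # assumed connection parameters
pubsub = r.pubsub()
pubsub.subscribe(VISITED_LINKS_CHANNEL)
for message in pubsub.listen():
    if message["type"] == "message":
        # each payload is a msgpack-encoded list of hex file ids
        for hexid in msgpack.unpackb(message["data"]):
            handle_visit(hexid)  # hypothetical per-visit handler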
profiler.checkpoint(profiler_data, opening=["entities"], closing=["sphinx"])
results_entities = list(set(int(aid[4]) >> 32 for aid in ids if int(aid[4]) >> 32))
ntts = {int(ntt["_id"]): ntt for ntt in entitiesdb.get_entities(results_entities)} if results_entities else {}
profiler.checkpoint(profiler_data, closing=["entities"])

'''# fetch related entities
if ntts:
    rel_ids = list(set(eid
                       for ntt in ntts.itervalues() if "r" in ntt
                       for eids in ntt["r"].itervalues()
                       for eid in eids))
    ntts.update({int(ntt["_id"]): ntt
                 for ntt in entitiesdb.get_entities(rel_ids, None, (False, [u"episode"]))})
'''

result = {"time": max(stats["t"].itervalues()) if stats["t"] else 0,
          "total_found": stats["cs"]}

# remove the download's id from the results list
if download and "file_data" in download and download["file_data"]:
    download_id = mid2hex(download["file_data"]["file"]["_id"])
    ids = list(aid for aid in ids if aid[0] != download_id)
else:
    download_id = None

profiler.checkpoint(profiler_data, opening=["mongo"])
files_dict = {str(f["_id"]): secure_fill_data(f, text=query, ntts=ntts)
              for f in get_files(ids, s)}
profiler.checkpoint(profiler_data, closing=["mongo"])

# add the download to the results
if download_id:
    files_dict[download_id] = download["file_data"]
    ids.insert(0, (download_id, -1, -1, -1))

# sort the results and add search information
files = []
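# The int(aid[4]) >> 32 filter above implies that the fifth element of each
# result tuple packs an entity id in its high 32 bits (zero meaning "no
# entity"). A sketch of that assumed layout:
packed = (42 << 32) | 7    # entity id 42 in the high bits, other data below
assert packed >> 32 == 42  # what the comprehension extracts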