Beispiel #1
0
    def set_file_vote(self, file_id, user, lang, vote):
        '''
        Store a vote for a file and return the recomputed vote totals.

        The vote is upserted into the users.vote collection; afterwards a
        map-reduce over every vote whose _id starts with the file's hex id
        aggregates the votes per language.

        @param file_id: file identifier; mid2hex(file_id) prefixes the vote _id
        @param user: voting user; its id, is_authenticated() and (for
                     anonymous users) session_ip are read
        @param lang: language stored with the vote and used as map-reduce key
        @param vote: 1 counts as a positive vote, any other value as -1

        @return: dictionary of the form language: aggregated values
        '''
        data = {
            "u": user.id,
            "k": 1 if vote == 1 else -1,
            "d": datetime.utcnow(),
            "l": lang,
            }
        try:
            hex_id = mid2hex(file_id)
            vote_coll = self.user_conn.users.vote

            # TODO(felipe): remove once the underlying error is fixed
            if user.id < 0 and user.is_authenticated():
                # The vote is not stored in this case, only reported.
                logging.error(
                    "Inconsistencia de usuario votando logeado id negativo.",
                    extra={
                        "self": self,
                        "file_id": file_id,
                        "user": user,
                        "lang": lang,
                        "vote": vote,
                        "data": data,
                        })
            elif user.is_authenticated():
                # One vote per (file, user): the user id is part of the _id.
                data["_id"] = "%s_%s" % (hex_id, user.id)
                vote_coll.update({"_id": data["_id"]}, data, upsert=True)
            else:
                # Anonymous votes are keyed by (file, session ip).
                data["_id"] = "%s:%s" % (hex_id, user.session_ip)
                vote_coll.update(
                    {"_id": data["_id"], "u": data["u"]},
                    data, upsert=True)

            # For every language emit the karma, the vote count and the sum
            map_function = Code('''
            function()
            {
                emit(this.l,{
                    k:this.k,
                    c:new Array((this.k>0)?1:0,(this.k<0)?1:0),
                    s:new Array((this.k>0)?this.k:0,(this.k<0)?this.k:0)
                })
            }''')
            # Sums everything and applies 1/(1+e^(-x)) so the value lies
            # between 0 and 1.
            # NOTE(review): MongoDB is documented not to call reduce for a key
            # with a single emitted value; such a language would come back in
            # the map's shape (k, c, s) without "t" -- confirm callers cope.
            reduce_function = Code('''
            function(lang, vals)
            {
                c=new Array(0,0);
                s=new Array(0,0);
                for (var i in vals)
                {
                    c[0]+=vals[i].c[0];
                    c[1]+=vals[i].c[1];
                    s[0]+=vals[i].s[0];
                    s[1]+=vals[i].s[1];
                }
                return {t:1/(1+Math.exp(-((s[0]*c[0]+s[1]*c[1])/(c[0]+c[1])))), c:c, s:s};
            }''')
            # Third parameter: return the result inline instead of creating a
            # new collection
            totals = vote_coll.map_reduce(
                map_function,
                reduce_function,
                {"inline": 1},
                query = {"_id": {"$regex": "^%s" % hex_id}}
                )
            # Return a dictionary of the form language: values
            return {values["_id"]: values["value"] for values in totals["results"]}
        finally:
            # Always end the request, even when an upsert or the map-reduce
            # raised.
            self.user_conn.end_request()
Beispiel #2
0
def users():
    '''
    User administration view.

    On POST the user is looked up with the mode and identifier submitted in
    the search form. A match redirects to that user's edit page; otherwise an
    error is flashed and the search page is rendered again.
    '''
    searchform = SearchUserForm(request.form, prefix="searchform_")

    if request.method == "POST":
        mode = searchform.mode.data
        identifier = searchform.identifier.data

        # (search mode, name of the usersdb finder that serves it)
        finders = (
            ("username", "find_username"),
            ("email", "find_email"),
            ("hexid", "find_userid"),
            ("oauth", "find_oauthid"),
        )
        user_data = None
        for known_mode, finder_name in finders:
            if mode == known_mode:
                user_data = getattr(usersdb, finder_name)(identifier)
                break

        if not user_data:
            flash("admin_users_not_found","error")
        else:
            edit_url = url_for(
                "admin.db_edit",
                collection="user",
                document_id=mid2hex(user_data["_id"]))
            return redirect(edit_url)

    template_args = dict(
        page_title=_('admin_users'),
        user_count=usersdb.count_users(),
        blocked=False,
        search_form=searchform)
    return render_template('admin/users.html', **template_args)
Beispiel #3
0
def get_files(ids):
    '''
    Receives a list of tuples of size 3 (as returned by search's get_ids) and
    yields the matching mongo files that are not blocked.
    A blocked file is skipped and, once every file has been yielded, blocked
    in sphinx.

    @type ids: iterable of tuples of size 3 or bigger
    @param ids: list of tuples (mongoid, server id, sphinx id)

    @yield: each of the results found in the mongos for the given ids

    '''
    # Snapshot the ids: they are walked twice (by the mongo query and again to
    # build the sphinx-id lookup), which a one-shot iterator would not survive.
    ids = list(ids)

    toblock = []
    for f in filesdb.get_files(ids, servers_known = True, bl = None):
        if f["bl"] == 0 or f["bl"] is None:
            yield f
        else:
            toblock.append(mid2hex(f["_id"]))

    # block in sphinx the files that turned out to be blocked
    if toblock:
        cache.cacheme = False
        # presumably the first tuple element is the hex mongo id, since it is
        # looked up with mid2hex(f["_id"]) -- verify against the caller
        id_list = {i[0]:i[2] for i in ids}
        block_files( sphinx_ids=[id_list[i] for i in toblock] )
Beispiel #4
0
def search():
    '''
    Run a file search for the "q" request argument and render the first
    results.

    Without a query a message is flashed and the user is redirected to the
    home page. The duration of every step is reported through
    logging.debug.
    '''
    # Local imports, as in the original timing code of this view.
    import logging
    from time import time

    query = request.args.get("q", None)
    if not query:
        flash("write_something")
        return redirect(url_for("index.home"))
    results = {"total_found":0,"total":0,"time":0}

    g.title = "%s - %s" % (query, g.title)

    # fetch the results and work out the pagination
    started = time()
    s = searchd.search(query, None, all_langs.get(g.lang))
    logging.debug("search step a: %s", time() - started)

    started = time()
    stats = s.get_stats()
    logging.debug("search step b: %s", time() - started)

    started = time()
    # NOTE(review): "time" is a fixed value here, it is not read from stats.
    results["time"] = 1
    results["total_found"] = int(stats["cs"])
    ids = list(itertools.islice(s.get_results(), 0, 20))
    logging.debug("search step c: %s", time() - started)

    started = time()
    files_dict = {mid2hex(file_data["_id"]):fill_data(file_data, False, query) for file_data in get_files(ids)}
    logging.debug("search step d: %s", time() - started)

    started = time()
    # keep the order of the search results, dropping ids without file data
    files = [files_dict[file_id[0]] for file_id in ids if file_id[0] in files_dict]
    logging.debug("search step e: %s", time() - started)

    return render_template('files/search.html',
        results=results,
        search=query.split(" "),
        files=files,
        pagination=Pagination(1, 10, min(results["total_found"], 1000)),
        didyoumean=None,
        tags=None)
Beispiel #5
0
def users():
    """
    User administration view.

    A POST looks the user up through the finder that matches the submitted
    search mode and redirects to the user's edit page when one is found;
    otherwise an error is flashed and the search page is rendered.
    """
    # This template has 2 forms, we filter according to which one was sent

    searchform = SearchUserForm(request.form, prefix="searchform_")

    if request.method == "POST":
        mode = searchform.mode.data
        identifier = searchform.identifier.data

        # Pick the usersdb finder for the submitted mode, if it is a known one.
        mode_finders = (
            ("username", "find_username"),
            ("email", "find_email"),
            ("hexid", "find_userid"),
            ("oauth", "find_oauthid"),
        )
        finder_name = next((name for key, name in mode_finders if mode == key), None)
        if finder_name is None:
            user_data = None
        else:
            user_data = getattr(usersdb, finder_name)(identifier)

        if user_data:
            target = url_for("admin.db_edit", collection="user", document_id=mid2hex(user_data["_id"]))
            return redirect(target)

        flash("admin_users_not_found", "error")

    return render_template(
        "admin/users.html",
        page_title=_("admin_users"),
        blocked=False,
        search_form=searchform,
    )
Beispiel #6
0
def init_data(file_data):
    '''
    Build the initial data dictionary of a file.

    Adds the "id" and "name" keys to file_data in place and returns it
    wrapped under "file" next to an empty "view" dictionary.
    '''
    hex_id = mid2hex(file_data["_id"])
    url_id = mid2url(file_data['_id'])
    file_data["id"] = url_id

    # the name is the url of the source stored under the file's own hex id
    own_source = file_data['src'][hex_id]
    file_data['name'] = own_source['url']

    return dict(file=file_data, view={})
Beispiel #7
0
 def get_file_vote(self, file_id, user, lang):
     '''
     Fetch the vote a user gave to a file.

     @param file_id: file identifier; mid2hex(file_id) prefixes the vote _id
     @param user: user whose vote is looked up; its id, is_authenticated()
                  and (for anonymous users) session_ip are read
     @param lang: language the vote must have been stored with

     @return: whatever find_one returns for the vote query
     '''
     if user.is_authenticated():
         # authenticated votes are keyed by (file, user id)
         query = {
             "_id": "%s_%s" % (mid2hex(file_id), user.id),
             "l": lang
             }
     else:
         # anonymous votes are keyed by (file, session ip)
         query = {
             "_id": "%s:%s" % (mid2hex(file_id), user.session_ip),
             "l": lang,
             "u": user.id
             }
     try:
         return self.user_conn.users.vote.find_one(query)
     finally:
         # always end the request, even when the query raised
         self.user_conn.end_request()
Beispiel #8
0
def search():
    '''
    Run a file search and render the results page.

    Reads "q", "page" and "type" from the request arguments. Without a query
    the user is redirected to the home page; a non-numeric page aborts with
    404. Only pages 1 to 100 are actually searched, any other page renders an
    empty result list.
    '''

    # TODO: validate the request parameters
    # nothing was searched: send the user to the home page
    query = request.args.get("q", None)
    if not query:
        flash("write_something")
        return redirect(url_for("index.home"))

    # avoids errors when page does not hold a number
    page = request.args.get("page", "1")
    if page.isdigit():
        page = int(page)
    else:
        abort(404)

    g.title = "%s - %s" % (query, g.title)
    results = {"total_found":0,"total":0,"time":0}

    didyoumean = None
    tags = None
    if 0 < page < 101:
        # request the tags and the "did you mean" suggestion; they are kept
        # under their own names so that a failure below cannot leak an
        # iterator into the template as "tags"
        tags_iter, dym_iter = taming_search(query, request.args.get("type", None))

        # fetch the results and work out the pagination
        profiler.checkpoint(opening=["sphinx"])
        results = search_files(query,request.args,page) or results
        ids = get_ids(results)
        profiler.checkpoint(opening=["mongo"], closing=["sphinx"])
        files_dict = {mid2hex(file_data["_id"]):fill_data(file_data, False, query) for file_data in get_files(ids)}
        profiler.checkpoint(opening=["visited"], closing=["mongo"])
        save_visited(files_dict.values())
        profiler.checkpoint(closing=["visited"])
        # search order is kept; ids without file data are dropped
        files=({"file":files_dict[bin2hex(file_id[0])], "search":file_id} for file_id in ids if bin2hex(file_id[0]) in files_dict)

        # collect the taming results; they are optional, so any failure just
        # leaves tags / didyoumean as None.
        # Kept as a catch-all: the failure modes of the taming iterators are
        # not visible from this function.
        try:
            tags = tags_iter.next()
            didyoumean = dym_iter.next()
        except:
            pass
    else:
        files = ()

    return render_template('files/search.html',
        results=results,
        search=request.args["q"].split(" "),
        files=files,
        pagination=Pagination(page, 10, min(results["total_found"], 1000)),
        didyoumean=didyoumean,
        tags=tags)
Beispiel #9
0
def search():
    '''
    Run a file search and render the results page.

    Reads "q", "page" and "type" from the request arguments. Without a query
    the user is redirected to the home page. Only pages 1 to 100 are actually
    searched, any other page renders an empty result list.
    '''
    # TODO: validate the request parameters
    # nothing was searched: send the user to the home page
    query = request.args.get("q", None)
    if not query:
        flash("write_something")
        return redirect(url_for("index.home"))

    # type=int makes a missing or non-numeric page fall back to 1 instead of
    # raising ValueError out of the view
    page = request.args.get("page", 1, type=int)
    g.title = query+" - "+g.title
    results = {"total_found":0,"total":0,"time":0}

    didyoumean = None
    tags = None
    if 0 < page < 101:
        # request the tags and the "did you mean" suggestion
        taming = taming_search(current_app.config, query, request.args.get("type", None), contextg=g._get_current_object())

        # fetch the results and work out the pagination
        profiler.checkpoint(opening=["sphinx"])
        results = search_files(query,request.args,page) or results
        ids = get_ids(results)
        profiler.checkpoint(opening=["mongo"], closing=["sphinx"])
        files_dict = {mid2hex(file_data["_id"]):fill_data(file_data, False, query) for file_data in get_files(ids)}
        profiler.checkpoint(opening=["visited"], closing=["mongo"])
        save_visited(files_dict.values())
        profiler.checkpoint(closing=["visited"])
        # search order is kept; ids without file data are dropped
        files=(files_dict[bin2hex(file_id[0])] for file_id in ids if bin2hex(file_id[0]) in files_dict)

        # collect the taming results: first the tags, then the suggestion.
        # They are optional, so any failure leaves the remaining ones as None.
        # Kept as a catch-all: the failure modes of the taming iterator are
        # not visible from this function.
        try:
            tags = taming.next()
            didyoumean = taming.next()
        except:
            pass
    else:
        files = ()

    return render_template('files/search.html',
        results=results,
        search=request.args["q"].split(" "),
        files=files,
        pagination=Pagination(page, 10, min(results["total_found"], 1000)),
        didyoumean=didyoumean,
        tags=tags)
Beispiel #10
0
def block_files(sphinx_ids=(), mongo_ids=(), block=True):
    """
    Receives sphinx ids and/or mongodb ObjectIDs of files and blocks them in
    sphinx (attribute bl set to 1), or unblocks them (bl set to 0).

    @param sphinx_ids: iterable of sphinx document ids
    @param mongo_ids: iterable of mongo ids; each one is searched in sphinx
                      to find its sphinx id
    @param block: True to set bl to 1, False to set it to 0

    @return: True when every mongo id was found in sphinx and the value
             returned by UpdateAttributes equals the number of ids sent
    """
    # Materialise both arguments: they are measured and sliced below.
    sphinx_ids = list(sphinx_ids)
    mongo_ids = list(mongo_ids)
    # Ids the caller asked for: the ones given directly plus one per mongo id.
    requested = len(sphinx_ids) + len(mongo_ids)

    sph = sphinxapi2.SphinxClient()
    try:
        sph.SetServer(current_app.config["SERVICE_SPHINX"], current_app.config["SERVICE_SPHINX_PORT"])
        sph.SetConnectTimeout(current_app.config["SERVICE_SPHINX_CONNECT_TIMEOUT"])
        sph.SetMatchMode(sphinxapi2.SPH_MATCH_FULLSCAN)
        sph.SetLimits(0, 1, 1, 1)
        if mongo_ids:
            # For mongo ids, run multi-queries to find their sphinx ids
            for i in xrange(0, len(mongo_ids), 32):
                # The mongo ids are processed in groups of 32, which is the
                # limit sphinx allows
                for mongoid in mongo_ids[i : i + 32]:
                    uri1, uri2, uri3 = struct.unpack("III", mid2bin(mongoid))
                    sph.ResetFilters()
                    sph.SetFilter("uri1", [uri1])
                    sph.SetFilter("uri2", [uri2])
                    sph.SetFilter("uri3", [uri3])
                    sph.AddQuery("", "idx_files", "Searching fileid %s" % mid2hex(mongoid))
                results = sph.RunQueries()
                if results:
                    for result in results:
                        if "matches" in result and result["matches"]:
                            sphinx_ids.append(result["matches"][0]["id"])
                        if "warning" in result and result["warning"]:
                            logging.warning(result["warning"])
                else:
                    logging.error(sph.GetLastError())
        sph.ResetFilters()
        tr = sph.UpdateAttributes("idx_files", ["bl"], {i: [1 if block else 0] for i in sphinx_ids})
    finally:
        # close the client even when a query or the update raised
        sph.Close()
    # Success needs every mongo id resolved (so the final id list has the
    # requested size) and every id in that list reported as updated.
    return tr == len(sphinx_ids) == requested
Beispiel #11
0
def get_files(ids, sphinx_search=None):
    '''
    Receives a list of search result tuples and yields the matching mongo
    files that are not blocked.
    A blocked file is skipped and, once every file has been yielded, blocked
    in sphinx through sphinx_search (when one is given).

    @type ids: iterable of tuples; positions 0, 2 and 4 are read here, so
               each tuple needs at least 5 elements when a file has to be
               blocked
    @param ids: list of tuples (mongoid, server id, sphinx id, ...)
    @param sphinx_search: object whose block_files method receives the files
                          to block; without it nothing is blocked

    @yield: each of the results found in the mongos for the given ids

    '''
    # Snapshot the ids: they are walked twice (by the mongo query and again to
    # build the lookup used for blocking), which a one-shot iterator would not
    # survive.
    ids = list(ids)

    toblock = []
    for f in filesdb.get_files(ids, servers_known = True, bl = None):
        if f["bl"] == 0 or f["bl"] is None:
            yield f
        else:
            toblock.append((mid2hex(f["_id"]), str(f["s"])))

    # block in sphinx the files that turned out to be blocked
    if toblock and sphinx_search:
        cache.cacheme = False
        # presumably the first tuple element is the hex mongo id, since it is
        # looked up with mid2hex(f["_id"]) -- verify against the caller
        id_list = {i[0]:i for i in ids}
        sphinx_search.block_files([(id_list[mid][2],server, None, mid, id_list[mid][4]) for mid, server in toblock])
Beispiel #12
0
def download(file_id, file_name=""):
    '''
    Render the download page of a file.

    Aborts with 404, 410 or 503 depending on why the file cannot be shown.
    On success it fills g.category, g.title, g.page_description and
    g.last_modified (plus g.is_adult_content, g.blacklisted_content and
    g.must_cache when they apply) and registers the visit.

    @param file_id: file id as received from the url, or None
    @param file_name: file name part of the url

    @return: the rendered 'file.html' template; for an old numeric id that
             could be resolved, a dict holding a 301 redirect under "html"
    '''
    # unquote_plus lives in urllib; urllib2 does not expose it, so asking
    # urllib2 for it only raised an error that was silently swallowed below.
    import urllib

    g.page_type = FILE_PAGE_TYPE
    if request.referrer:
        # best effort: recover the search query the visitor came from
        try:
            posibles_queries = referrer_parser.match(request.referrer)
            if posibles_queries:
                query = posibles_queries.group(1) or posibles_queries.group(2) or ""
                if query:
                    get_query_info(u(urllib.unquote_plus(query).decode("utf-8")))
        except:
            pass

    error = None
    file_data=None
    if file_id is not None: # when an id is given, check that it is valid
        try: # try to convert the id from the url into an internal one
            file_id=url2mid(file_id)
        except TypeError as e:
            try: # check whether it is an old style id
                possible_file_id = filesdb.get_newid(file_id)
                if possible_file_id is None:
                    logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                    error=404
                else:
                    logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                    return {"html": empty_redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":301}

            except BaseException as e:
                logging.exception(e)
                error=503

            file_id=None

        if file_id:
            try:
                file_data=get_file_metadata(file_id, file_name.replace("-"," "))
            except DatabaseError:
                error=503
            except FileNotExist:
                error=404
            except (FileRemoved, FileFoofindRemoved, FileNoSources):
                error=410
            except FileUnknownBlock:
                error=404

            if error is None and not file_data: # no errors and no data: the file exists but could not be retrieved
                error=503

    if error:
        abort(error)

    # complete the torrent data
    file_data = torrents_data(file_data, True, g.category)
    if not file_data:
        abort(404)

    if file_data["view"]["category"]:
        g.category = file_data["view"]["category"]
        if file_data["view"]["category"].tag=="p**n":
            g.is_adult_content = True
    else:
        g.category = file_data["view"]["category_type"]

    # do not allow access to files that should be blocked
    prepared_phrase = blacklists.prepare_phrase(file_data['view']['nfn'])
    if prepared_phrase in blacklists["forbidden"] or (prepared_phrase in blacklists["misconduct"] and prepared_phrase in blacklists["underage"]):
        g.blacklisted_content = "File"
        if not g.show_blacklisted_content:
            abort(404)

    query = download_search(file_data, file_name, "torrent").replace("-"," ")
    related = single_search(query, category=None, not_category=(None if g.is_adult_content else "p**n"), title=("Related torrents",3,None), zone="File / Related", last_items=[], limit=30, max_limit=15, ignore_ids=[mid2hex(file_id)], show_order=None)

    # choose the page title
    title = file_data['view']['fn']

    # cut the title at the closest separator found walking back from
    # position 101 (never before position 31); without one, cut at 101
    if len(title)>101:
        for pos in xrange(101, 30, -1):
            if title[pos] in SEPPER:
                title = title[:pos].strip()
                break
        else:
            title = title[:101]

    g.title = [title]

    page_description = ""
    if "description" in file_data["view"]["md"]:
        page_description = file_data["view"]["md"]["description"].replace("\n", " ")

    if not page_description:
        if g.category:
            page_description = _("download_category_desc", category=singular_filter(g.category.title).lower(), categorys=g.category.title.lower()).capitalize()
        else:
            page_description = _("download_desc")


    # short descriptions are padded with the texts of the related files
    if len(page_description)<50:
        if page_description:
            page_description += ". "
        page_description += " ".join(text.capitalize()+"." for text in related[1]["files_text"])

    # long descriptions are cut within the first 180 characters: at the last
    # "." if it is at position 100 or later, else at the last " " under the
    # same condition, else at 180
    if len(page_description)>180:
        last_stop = page_description[:180].rindex(".") if "." in page_description[:180] else 0
        if last_stop<100:
            last_stop = page_description[:180].rindex(" ") if " " in page_description[:180] else 0
        if last_stop<100:
            last_stop = 180
        page_description = page_description[:last_stop]+"."

    g.page_description = page_description

    is_canonical_filename = file_data["view"]["seo-fn"]==file_name

    # register the visit to the file
    if g.search_bot:
        searchd.log_bot_event(g.search_bot, True)
    else:
        save_visited([file_data])

    if related[0]:
        g.must_cache = 3600

    # last-modified
    g.last_modified = file_data["file"]["ls"]

    return render_template('file.html', related_query = query, file_data=file_data, related_files=related, is_canonical_filename=is_canonical_filename, featured=get_featured(related[1]["count"]+len(file_data["view"]["md"]), 1))
Beispiel #13
0
def lock_file(complaint_id=None):
    '''
    File information and blocking view.

    The files to show come either from a complaint (its reported url) or from
    the identifiers submitted in the search form (hex ids, base64 ids or file
    urls). The confirmation form posts "fileids" as comma separated
    "fileid:server" pairs together with "block", "unblock" or "cancel".

    @param complaint_id: id of the complaint being processed, if any

    @return: the rendered 'admin/lock_file.html' template, or a redirect to
             the locks list after a confirmation / cancel submit or when the
             search produced no file ids
    '''
    page = request.args.get("page", 0, int)
    mode = request.args.get("show", "old", str)
    size = request.args.get("size", 15, int)

    filenames = {}
    bugged = []
    fileids = ()
    if request.method == 'POST':
        if "fileids" not in request.form:
            # search form submit: turn the identifiers into hex file ids
            searchform = BlockFileSearchForm(request.form)
            identifiers = searchform.identifier.data.split()
            if searchform.mode.data == "hexid":
                fileids = [ mid2hex(hex2mid(i))
                    for i in identifiers
                    if all(x in "0123456789abcdef" for x in i)
                    ]
            elif searchform.mode.data == "b64id":
                fileids = [
                    mid2hex(url2mid(i))
                    for i in identifiers
                    if all(x in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!-" for x in i)
                        and (len(i)*8)%6 == 0
                    ]
            elif searchform.mode.data == "url":
                # maps hex id -> file name taken from the last url segment,
                # without its extension; identifiers lacking "//" are skipped
                # instead of breaking the split below
                filenames.update(
                    (
                        mid2hex(fileurl2mid(i)),
                        u".".join(urllib2.unquote(i.split("/")[-1]).split(".")[:-1])
                        )
                    for i in identifiers
                    if i.startswith("http") and "//" in i and len(i.split("//")[1].split("/")) > 3
                    )
                fileids = filenames.keys()
            if not fileids:
                return redirect(url_for('admin.locks', page=page, mode=mode, size=size))
        else:
            block = request.form.get("block", False, bool)
            unblock = request.form.get("unblock", False, bool)
            if block or unblock: # confirm submit
                # unblock wins when both are sent; the same decision drives
                # mongo, sphinx and the message shown to the user
                should_block = block and not unblock
                if complaint_id: pagesdb.update_complaint({"_id":complaint_id,"processed":True})
                fileids = dict(i.split(":") for i in request.form["fileids"].split(","))
                changed_ids = []
                for fileid, server in fileids.iteritems():
                    changed_ids.append(fileid)
                    req = {"_id":fileid, "bl": int(should_block)}
                    if server: req["s"] = int(server) # when the server is known, use it for efficiency
                    try:
                        # TODO(felipe): check in which cases this can be reached without "s"
                        filesdb.update_file(req, direct_connection=True, update_sphinx=False)
                    except:
                        flash("No se ha podido actualizar el fichero con id %s" % fileid, "error")
                if changed_ids:
                    block_files_in_sphinx(mongo_ids=changed_ids, block=should_block)
                flash("admin_locks_locked" if should_block else "admin_locks_unlocked", "success")
            elif request.form.get("cancel", False, bool): # cancel submit
                if complaint_id:
                    pagesdb.update_complaint({"_id":complaint_id,"processed":True})
                flash("admin_locks_not_locked", "success")
            return redirect(url_for('admin.locks', page=page, mode=mode, size=size))

    complaint_data = None # There is one or no complaint record per form
    files_data = OrderedDict() # There can be several files per form
    if complaint_id: # With a complaint there is a single url, the reported one
        complaint_data = pagesdb.get_complaint(complaint_id)
        if complaint_data and "urlreported" in complaint_data:
            # extract the id from complaint["urlreported"], base64 to hex
            files_data[mid2hex(fileurl2mid(complaint_data["urlreported"]))] = None
    elif fileids: # Without a complaint the files come from the search form
        files_data.update((i,None) for i in fileids)

    # Guess whether to offer blocking, unblocking or both, depending on
    # whether the files are blocked or not, and fill in the file information
    blocked = 0
    unblocked = 0
    for fileid in files_data.iterkeys():
        data = filesdb.get_file(fileid, bl=None)
        if data is None and fileid in filenames:
            # not found directly: look its server up through search
            bugged.append(fileid)
            sid = get_id_server_from_search(fileid, filenames[fileid])
            if sid:
                data = filesdb.get_file(fileid, sid = sid, bl = None)
        files_data[fileid] = data or {}
        if "bl" not in files_data[fileid] or files_data[fileid]["bl"] == 0: unblocked += 1
        else: blocked += 1

    return render_template('admin/lock_file.html',
        page_title=_('admin_locks_fileinfo'),
        complaint_data=complaint_data,
        files_data=files_data,
        filenames = filenames,
        bugged = bugged,
        fileids=",".join(
            "%s:%s" % (fileid, prop["s"] if "s" in prop else "")
            for fileid, prop in files_data.iteritems()),
        # None when blocked and unblocked files are mixed
        blocked=None if blocked and unblocked else blocked > 0,
        list_mode=mode,
        page=page,
        title=admin_title('admin_locks_fileinfo'))
Beispiel #14
0
 def save_visited(self, files):
     '''
     Publish the hex ids of the given files on the visited links channel.

     Falsy entries of files are skipped. Any failure is logged and never
     propagated to the caller.
     '''
     try:
         redis_conn = self.searchd.get_redis_connection()
         visited_ids = [mid2hex(entry["file"]["_id"]) for entry in files if entry]
         redis_conn.publish(VISITED_LINKS_CHANNEL, msgpack.packb(visited_ids))
     except BaseException:
         logging.exception("Can't log visited files.")
Beispiel #15
0
    profiler.checkpoint(profiler_data,opening=["entities"], closing=["sphinx"])

    results_entities = list(set(int(aid[4])>>32 for aid in ids if int(aid[4])>>32))
    ntts = {int(ntt["_id"]):ntt for ntt in entitiesdb.get_entities(results_entities)} if results_entities else {}
    profiler.checkpoint(profiler_data, closing=["entities"])
    '''# trae entidades relacionadas
    if ntts:
        rel_ids = list(set(eid for ntt in ntts.itervalues() for eids in ntt["r"].itervalues() if "r" in ntt for eid in eids))
        ntts.update({int(ntt["_id"]):ntt for ntt in entitiesdb.get_entities(rel_ids, None, (False, [u"episode"]))})
    '''

    result = {"time": max(stats["t"].itervalues()) if stats["t"] else 0, "total_found": stats["cs"]}

    # elimina el id del download de la lista de resultados
    if download and "file_data" in download and download["file_data"]:
        download_id = mid2hex(download["file_data"]["file"]["_id"])
        ids = list(aid for aid in ids if aid[0]!=download_id)
    else:
        download_id = None

    profiler.checkpoint(profiler_data, opening=["mongo"])
    files_dict={str(f["_id"]):secure_fill_data(f,text=query,ntts=ntts) for f in get_files(ids,s)}
    profiler.checkpoint(profiler_data, closing=["mongo"])

    # añade download a los resultados
    if download_id:
        files_dict[download_id] = download["file_data"]
        ids.insert(0,(download_id, -1, -1, -1))

    # ordena resultados y añade informacion de la busqueda
    files = []