Beispiel #1
0
    def get_entities(self, entities_ids=None, entities_keys=None, schemas=None):
        '''
        Fetches entity documents matching the given ids and/or keys,
        optionally filtered by schema.

        @type entities_ids: list or None
        @param entities_ids: entity "_id" values to match (any of them)

        @type entities_keys: list or None
        @param entities_keys: entity "k" values to match (any of them)

        @type schemas: tuple (bool, list) or None
        @param schemas: (include, schema_list); when include is True the "s"
                        field must be in schema_list, otherwise it must not be

        @rtype: tuple of MongoDB documents
        @return: matching documents, or an empty tuple when disabled or on error
        '''
        if self.enabled:
            try:
                query = {}
                if schemas:
                    # single schema: use equality / $ne instead of $in / $nin
                    if len(schemas[1])==1:
                        query["s"] = schemas[1][0] if schemas[0] else {"$ne":schemas[1][0]}
                    else:
                        query["s"] = {("$in" if schemas[0] else "$nin"):schemas[1]}
                # when both ids and keys are given, match either of them
                if entities_ids and entities_keys:
                    query["$or"] = [{"_id":{"$in":entities_ids}}, {"k":{"$in":entities_keys}}]
                elif entities_ids:
                    query["_id"] = {"$in":entities_ids}
                elif entities_keys:
                    query["k"] = {"$in":entities_keys}

                data = tuple(self.entities_conn.ontology.ontology.find(query))
                self.entities_conn.end_request()
                return data
            except BaseException as e:
                logging.warn("Can't access to entities database. Entities disabled.")
            # any database failure permanently disables further entity lookups
            self.enabled = False
        return ()
Beispiel #2
0
def download(instfile):
    '''
    Serves a downloader installer file by its configured name.

    Resolves aliases, redirects files living under the static downloads
    directory to the static_download endpoint (so the server can serve them
    directly), and serves anything else dynamically.  Aborts with 404 for
    unknown files.
    '''
    g.cache_code += "D"
    downloader_files = current_app.config["DOWNLOADER_FILES"]
    downloader_files_aliases = current_app.config["DOWNLOADER_FILES_ALIASES"]

    # resolve alias to the canonical file name
    if instfile in downloader_files_aliases:
        instfile = downloader_files_aliases[instfile]
    if instfile not in downloader_files:
        abort(404)

    path = downloader_files[instfile]

    # Redirect downloads on static directories to static_download endpoint
    # hoping server will handle request and serve it directly
    prefix = os.path.abspath(os.path.join(current_app.root_path,"../downloads")) + os.sep
    if path.startswith(prefix):
        relative_path = path[len(prefix):]
        return redirect(url_for('.static_download', instfile=relative_path))

    # All downloads should be inside downloads static dir
    logging.warn("Download %r served dinamically because not in static directory %r" % (path, prefix))
    return send_file(path, mimetypes.guess_type(path)[0])
Beispiel #3
0
def pixel():
    '''
    Serves a 1x1 tracking pixel and, for non-bot visitors coming from a
    search results page, registers the search for the tag clouds.
    '''
    pixel_response = make_response(PIXEL)
    pixel_response.mimetype="image/gif"
    # tracking responses must never be cached
    g.must_cache = 0

    if not g.search_bot and request.referrer:
        try:
            # extract query and category from the referrer URL path
            parts = urllib2.unquote(request.referrer).decode("utf-8").split("?")[0].split("/")
            get_query_info(parts[-1], parts[-2] if parts[-2]!="search" else None, check_qs=False)

            if g.query and g.safe_query:
                # doesn't register very long searches
                if len(g.safe_query)>=current_app.config["MAX_LENGTH_SAVE"]:
                    return pixel_response

                # doesn't register searches containing disallowed words
                if blacklists.prepare_phrase(g.safe_query) in (blacklists_adult if g.is_adult_content else blacklists):
                    return pixel_response

                # if registration applies, saves the search for the tag clouds
                torrentsdb.save_search(g.query, hashlib.md5((g.safe_query+"_"+request.remote_addr).encode("utf-8")).digest(), g.category.cat_id if g.category else 0)
        except BaseException as e:
            logging.warn("Error registering search.")

    return pixel_response
Beispiel #4
0
    def run(self):
        '''
        Worker loop: takes files from the requests queue, attaches their main
        entity plus first- and second-level related entities from the ontology
        database, and puts the enriched files on the results queue.

        A None request acts as the shutdown sentinel.  Files whose main entity
        is missing are logged to nf_ntts.csv.
        '''
        gconn = None
        not_found_count = 0
        with open("nf_ntts.csv", "w") as not_found_ntts:
            while True:
                # get entity lookup requests
                afile = self.requests.get(True)
                if afile is None:
                    self.requests.task_done()
                    break

                # connect lazily; the connection is reset on errors below
                if not gconn:
                    gconn = pymongo.Connection(self.server, slave_okay=True)

                try:
                    # look up the main entity
                    main_ntt_id = int(afile["se"]["_id"])
                    ntt = gconn.ontology.ontology.find_one({"_id":main_ntt_id})
                    ntts1_info = set()
                    ntts2_info = set()
                    if ntt:
                        afile["se"]["info"] = ntt
                        # look up first and second level related entities
                        if "r" in ntt and ntt["r"]:
                            # build the set of first-level (entity id, relation type) pairs
                            ntts1_info = {(ntt_id, relation[:3])
                                            for relation, relation_ids in ntt["r"].iteritems()
                                                for ntt_id in relation_ids if ntt_id!=main_ntt_id}

                            # if there are first-level entities...
                            if ntts1_info:
                                # fetch first-level entities
                                ntts1_ids = [ntt_id for ntt_id, relation in ntts1_info]
                                ntts1 = list(gconn.ontology.ontology.find({"_id":{"$in":ntts1_ids}}))

                                # build the set of second-level (entity id, relation type) pairs
                                ntts1_ids.append(main_ntt_id) # include the main id so the list works as a filter
                                ntts2_info = {(ntt_id, relation[:3])
                                                for ntt2 in ntts1 if "r" in ntt2
                                                    for relation, relation_ids in ntt2["r"].iteritems()
                                                        for ntt_id in relation_ids if ntt_id not in ntts1_ids}

                        afile["se"]["rel"] = (ntts1_info, ntts2_info)
                    else:
                        not_found_ntts.write(str(afile["_id"])+"\n")
                        not_found_count += 1
                        del afile["se"]["_id"]
                except BaseException:
                    ntt_id = str(afile["se"]["_id"]) if "_id" in afile["se"] else "???"
                    # "_id" may already be gone (the ternary above accounts for
                    # that); pop avoids raising KeyError inside the handler
                    afile["se"].pop("_id", None)
                    # drop the connection so the next request reconnects cleanly
                    gconn.close()
                    gconn = None
                    logging.exception("Error obtaining entities for file %s: %s."%(str(afile["_id"]), ntt_id))

                self.results.put(afile)
                self.requests.task_done()

        if not_found_count:
            logging.warn("Entities not found for some files. Check file nf_ntts.csv.")
Beispiel #5
0
def generate_id(afile, part):
    '''
    Builds a numeric document id from the first five bytes of the file's
    ObjectId, the given part offset and a per-bucket sequence counter.
    Returns the (id, afile) pair.
    '''
    prefix = afile["_id"].binary[:5]
    (bucket,) = h_struct.unpack(prefix[:2])
    # advance the per-bucket sequence, wrapping after 0x10000 uses
    counters[bucket] += 1
    if counters[bucket] > 0x10000:
        logging.warn("Counter overflow: %s" % hex(bucket))
        counters[bucket] = 1
    base = id_struct.unpack(prefix + "\0\0\0")[0]
    return base + part + counters[bucket] - 1, afile
Beispiel #6
0
def end_guess_categories_with_results(s):
    '''
    Returns result counts grouped by category, discounting adult-category
    results from the home (category 0) total.
    '''
    # group results by the category nibble encoded in bits 28-31 of each id
    count_results = s.get_group_count(lambda x: (long(x) >> 28) & 0xF)
    if count_results and count_results[0]:
        adult_total = sum(count_results.get(cat.cat_id, 0)
                          for cat in g.categories if cat.adult_content)
        count_results[0] -= adult_total
        if count_results[0] < 0:
            # more adult results than home results: clamp and report
            logging.warn("Count results for home lower than zero for search '%s' in category '%s'"%(g.query, g.category.title if g.category else "-"))
            count_results[0] = 0
    return count_results
Beispiel #7
0
 def all_errors(e):
     '''
     Renders the generic error page for any HTTP error.

     Falls back to an empty response with the error code when rendering
     itself fails (e.g. the request arrived without a usable context).
     '''
     error_code, error_title, error_description = get_error_code_information(e)
     try:
         init_g()
         g.page_description = g.title = "%d %s" % (error_code, error_title)
         page = render_template('error.html', zone="error", error=error_code, description=error_description, search_form=SearchForm())
         return page, error_code
     except BaseException as ex: # if the error arrived without a context, the server handles it
         logging.warn(ex)
         return make_response("", error_code)
Beispiel #8
0
def vote(vtype):
    '''
    Registers a user's vote/report of type vtype for the file referenced by
    the request's referrer URL, and returns the updated vote summary as JSON.

    Bots and requests whose referrer is not from this site get a 404; parse
    or database errors return the (possibly partial) result gathered so far.
    '''
    g.must_cache = 0
    result = {}

    # don't allow bots to access this feature
    if g.search_bot:
        logging.warn("Bot is trying to access to file information.")
        return abort(404)


    # check referrer's domain matches this request's domain
    referrer = urllib2.unquote(request.referrer).decode("utf-8")
    if not referrer.startswith(request.url_root):
        logging.warn("Don't allows votes from %s."%referrer)
        return abort(404)

    # get data from request
    try:
        # get file id from referrer url
        filemid = fileurl2mid(referrer)
        # get user's ip and calculates a unique id for this file and user
        ip = (request.headers.getlist("X-Forwarded-For") or [request.remote_addr])[0]
        userid = hashlib.sha1(str(filemid)+"_"+ip).hexdigest()[:10]
    except BaseException as e:
        logging.warn("Error parsing information from request.")
        return jsonify(result)


    if not vtype in VOTES:
        logging.warn("Wrong vote type: %s."%unicode(vtype))
        return jsonify(result)

    try:
        # save user vote
        updated_votes = torrentsdb.save_vote(filemid, userid, vtype)
        filesdb.update_file({"_id":filemid, "vs.u":Counter(updated_votes.itervalues())})
        result["user"] = vtype
        result["ret"] = ["report", _("Your report has been registered."), "info"]
    except BaseException as e:
        logging.warn("Error registering vote.")
        return jsonify(result)

    try:
        f = filesdb.get_file(filemid, "1")
        rate = rate_torrent(f)

        # verified votes are reported separately from the sum of all other vote types
        result["votes"] = (rate["votes"].get(VERIFIED_VOTE,0), sum(value for vtype, value in rate["votes"].iteritems() if vtype!=VERIFIED_VOTE))
        if "flag" in rate:
            result["flag"] = rate["flag"]
            result['flag'][1] = _(result['flag'][1]) # translate flag text
        result["rating"] = int(round(rate["rating"]*5))

    except BaseException as e:
        logging.error("Error retrieving file information: %s."%str(filemid))

    return jsonify(result)
Beispiel #9
0
 def _get_closed_file(self, filename, version=None):
     '''
     Fetches a GridFS file (optionally a specific version) and closes it.

     @param filename: stored file name
     @param version: optional version code; latest version when None
     @return: the closed GridFS file object, or None when not found
     '''
     try:
         if version is not None:
             f = self.download_fs.get_last_version(filename=filename, version_code=version)
         else:
             f = self.download_fs.get_last_version(filename=filename)
         # callers only need the metadata, so release the handle immediately
         f.close()
         return f
     except gridfs.errors.NoFile as e:
         logging.warn("Requested download not found: %s" % filename)
     return None
Beispiel #10
0
def twitter():
    '''
    OAuth login entry point through Twitter.
    '''
    try:
        logout_oauth()
        callback = url_for('.twitter_authorized', next=oauth_redirect())
        return o_twitter.authorize(callback)
    except BaseException as e:
        logging.warn(e)

    # authorization could not be started: report and go back to login
    flash(_("technical_problems", service="twitter"))
    return redirect(url_for('.login'))
Beispiel #11
0
def facebook():
    '''
    OAuth login entry point through Facebook.
    '''
    try:
        logout_oauth()
        callback = url_for('.facebook_authorized', next=oauth_redirect(), _external=True)
        return o_facebook.authorize(callback)
    except BaseException as e:
        logging.warn(e)

    # authorization could not be started: report and go back to login
    flash(_("technical_problems", service="facebook"))
    return redirect(url_for('.login'))
Beispiel #12
0
def send_instfile(instfile, build):
    '''
    Serves a downloader installer file for the given build.

    Resolves aliases against the build's downloader properties and only
    serves files explicitly listed as downloadables; anything else is a 404.

    NOTE: the original version carried an unreachable duplicate of the
    download() view after the return statement; it has been removed.
    '''
    g.cache_code += "D"
    downloader_files = g.downloader_properties.get(build, None)
    if not downloader_files:
        abort(404)

    downloader_files_aliases = downloader_files.get("aliases",{})
    if instfile in downloader_files_aliases:
        path = downloader_files[downloader_files_aliases[instfile]]
    else:
        # check that can be downloaded
        for downloadable in downloader_files["downloadables"]:
            if downloader_files.get(downloadable, None)==instfile:
                path = instfile
                break
        else:
            abort(404)

    return send_file(os.path.join(g.downloader_properties["common"]["base_path"], path), mimetypes.guess_type(path)[0])
Beispiel #13
0
def send_mail(subject,to,template=None,attachment=None,**kwargs):
    '''
    Sends an email, handling and logging recipient-refused errors.

    Returns True on success, False when the recipient was refused.
    '''
    recipients = to if isinstance(to, list) else [to]
    template_name = template if template else subject
    try:
        body = render_template('email/' + template_name + '.html', **kwargs)
        msg = Message(_(subject), recipients, html=body)
        if attachment is not None:
            msg.attach(attachment[0], attachment[1], attachment[2])

        mail.send(msg)
        return True
    except SMTPRecipientsRefused as e:
        # extract the SMTP error code and message of the refused recipient
        (code, message) = e[0].values()[0]
        logging.warn("%d: %s" % (code, message))
        flash("error_mail_send")
        return False
Beispiel #14
0
    def get_entity(self, entity_id):
        '''
        Fetches a single entity document by its identifier.

        @type entity_id: long
        @param entity_id: entity id

        @rtype: MongoDB document or dict
        @return: the entity document, or {} when disabled or on error
        '''
        if not self.enabled:
            return {}
        try:
            document = self.entities_conn.ontology.ontology.find_one({"_id":entity_id})
            self.entities_conn.end_request()
            return document
        except BaseException as e:
            logging.warn("Can't access to entities database. Entities disabled.")
        # a failed lookup disables further entity queries
        self.enabled = False
        return {}
Beispiel #15
0
    def init_app(self, app):
        '''
        Apply entities database access configuration.

        Connects to the entities MongoDB (as a replica-set client when
        DATA_SOURCE_ENTITIES_RS is configured, preferring secondaries) and
        enables entity lookups.  Connection failures are logged and leave
        entities disabled.

        @param app: Flask application.
        '''
        if app.config["DATA_SOURCE_ENTITIES"]:
            try:
                if "DATA_SOURCE_ENTITIES_RS" in app.config:
                    # replica-set client: read from secondaries when possible,
                    # optionally restricted by tag sets and latency window
                    self.entities_conn = pymongo.MongoReplicaSetClient(app.config["DATA_SOURCE_ENTITIES"],
                                                                    max_pool_size=app.config["DATA_SOURCE_MAX_POOL_SIZE"],
                                                                    replicaSet = app.config["DATA_SOURCE_ENTITIES_RS"],
                                                                    read_preference = pymongo.read_preferences.ReadPreference.SECONDARY_PREFERRED,
                                                                    tag_sets = app.config.get("DATA_SOURCE_ENTITIES_RS_TAG_SETS",[{}]),
                                                                    secondary_acceptable_latency_ms = app.config.get("SECONDARY_ACCEPTABLE_LATENCY_MS", 15))
                else:
                    self.entities_conn = pymongo.MongoClient(app.config["DATA_SOURCE_ENTITIES"], max_pool_size=app.config["DATA_SOURCE_MAX_POOL_SIZE"], slave_okay=True)
                self.enabled = True
            except BaseException as e:
                logging.warn("Can't connect to entities database. Entities disabled.")
Beispiel #16
0
    def find_entities(self, keys, exact = False):
        '''
        Fetches entity documents matching the given keys.

        @type keys: array of dicts
        @param keys: keys to search for

        @type exact: boolean
        @param exact: when True the entity keys must contain all given keys
                      ($all); otherwise keys is matched as-is

        @rtype: tuple of MongoDB documents
        @return: matching documents, or an empty tuple when disabled or on error
        '''
        if not self.enabled:
            return ()
        try:
            criteria = {"$all": keys} if exact else keys
            documents = tuple(self.entities_conn.ontology.ontology.find({"k": criteria}))
            self.entities_conn.end_request()
            return documents
        except BaseException as e:
            logging.warn("Can't access to entities database. Entities disabled.")
        # a failed lookup disables further entity queries
        self.enabled = False
        return ()
Beispiel #17
0
    def pull_lang_code(endpoint, values):
        '''
        Loads the language code into the global context.

        Resolution order: URL language, logged-in user's preference, session
        language, browser Accept-Language negotiation, configured default.
        Also loads the localized language list and the beta-language flag.
        '''

        all_langs = current_app.config["ALL_LANGS"]

        # language requested in the URL, if any
        g.url_lang = None
        if values is not None:
            g.url_lang = values.pop('lang', None)

        # the URL language wins when it is in the allowed list
        if g.url_lang and g.url_lang in all_langs:
            g.lang = g.url_lang
        # logged-in user with an explicit language preference
        elif "user" in session and "lang" in session["user"]:
            g.lang = session["user"]["lang"]
        # anonymous user who explicitly chose a language
        elif "lang" in session:
            g.lang = session["lang"]
        else:
            accept = request.accept_languages.values()
            # negotiate the best match against the browser's Accept-Language list
            locale = Locale.negotiate((option.replace("-","_") for option in accept), all_langs) if accept else None

            if locale:
                g.lang = locale.language
            else:
                # was `app.config`, which is not in scope here; use current_app
                # like the rest of this function does
                g.lang = current_app.config["LANGS"][0] # default when everything else fails

        if g.lang not in all_langs:
            logging.warn("Wrong language choosen.")
            g.lang = current_app.config["LANGS"][0]

        # load the language list with each language's name in its own language
        g.languages = OrderedDict((code, (localedata.load(code)["languages"], code in current_app.config["BETA_LANGS"])) for code in all_langs)
        g.beta_lang = g.lang in current_app.config["BETA_LANGS"]
Beispiel #18
0
def handler():
    '''
    Receives downloader error reports (GET or POST), decodes the compressed
    records payload and logs it.  Returns 202 on success, 500 on failure.
    '''
    try:
        if request.method == "POST":
            data = request.form.to_dict()
        elif request.method == "GET":
            data = request.args.to_dict()
        else:
            abort(404)

        # records arrive urlsafe-base64 encoded and zlib compressed
        raw_records = base64.b64decode(str(data["records"]), "-_")
        data["records"] = json.loads(zlib.decompress(raw_records))
        data["remote_addr"] = request.remote_addr

        logging.warn("Downloader error received", extra=data)

        response = make_response("OK")
        response.status_code = 202
    except BaseException as e:
        logging.exception(e)
        response = make_response("ERROR")
        response.status_code = 500
    response.mimetype = "text/plain"
    return response
Beispiel #19
0
    def generate(self, server, entities_server, part, afilter, batch_size, stop_set=None, stop_set_len=0, last_count=None, headers=True):
        '''
        Indexes files fetched from the given server, writing the generated
        documents through outwrite, in parallel via self.pool when available.

        More than 100 file generation errors abort the indexing by re-raising
        the last error.  Progress is logged every million files.

        NOTE(review): the exact semantics of afilter/stop_set/last_count are
        defined by FilesFetcher, which is not visible here.

        @return: the fetcher's resulting stop_set
        '''
        ff = FilesFetcher(server, entities_server, afilter, batch_size, stop_set, stop_set_len, last_count, self.processes)
        ff.start()
        if headers: self.generate_header()
        count = error_count = 0
        logging.warn("Comienza indexado en servidor %s."%server)
        if self.pool:
            # parallel path: documents already include their separators
            for doc, extra in self.pool.imap(generate_file, (generate_id(afile, part) for afile in ff)):
                count+=1
                if doc:
                    outwrite(doc)
                    stats_file(extra, self.stats)
                elif extra:
                    error_count += 1
                    if error_count>100: raise extra # after more than 100 errors, stop indexing with an error
                if count%1000000==0:
                    outwrite("\n")
                    logging.warn("Progreso de indexado del servidor %s."%(server), extra={"count":count, "error_count":error_count})
        else:
            # sequential path: one newline is appended per document
            for afile in ff:
                doc, extra = generate_file(generate_id(afile, part))
                count+=1
                if doc:
                    outwrite(doc+"\n")
                    stats_file(extra, self.stats)
                elif extra:
                    error_count += 1
                    if error_count>100: raise extra # after more than 100 errors, stop indexing with an error
                if count%1000000==0:
                    logging.warn("Progreso de indexado del servidor %s."%(server), extra={"count":count, "error_count":error_count})

        if headers: self.generate_footer()
        logging.warn("Finaliza indexado en servidor %s."%server)

        self.total_count = ff.total_count
        self.count = count

        return ff.stop_set
Beispiel #20
0
def download_file(file_id,file_name=None):
    '''
    Returns the file to download along with its votes, comments and related
    data, ready for the download template (or an error page on failure).

    @param file_id: external (url-encoded) file id, possibly a legacy id
    @param file_name: optional file name from the URL, used in metadata lookup
    @return: dict with keys "html", "file_data", "error" and possibly "play"
    '''
    error=(None,"") # holds the id and text of an error
    file_data=None
    if file_id is not None: # if an id is given, check that it is valid
        if is_valid_url_fileid(file_id):
            try: # try to convert the id coming from the url to an internal one
                file_id=url2mid(file_id)
            except (bson.objectid.InvalidId, TypeError) as e:
                try: # check whether it is a legacy numeric id
                    possible_file_id = filesdb.get_newid(file_id)
                    if possible_file_id is None:
                        logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                        error=(404,"link_not_exist")
                    else:
                        logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                        # permanent redirect to the modern id URL
                        return {"html": redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":(301,"")}

                except BaseException as e:
                    logging.exception(e)
                    error=(503,"")

                file_id=None
        else:
            abort(404)

        if file_id:
            try:
                file_data=get_file_metadata(file_id, file_name)
            except DatabaseError:
                error=(503,"")
            except FileNotExist:
                error=(404,"link_not_exist")
            except (FileRemoved, FileFoofindRemoved, FileNoSources):
                error=(410,"error_link_removed")
            except FileUnknownBlock:
                error=(404,"")

            if error[0] is None and not file_data: # no errors but no data: the file exists but could not be retrieved
                error=(503,"")

    if file_id is None or error[0] is not None:
        html=""
        if error[0] is not None:  # if there is an error, return it rendered
            message_msgid="error_%s_message" % error[0]
            message_msgstr=_(message_msgid)
            g.title="%s %s" % (error[0], message_msgstr if message_msgstr!=message_msgid else _("error_500_message"))
            html=render_template('error.html',error=error,full_screen=True)

        return {"html": html,"play":None,"file_data":file_data,"error":error}
    else:
        save_visited([file_data])
        title = u(file_data['view']['fn'])
        g.title = u"%s \"%s\" - %s" % (
            _(file_data['view']['action']).capitalize(),
            title[:100],
            g.title)
        g.page_description = u"%s %s"%(_(file_data['view']['action']).capitalize(), seoize_text(title," ",True))

        # if the user is logged in, check whether they voted this file for the active language and marked it as a favorite
        vote=None
        favorite = False
        if current_user.is_authenticated():
            vote=usersdb.get_file_vote(file_id,current_user,g.lang)
            favorite=any(file_id==favorite["id"] for favorite in usersdb.get_fav_files(current_user))

        # form to submit comments
        form = CommentForm(request.form)
        if request.method=='POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type==0) and form.validate():
            usersdb.set_file_comment(file_id,current_user,g.lang,form.t.data)
            form.t.data=""
            flash("comment_published_succesfully")
            # update the file with the per-language comment totals
            filesdb.update_file({"_id":file_id,"cs":usersdb.get_file_comments_sum(file_id),"s":file_data["file"]["s"]},direct_connection=True)

        # if there are comments, keep each comment's number, author, body and votes
        comments=[]
        if "cs" in file_data["file"]:
            comments=[(i,usersdb.find_userid(comment["_id"].split("_")[0]),comment,comment_votes(file_id,comment)) for i,comment in enumerate(usersdb.get_file_comments(file_id,g.lang),1)]

        # on the download page, try to derive search words when there are none
        if g.args.get("q", None) is None:
            query = download_search(file_data, file_name, "foofind")
            if query:
                g.args["q"] = query.replace(":","")

        return {
            "html":render_template('files/download.html',file=file_data,vote={"k":0} if vote is None else vote,favorite=favorite,form=form,comments=comments),
            "play":file_data["view"]["play"] if "play" in file_data["view"] else "",
            "file_data":file_data,
        }
Beispiel #21
0
def signal_handler(signal, frame):
    '''
    Termination handler: logs the file currently being processed and exits
    with status 1.
    '''
    message = "Process killed processing file %s." % current_id
    logging.warn(message)
    sys.exit(1)
Beispiel #22
0
def format_metadata(f,text_cache, search_text_shown=False):
    '''
    Formats the file's metadata for display.

    Fills, in place, f['view']['md'] (displayable metadata),
    f['view']['searches'] (seo-friendly search terms derived from the
    metadata) and f['view']['mdh'] (metadata with search-term
    highlighting applied).

    @param f: file dictionary with 'file' (raw db data) and 'view' keys;
              modified in place
    @param text_cache: cached search-text tuple; position 2 holds the text
                       used for highlighting (or a falsy value)
    @param search_text_shown: unused here; kept for caller compatibility
    '''
    text = text_cache[2] if text_cache else None
    view_md = f['view']['md'] = {}
    view_searches = f["view"]["searches"]={}
    file_type = f['view']['file_type'] if 'file_type' in f['view'] else None
    if 'md' in f['file']:
        # if keys arrive in "type:name" form, strip the type prefix
        file_md = {(meta.split(":")[-1] if ":" in meta else meta): value for meta, value in f['file']['md'].iteritems()}

        # Duration for video and images
        seconds = get_float(file_md, "seconds")
        minutes = get_float(file_md, "minutes")
        hours = get_float(file_md, "hours")

        # If no duration was received any other way, try "length" and "duration"
        if seconds==minutes==hours==None:
            seconds = get_float(file_md, "length") or get_float(file_md, "duration")

        duration = [hours or 0, minutes or 0, seconds or 0] # h, m, s

        if any(duration):
            # normalize overflowing units from right to left (s -> m -> h)
            carry = 0
            for i in xrange(len(duration)-1,-1,-1):
                unit = long(duration[i]) + carry
                duration[i] = unit%60
                carry = unit/60

            view_md["length"] = "%d:%02d:%02d" % tuple(duration) if duration[0] > 0 else "%02d:%02d" % tuple(duration[1:])

        # Dimensions for videos and images
        width = get_int(file_md, "width")
        height = get_int(file_md, "height")
        if width and height:
            view_md["size"] = "%dx%dpx" % (width, height)

        # Metadata copied through unchanged
        try:
            view_md.update(
                (meta, file_md[meta]) for meta in
                (
                    "folders","description","fileversion","os","files","pages","format",
                    "seeds","leechs","composer","publisher","encoding","director","writer","starring","producer","released"
                ) if meta in file_md
            )
            view_searches.update(
                (meta, seoize_text(file_md[meta],"_",False)) for meta in
                (
                    "folders","os","composer","publisher","director","writer","starring","producer"
                ) if meta in file_md
            )
        except BaseException as e:
            logging.warn(e)

        # thumbnail
        if "thumbnail" in file_md:
            f["view"]["thumbnail"] = file_md["thumbnail"]

        # metadata known under several alternative names
        try:
            view_md.update(("tags", file_md[meta]) for meta in ("keywords", "tags", "tag") if meta in file_md)
            if "tags" in view_md and isinstance(view_md["tags"], basestring):
                view_searches["tags"] = []
            view_md.update(("comments", file_md[meta]) for meta in ("comments", "comment") if meta in file_md)
            view_md.update(("track", file_md[meta]) for meta in ("track", "track_number") if meta in file_md)
            view_md.update(("created_by", file_md[meta]) for meta in ("created_by", "encodedby","encoder") if meta in file_md)
            view_md.update(("language", file_md[meta]) for meta in ("language", "lang") if meta in file_md)
            view_md.update(("date", file_md[meta]) for meta in ("published", "creationdate") if meta in file_md)
            view_md.update(("trackers", "\n".join(file_md[meta].split(" "))) for meta in ("trackers", "tracker") if meta in file_md and isinstance(file_md[meta], basestring))
            view_md.update(("hash", file_md[meta]) for meta in ("hash", "infohash") if meta in file_md)
            view_md.update(("visualizations", file_md[meta]) for meta in ("count", "viewCount") if meta in file_md)
            if "unpackedsize" in file_md:
                view_md["unpacked_size"]=file_md["unpackedsize"]

            if "privateflag" in file_md:
                view_md["private_file"]=file_md["privateflag"]
        except BaseException as e:
            logging.warn(e)

        # torrents -> filedir filesizes filepaths
        if "filepaths" in file_md:
            # build a nested dict: "/dir" keys are subdirectories,
            # plain keys are files mapped to their size
            filepaths = {}
            for path, size in izip_longest(u(file_md["filepaths"]).split("///"), u(file_md.get("filesizes","")).split(" "), fillvalue=None):
                # do not allow sizes without a file
                if not path: break
                parts = path.strip("/").split("/")

                # create subdirectories
                relative_path = filepaths
                for part in parts[:-1]:
                    if "/"+part not in relative_path:
                        relative_path["/"+part] = {}
                    relative_path = relative_path["/"+part]

                # if the directory already exists, do nothing
                if "/"+parts[-1] in relative_path:
                    pass
                # if the last level repeats, it is a directory (content error)
                elif parts[-1] in relative_path:
                    relative_path["/"+parts[-1]] = {}
                    del relative_path[parts[-1]]
                else:
                    relative_path[parts[-1]] = size

            if "filedir" in file_md:
                filepaths = {"/"+u(file_md["filedir"]).strip("/"):filepaths}

            if filepaths:
                view_md["filepaths"] = filepaths
                view_searches["filepaths"] = {}

        # Multimedia metadata
        try:
            # extract video and/or audio codec
            if "video_codec" in file_md: # if there is a video_codec, append the audio_codec after it when present
                view_md["codec"]=file_md["video_codec"]+" "+file_md["audio_codec"] if "audio_codec" in file_md else file_md["video_codec"]
            else: # otherwise copy them through directly
                view_md.update(("codec", file_md[meta]) for meta in ("audio_codec", "codec") if meta in file_md)

            if file_type in ("audio", "video", "image"):
                view_md.update((meta, file_md[meta]) for meta in ("genre", "track", "artist", "author", "colors") if meta in file_md)
                view_searches.update((meta, seoize_text(file_md[meta], "_", False)) for meta in ("artist", "author") if meta in file_md)
        except BaseException as e:
            logging.warn(e)

        # Do not show the title if it equals the file name
        if "name" in file_md:
            title = u(file_md["name"])
        elif "title" in file_md:
            title = u(file_md["title"])
        else:
            title = f['view']['nfn']

        if title:
            show_title = True
            text_longer = title
            text_shorter = f["view"]["fn"]
            if len(text_shorter)>len(text_longer):
                text_longer, text_shorter = text_shorter, text_longer

            # hide the title when it only differs from the file name by
            # a known extension suffix
            if text_longer.startswith(text_shorter):
                text_longer = text_longer[len(text_shorter):]
                if len(text_longer)==0 or (len(text_longer)>0 and text_longer.startswith(".") and text_longer[1:] in EXTENSIONS):
                    show_title = False

            if show_title:
                view_md["title"] = title
                view_searches["title"] = seoize_text(title, "_", False)

        # Metadata that varies or is specific to a file type
        try:
            if "date" in view_md: # try to obtain a valid date
                try:
                    view_md["date"]=format_datetime(datetime.fromtimestamp(strtotime(view_md["date"])))
                except:
                    del view_md["date"]

            if file_type == 'audio': # album, year, bitrate, seconds, track, genre, length
                if 'album' in file_md:
                    album = u(file_md["album"])
                    year = get_int(file_md, "year")
                    if album:
                        view_md["album"] = album + (" (%d)"%year if year and 1900<year<2100 else "")
                        view_searches["album"] = seoize_text(album, "_", False)
                if 'bitrate' in file_md: # bitrate, or bitrate - soundtype, or bitrate - soundtype - channels
                    bitrate = get_int(file_md, "bitrate")
                    if bitrate:
                        soundtype=" - %s" % file_md["soundtype"] if "soundtype" in file_md else ""
                        channels = get_float(file_md, "channels")
                        channels=" (%g %s)" % (round(channels,1),_("channels")) if channels else ""
                        view_md["quality"] = "%g kbps %s%s" % (bitrate,soundtype,channels)

            elif file_type == 'document': # title, author, pages, format, version
                if "format" in file_md:
                    view_md["format"] = "%s%s" % (file_md["format"]," %s" % file_md["formatversion"] if "formatversion" in file_md else "")
                version = []
                if "formatVersion" in file_md:
                    version.append(u(file_md["formatVersion"]))
                elif "version" in file_md:
                    version.append(u(file_md["version"]))

                if "revision" in file_md:
                    version.append(u(file_md["revision"]))

                if version:
                    view_md["version"] = " ".join(version)
            elif file_type == 'image': # title, artist, description, width, height, colors
                pass
            elif file_type == 'software': # title, version, fileversion, os
                if "title" in view_md and "version" in file_md:
                     view_md["title"] += " %s" % file_md["version"]
                     view_searches["title"] += " %s" % seoize_text(file_md["version"], "_", False)
            elif file_type == 'video':
                quality = []

                framerate = get_int(file_md, "framerate")
                if framerate:
                    quality.append("%d fps" % framerate)

                if 'codec' in view_md: # if a codec was set, show it only inside quality now
                    quality.append(u(view_md["codec"]))
                    del view_md["codec"]

                if quality:
                    view_md["quality"] = " - ".join(quality)

                if "series" in file_md:
                    series = u(file_md["series"])
                    if series:
                        safe_series = seoize_text(series, "_", False)
                        view_md["series"] = series
                        view_searches["series"]="%s_%s"%(safe_series,"(series)")

                        season = get_int(file_md, "season")
                        if season:
                            view_md["season"] = season
                            view_searches["season"]="%s_(s%d)"%(safe_series,season)

                            episode = get_int(file_md, "episode")
                            if episode:
                                view_md["episode"] = episode
                                view_searches["episode"]="%s_(s%de%d)"%(safe_series,season,episode)

        except BaseException as e:
            logging.exception("Error obteniendo metadatos especificos del tipo de contenido.")

        # build the highlighted copy; clean html from string values first
        # (Python 2: .items() returns a list, so deleting while iterating is safe)
        view_mdh=f['view']['mdh']={}
        for metadata,value in view_md.items():
            if isinstance(value, basestring):
                value = clean_html(value)
                if not value:
                    del view_md[metadata]
                    continue

                view_md[metadata]=value

                # highlight content matching the search, for not-too-long texts
                if len(value)<500:
                    view_mdh[metadata]=highlight(text,value) if text and len(text)<100 else value
            elif isinstance(value, float): # there is no float-typed metadata
                view_md[metadata]=str(int(value))
            else:
                view_md[metadata]=value
Beispiel #23
0
def download(file_id, file_name=""):
    '''
    Renders a file's download page.

    Resolves legacy numeric file ids (redirecting to the new url),
    fetches and completes the file metadata, blocks blacklisted content,
    runs a related-torrents search and builds the SEO title and
    description before rendering the template.

    @type file_id: basestring
    @param file_id: url-encoded file id (or None)

    @type file_name: basestring
    @param file_name: seo file name taken from the url

    @rtype: rendered template or redirect dict
    @return: the download page, or a 301 redirect for legacy ids

    @raise HTTPException: via abort() with 404/410/503 for missing,
                          removed or temporarily unavailable files
    '''
    g.page_type = FILE_PAGE_TYPE
    if request.referrer:
        try:
            posibles_queries = referrer_parser.match(request.referrer)
            if posibles_queries:
                query = posibles_queries.group(1) or posibles_queries.group(2) or ""
                if query:
                    # BUGFIX: unquote_plus lives in urllib, not urllib2; the
                    # previous urllib2.unquote_plus call always raised
                    # AttributeError (silently swallowed by the except below),
                    # so the referrer's search query was never registered.
                    import urllib
                    get_query_info(u(urllib.unquote_plus(query).decode("utf-8")))
        except:
            # best effort only: a malformed referrer must never break the page
            pass

    error = None
    file_data=None
    if file_id is not None: # if an id is given, check that it is valid
        try: # try to convert the id coming from the url into an internal one
            file_id=url2mid(file_id)
        except TypeError as e:
            try: # check whether it is a legacy numeric id
                possible_file_id = filesdb.get_newid(file_id)
                if possible_file_id is None:
                    logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                    error=404
                else:
                    logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                    return {"html": empty_redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":301}

            except BaseException as e:
                logging.exception(e)
                error=503

            file_id=None

        if file_id:
            try:
                file_data=get_file_metadata(file_id, file_name.replace("-"," "))
            except DatabaseError:
                error=503
            except FileNotExist:
                error=404
            except (FileRemoved, FileFoofindRemoved, FileNoSources):
                error=410
            except FileUnknownBlock:
                error=404

            if error is None and not file_data: # no error and no data: the file exists but could not be retrieved
                error=503

    if error:
        abort(error)

    # complete torrent data
    file_data = torrents_data(file_data, True, g.category)
    if not file_data:
        abort(404)

    if file_data["view"]["category"]:
        g.category = file_data["view"]["category"]
        if file_data["view"]["category"].tag=="p**n":
            g.is_adult_content = True
    else:
        g.category = file_data["view"]["category_type"]

    # forbid access to files that should be blocked
    prepared_phrase = blacklists.prepare_phrase(file_data['view']['nfn'])
    if prepared_phrase in blacklists["forbidden"] or (prepared_phrase in blacklists["misconduct"] and prepared_phrase in blacklists["underage"]):
        g.blacklisted_content = "File"
        if not g.show_blacklisted_content:
            abort(404)

    query = download_search(file_data, file_name, "torrent").replace("-"," ")
    related = single_search(query, category=None, not_category=(None if g.is_adult_content else "p**n"), title=("Related torrents",3,None), zone="File / Related", last_items=[], limit=30, max_limit=15, ignore_ids=[mid2hex(file_id)], show_order=None)

    # choose the page title
    title = file_data['view']['fn']

    # trim the title back to the nearest separator
    if len(title)>101:
        for pos in xrange(101, 30, -1):
            if title[pos] in SEPPER:
                title = title[:pos].strip()
                break
        else:
            title = title[:101]

    g.title = [title]

    page_description = ""
    if "description" in file_data["view"]["md"]:
        page_description = file_data["view"]["md"]["description"].replace("\n", " ")

    if not page_description:
        if g.category:
            page_description = _("download_category_desc", category=singular_filter(g.category.title).lower(), categorys=g.category.title.lower()).capitalize()
        else:
            page_description = _("download_desc")


    # pad too-short descriptions with related-files text
    if len(page_description)<50:
        if page_description:
           page_description += ". "
        page_description += " ".join(text.capitalize()+"." for text in related[1]["files_text"])

    # cut too-long descriptions at the last sentence or word boundary
    if len(page_description)>180:
        last_stop = page_description[:180].rindex(".") if "." in page_description[:180] else 0
        if last_stop<100:
            last_stop = page_description[:180].rindex(" ") if " " in page_description[:180] else 0
        if last_stop<100:
            last_stop = 180
        page_description = page_description[:last_stop]+"."

    g.page_description = page_description

    is_canonical_filename = file_data["view"]["seo-fn"]==file_name

    # register the visit to the file
    if g.search_bot:
        searchd.log_bot_event(g.search_bot, True)
    else:
        save_visited([file_data])

    if related[0]:
        g.must_cache = 3600

    # last-modified
    g.last_modified = file_data["file"]["ls"]

    return render_template('file.html', related_query = query, file_data=file_data, related_files=related, is_canonical_filename=is_canonical_filename, featured=get_featured(related[1]["count"]+len(file_data["view"]["md"]), 1))
Beispiel #24
0
def get_file_metadata(file_id, file_name=None):
    '''
    Fetches the file from the database and fills in its metadata.

    @type file_id: mongoid
    @param file_id: mongo id of the file

    @type file_name: basestring
    @param file_name: name of the file

    @rtype dict
    @return file data dictionary with its metadata filled in

    @raise DatabaseError: if the database connection fails
    @raise FileNotExist: if the file does not exist or has been blocked
    @raise FileRemoved: if the file has been removed from its source
    @raise FileFoofindRemoved: if the file has been blocked by foofind
    @raise FileUnknownBlock: if the file is blocked for an unknown reason
    @raise FileNoSources: if the file has no sources
    '''
    try:
        data = filesdb.get_file(file_id, bl = None)
    except BaseException as e:
        logging.exception(e)
        raise DatabaseError

    # tries to get the id of the sphinx search server,
    # resolving data inconsistencies
    if not data:
        sid = searchd.get_id_server_from_search(file_id, file_name)
        if sid:
            try:
                # retry the fetch against the specific server id
                data = filesdb.get_file(file_id, sid = sid, bl = None)
                if feedbackdb.initialized:
                    feedbackdb.notify_indir(file_id, sid)
            except BaseException as e:
                logging.exception(e)
                raise DatabaseError

    if data:
        # 'bl' (block flag) may arrive as a numeric string; normalize to int
        bl = data.get("bl",None)
        if bl and isinstance(bl, (str, unicode)) and bl.isdigit():
            bl = int(bl)
        if bl:
            # known block reasons: 1 = blocked by foofind, 3 = removed at source
            if bl == 1: raise FileFoofindRemoved
            elif bl == 3: raise FileRemoved
            logging.warn(
                "File with an unknown 'bl' value found: %s" % repr(bl),
                    extra=data)
            raise FileUnknownBlock

        file_se = data["se"] if "se" in data else None

        # resolve the entity referenced by the file's search-engine data, if any
        file_ntt = entitiesdb.get_entity(file_se["_id"]) if file_se and "_id" in file_se else None
        ntts = {file_se["_id"]:file_ntt} if file_ntt else {}

        '''
        # trae entidades relacionadas
        if file_ntt and "r" in file_ntt:
            rel_ids = list(set(eid for eids in file_ntt["r"].itervalues() for eid in eids))
            ntts.update({int(ntt["_id"]):ntt for ntt in entitiesdb.get_entities(rel_ids, None, (False, [u"episode"]))})
        '''
    else:
        raise FileNotExist

    # obtain the full file data with metadata filled in
    return fill_data(data, file_name, ntts)