def init_app(self, app, filesdb, entitiesdb, profiler):
    '''
    Initializes the search subsystem for the given application.

    Wires the Sphinx client into the app, builds the SearchProxy from the
    databases and profiler, and starts the client against the proxy's
    servers. Any startup failure is logged and swallowed so the rest of
    the application can keep booting without search.
    '''
    try:
        self.sphinx.init_app(app)
        self.proxy = SearchProxy(app.config, filesdb, entitiesdb, profiler, self.sphinx)
        self.sphinx.start_client(self.proxy.servers.keys())
    except BaseException as e:
        # Fixed typo in the log message ("deamon" -> "daemon").
        logging.exception("Error on search daemon initialization.")
def pull_alternatives(self):
    ''' Downloads the alternatives configuration from the server and applies it. '''
    last = 0
    try:
        # Only fetch alternatives for endpoints served by this instance and
        # modified since the last pull ("lt" is a last-modified timestamp).
        for alternative in self.config_conn.config.alternatives.find({ "_id": { "$in": self._views.keys() }, "lt" : { "$gt": self._alternatives_lt } }):
            endpoint = alternative["_id"]
            if endpoint in self._alternatives_skip:
                # Endpoint flagged to be skipped once; consume the flag.
                self._alternatives_skip.remove(endpoint)
            elif endpoint in self._views and "config" in alternative:
                if "probability" in alternative["config"]:
                    # Probability is stored as a list due to MongoDB limitations
                    alternative["config"]["probability"] = dict(alternative["config"]["probability"])
                self._views[endpoint].select.config(alternative["config"])
            # keep track of the newest timestamp seen
            if alternative.get("lt", 0) > last:
                last = alternative["lt"]
        self._alternatives_lt = max(self._alternatives_lt, last)
        self.config_conn.end_request()
    except pymongo.errors.AutoReconnect as e:
        logging.exception("Can't access to config database.")
def number_size_format(size, lang="en"):
    ''' Formats a file size for the given language (decimal, 1000-based units).

    Returns a tuple (formatted_number, (short_unit, long_unit)).
    Returns (None, None) for empty input or on error, and
    ("0", ("B", "bytes")) when the size rounds to zero.
    '''
    if not size:
        return None, None
    elif int(float(size)) == 0:
        return "0", ("B", "bytes")
    # memoize locale separators per language
    if lang in format_cache:
        decimal_sep, group_sep = format_cache[lang]
    else:
        decimal_sep, group_sep = format_cache[lang] = (get_decimal_symbol(lang), get_group_symbol(lang))
    try:
        # Compare numerically: size may arrive as a string, and a Py2
        # str/int comparison would silently take the wrong branch and pick
        # the wrong unit below.
        if float(size) < 1000:  # bytes need no unit scaling
            return str(size), ("B", "bytes")
        else:
            size = log(float(size), 1000)
            number = 1000 ** (size - int(size))
            # two decimal digits, omitted when zero, zero-padded under 10
            dec_part = int((number - int(number)) * 100)
            dec_part = "" if dec_part == 0 else decimal_sep + "0" + str(dec_part) if dec_part < 10 else decimal_sep + str(dec_part)
            # group the integer digits by thousands and pick the unit
            return ''.join(reversed([c + group_sep if i != 0 and i % 3 == 0 else c for i, c in enumerate(reversed(str(int(number))))])) + dec_part, (("KB", "kilobytes"), ("MB", "megabytes"), ("GB", "gigabytes"), ("TB", "terabytes"))[int(size) - 1]
    except BaseException as e:
        logging.exception(e)
        return None, None
def copyright():
    ''' Shows the form to report links (copyright complaints). '''
    g.cache_code = "S"
    g.category = False
    g.page_description = _("torrentsfm_desc")
    g.keywords.clear()
    g.keywords.update(["torrents search engine popular largest copyright"])
    g.title.append(_("Copyright form"))
    form = ComplaintForm(request.form)
    if request.method=='POST':
        if "file_id" in request.form:
            # First POST phase: prefill the form from the reported file.
            try:
                file_id = request.form["file_id"]
                file_name = request.form.get("file_name",None)
                data = torrents_data(get_file_metadata(url2mid(file_id), file_name))
                if data:
                    form.urlreported.data=url_for("files.download",file_id=file_id,file_name=file_name,_external=True, _secure=False)
                    # Prefer the magnet link; fall back to direct, then indirect, download sources.
                    form.linkreported.data=data['view']["sources"]["tmagnet"]["urls"][0] if "tmagnet" in data['view']["sources"] else data['view']["sources"]["download"]["urls"][0] if "download" in data['view']["sources"] else data['view']["sources"]["download_ind"]["urls"][0]
            except BaseException as e:
                logging.exception(e)
        elif form.validate():
            # Second POST phase: store the complaint together with the client IP.
            pagesdb.create_complaint(dict([("ip",request.remote_addr)]+[(field.name,field.data) for field in form]))
            return empty_redirect(url_for('.home', _anchor="sent"))
    return render_template('copyright.html',form=form)
def run(self):
    '''
    Worker loop: consumes files from self.requests, attaches ontology
    entity information to each, and pushes them to self.results.
    A None request is the shutdown sentinel. Files whose main entity is
    missing are logged to nf_ntts.csv.
    '''
    gconn = None
    not_found_count = 0
    with open("nf_ntts.csv", "w") as not_found_ntts:
        while True:
            # get entity-lookup requests; blocks until one is available
            afile = self.requests.get(True)
            if afile is None:
                self.requests.task_done()
                break
            # lazily (re)open the connection — it is dropped on errors below
            if not gconn:
                gconn = pymongo.Connection(self.server, slave_okay=True)
            try:
                # look up the main entity
                main_ntt_id = int(afile["se"]["_id"])
                ntt = gconn.ontology.ontology.find_one({"_id":main_ntt_id})
                ntts1_info = set()
                ntts2_info = set()
                if ntt:
                    afile["se"]["info"] = ntt
                    # look up first and second level entities
                    if "r" in ntt and ntt["r"]:
                        # builds the list of first-level entities and relation types
                        ntts1_info = {(ntt_id, relation[:3]) for relation, relation_ids in ntt["r"].iteritems() for ntt_id in relation_ids if ntt_id!=main_ntt_id}
                        # if there are first-level entities...
                        if ntts1_info:
                            # fetch first-level entities
                            ntts1_ids = [ntt_id for ntt_id, relation in ntts1_info]
                            ntts1 = list(gconn.ontology.ontology.find({"_id":{"$in":ntts1_ids}}))
                            # builds the list of second-level entities and relation types
                            ntts1_ids.append(main_ntt_id) # adds the main id so the list can be used as a filter
                            ntts2_info = {(ntt_id, relation[:3]) for ntt2 in ntts1 if "r" in ntt2 for relation, relation_ids in ntt2["r"].iteritems() for ntt_id in relation_ids if ntt_id not in ntts1_ids}
                    afile["se"]["rel"] = (ntts1_info, ntts2_info)
                else:
                    not_found_ntts.write(str(afile["_id"])+"\n")
                    not_found_count += 1
                del afile["se"]["_id"]
            except BaseException:
                ntt_id = str(afile["se"]["_id"]) if "_id" in afile["se"] else "???"
                # NOTE(review): this del raises KeyError when "_id" was already
                # removed before the failure — confirm intended behavior.
                del afile["se"]["_id"]
                # drop the connection so the next request reconnects cleanly
                gconn.close()
                gconn = None
                logging.exception("Error obtaining entities for file %s: %s."%(str(afile["_id"]), ntt_id))
            self.results.put(afile)
            self.requests.task_done()
    if not_found_count:
        logging.warn("Entities not found for some files. Check file nf_ntts.csv.")
def embed_info(f):
    ''' Adds embed information to the file view, picking the first embeddable source. '''
    embed_width = 560
    embed_height = 315
    embed_code = None
    for src_id, src_data in f["file"]["src"].iteritems():
        source_id = src_data["t"]
        source_data = g.sources.get(source_id, None)
        # skip sources with embedding disabled or without an embed template
        if not (source_data and source_data.get("embed_active", False) and "embed" in source_data):
            continue
        try:
            embed_code = source_data["embed"]
            # checks whether the content type can be embedded
            embed_cts = source_data["embed_cts"] if "embed_cts" in source_data else DEFAULT_EMBED_CTS
            if not f["view"]["ct"] in embed_cts:
                continue
            embed_groups = ()
            # id taken straight from the sources
            if "source_id" in f["view"] and f["view"]["source_id"]:
                embed_groups = {"id": f["view"]["source_id"]}
            elif "url_embed_regexp" in source_data and source_data["url_embed_regexp"]:
                # checks whether the url can be used for embedding
                embed_url = src_data["url"]
                regexp = source_data["url_embed_regexp"]
                embed_match = cache.regexp(regexp).match(embed_url)
                if embed_match is None:
                    continue
                embed_groups = embed_match.groupdict()
            if "%s" in embed_code and "id" in embed_groups:
                # Simple mode: every %s swapped for the id
                embed_code = embed_code % (
                    # Workaround for embed templates with several %s;
                    # replace() is not used so escapes ('\%s') keep working
                    (embed_groups["id"],) * embed_code.count("%s")
                    )
            else:
                # Full mode: %(variable)s swapped for named groups and metadata
                replace_dict = dict(f["file"]["md"])
                replace_dict["width"] = embed_width
                replace_dict["height"] = embed_height
                replace_dict.update(embed_groups)
                try:
                    embed_code = embed_code % replace_dict
                except KeyError as e:
                    # Missing 'special:' metadata is expected; don't log it
                    if all(i.startswith("special:") for i in e.args):
                        continue
                    raise e
        except BaseException as e:
            logging.exception(e)
            continue
        f["view"]["embed"] = embed_code
        f["view"]["play"] = (source_data.get("embed_disabled", ""), source_data.get("embed_enabled", ""))
        break
def generate_file(args):
    ''' Builds a sphinx:document XML tag for a single file record.

    Returns (document_tag, file) on success, (None, None) when the file
    is rejected by init_file, or (None, exception) on error.
    '''
    file_id, afile = args
    try:
        if not init_file(afile):
            return None, None
        children = []
        for name, field, field_type in items:
            # missing/empty fields are rendered as False
            value = afile[field] if field and field in afile and afile[field] else False
            children.append(tag(name, value, children_type=field_type, separator=","))
        return tag("sphinx:document", children, id=file_id, children_type=list), afile
    except BaseException as e:
        logging.exception("Error processing file %s.\n"%str(afile["_id"]))
        return None, e
def number_format_filter(number):
    ''' Formats a number for the currently active language; "" on failure. '''
    try:
        formatted = format_number(number, g.lang)
    except BaseException as e:
        logging.exception(e)
        return ""
    return formatted
def save_search(self, search, rowid, cat_id):
    ''' Stores a search in the stats collection; failures are non-fatal. '''
    if not self.searches_conn:
        return
    record = {"_id": bson.objectid.ObjectId(rowid[:12]), "t": time(), "s": search, "c": cat_id}
    try:
        self.searches_conn.torrents.searches.insert(record)
        self.searches_conn.end_request()
    except pymongo.errors.DuplicateKeyError as e:
        # don't log when a search is duplicated
        pass
    except BaseException as e:
        logging.exception("Can't register search stats.")
def secure_fill_data(file_data, text=None, ntts={}):
    ''' Error-safe wrapper around fill_data: any failure is logged and yields None. '''
    try:
        result = fill_data(file_data, text, ntts)
    except BaseException as e:
        logging.exception("Fill_data error on file %s: %s"%(str(file_data["_id"]),repr(e)))
        return None
    return result
def init_g(): g.accept_cookies = None # argumentos de busqueda por defecto g.args = {} g.active_types = {} g.active_srcs = {} # caracteristicas del cliente g.full_browser=is_full_browser() g.search_bot=is_search_bot() # peticiones en modo preproduccion g.beta_request = request.url_root[request.url_root.index("//")+2:].startswith("beta.") # prefijo para los contenidos estáticos if g.beta_request: app_static_prefix = current_app.static_url_path else: app_static_prefix = current_app.config["STATIC_PREFIX"] or current_app.static_url_path g.static_prefix = app_static_prefix current_app.assets.url = app_static_prefix + "/" g.autocomplete_disabled = "false" if current_app.config["SERVICE_TAMING_ACTIVE"] else "true" # dominio de la web g.domain = "foofind.is" if request.url_root.rstrip("/").endswith(".is") else "foofind.com" # informacion de la página por defecto g.title = g.domain g.keywords = set() g.page_description = g.title g.full_lang = current_app.config["ALL_LANGS_COMPLETE"][g.lang] # downloader links g.downloader = current_app.config["DOWNLOADER"] g.downloader_properties = local_cache["downloader_properties"] g.user_build = current_app.config["DOWNLOADER_DEFAULT_BUILD"] # Find the best active build for the user for build, info in g.downloader_properties.iteritems(): try: if build != "common" and info["active"] and info["length"] and info.get("check_user_agent", lambda x:False)(request.user_agent): g.user_build = build except BaseException as e: logging.exception(e) accept_cookies = request.cookies.get("ck", "0") if accept_cookies=="0": if not (any(lang_code in request.accept_languages.values() for lang_code in current_app.config["SPANISH_LANG_CODES"]) or request.remote_addr in spanish_ips): accept_cookies = "2" g.accept_cookies = accept_cookies
def decorated_function(*args, **kwargs):
    '''
    Caching wrapper: runs f, stores the result in the cache, and serves
    the last cached value when f fails with an allowed error or when a
    fallback is explicitly requested via ThrowFallback.
    '''
    # NOTE(review): this touch of ThrowFallback._enabled has no visible use
    # here — presumably forces early attribute resolution; confirm why.
    __ = ThrowFallback._enabled
    cache_key = decorated_function.make_cache_key(*args, **kwargs)
    try:
        rv = f(*args, **kwargs)
        self.cache.set(cache_key, rv, timeout=decorated_function.cache_timeout)
        return rv
    except ThrowFallback:
        # The fallback has been explicitly requested
        self.cacheme = False
    except BaseException as e:
        if errors and not isinstance(e, errors):
            # This error type wasn't requested to be captured; propagate it
            raise
        self.cacheme = False
        logging.exception(e)
    # serve the last known good value from the cache
    return self.cache.get(cache_key)
def delete(category, text):
    ''' Removes a blacklist entry (expanding the _age_ placeholder) and redirects home. '''
    try:
        if "_age_" in text:
            # entries with the _age_ placeholder expand to ages 0..17
            for i in xrange(18):
                torrentsdb.remove_blacklist_entry(category, parse_entry(text.replace("_age_",str(i))))
        else:
            torrentsdb.remove_blacklist_entry(category, parse_entry(text))
        # Notifies the web to refresh the blacklists and forces the refresh to
        # happen in this thread before the updated list is shown
        configdb.run_action("refresh_blacklists")
        configdb.pull_actions()
        g.alert["bl_deleted"] = ("info",Markup("Entry deleted: %s. <a href='%s?category=%s&text=%s'>Undo</a>" % (text, url_for('blacklist.home'), category, urllib.quote(text))))
    except BaseException as e:
        logging.exception(e)
        g.alert["bl_del_error"] = ("error", "Error deleting entry.")
    return redirect(url_for('blacklist.home'))
def load(self, filename): ''' Load ip ranges from a file. This must have one line per range in format IP/RANGE_SIZE ''' try: # read file with open(filename, 'r') as f: ip_ranges = dict(self.parse_ip_range(ip_range) for ip_range in f if ip_range) # calculates the value for N self.N = 32-min(ip_ranges.itervalues()) # create masks groups for range_ip, range_mask_size in ip_ranges.iteritems(): masks_key = range_ip>>self.N if masks_key not in self.ips: self.ips[masks_key] = [] range_mask = ((1L<<range_mask_size)-1)<<(32-range_mask_size) self.ips[masks_key].append((range_ip&range_mask, range_mask)) except BaseException as e: logging.exception(e)
def number_size_format_filter(size, lang=None):
    ''' Formats a file size in the current language using binary (1024) units. '''
    if not size:
        return ""
    elif int(float(size))==0:
        return "0 B"
    if not lang:
        lang = g.lang
    # memoize locale separators per language
    if lang not in format_cache:
        format_cache[lang] = (get_decimal_symbol(lang), get_group_symbol(lang))
    decimal_sep, group_sep = format_cache[lang]
    try:
        value = float(size)
        if value < 1000:
            # plain bytes need no scaling
            return str(size) + " B"
        exponent = log(value, 1024)
        mantissa = 1024 ** (exponent - int(exponent))
        unit_shift = 0
        if mantissa >= 1000:
            # sizes between 1000 and 1024 roll over to the next unit
            mantissa /= 1024
            unit_shift = 1
        # two decimal digits, omitted when zero, zero-padded under 10
        cents = int((mantissa - int(mantissa)) * 100)
        if cents == 0:
            decimals = ""
        elif cents < 10:
            decimals = decimal_sep + "0" + str(cents)
        else:
            decimals = decimal_sep + str(cents)
        # thousands-grouped integer part
        grouped = []
        for position, digit in enumerate(reversed(str(int(mantissa)))):
            grouped.append(digit + group_sep if position != 0 and position % 3 == 0 else digit)
        unit = (" KiB", " MiB", " GiB", " TiB")[int(exponent) - 1 + unit_shift]
        return ''.join(reversed(grouped)) + decimals + unit
    except BaseException as e:
        logging.exception(e)
        return ""
def number_size_format(size, lang="en"):
    ''' Formats a file size for the given language (decimal, 1000-based units).

    Returns (formatted_number, (short_unit, long_unit)); (None, None) on
    empty input or error; ("0", ("B", "bytes")) when the size rounds to zero.
    '''
    if not size:
        return None, None
    elif int(float(size)) == 0:
        return "0", ("B", "bytes")
    # memoize locale separators per language
    if lang in format_cache:
        decimal_sep, group_sep = format_cache[lang]
    else:
        decimal_sep, group_sep = format_cache[lang] = (
            get_decimal_symbol(lang), get_group_symbol(lang))
    try:
        # Compare numerically: size may arrive as a string, and a Py2
        # str/int comparison would silently take the wrong branch and pick
        # the wrong unit below.
        if float(size) < 1000:  # bytes need no unit scaling
            return str(size), ("B", "bytes")
        else:
            size = log(float(size), 1000)
            number = 1000**(size - int(size))
            # two decimal digits, omitted when zero, zero-padded under 10
            dec_part = int((number - int(number)) * 100)
            dec_part = "" if dec_part == 0 else decimal_sep + "0" + str(
                dec_part) if dec_part < 10 else decimal_sep + str(dec_part)
            # group the integer digits by thousands and pick the unit
            return ''.join(
                reversed([
                    c + group_sep if i != 0 and i % 3 == 0 else c
                    for i, c in enumerate(reversed(str(int(number))))
                ])) + dec_part, (("KB", "kilobytes"), ("MB", "megabytes"),
                                 ("GB", "gigabytes"),
                                 ("TB", "terabytes"))[int(size) - 1]
    except BaseException as e:
        logging.exception(e)
        return None, None
def home():
    ''' Shows the blacklist management page and handles entry additions. '''
    form = BlacklistForm(request.args)
    if form.action.data=="add" and form.validate():
        try:
            text = form.text.data
            category = form.category.data
            if "_age_" in text:
                # entries with the _age_ placeholder expand to ages 0..17
                for i in xrange(18):
                    torrentsdb.add_blacklist_entry(category, parse_entry(text.replace("_age_",str(i))))
            else:
                torrentsdb.add_blacklist_entry(category, parse_entry(text))
            # Notifies the web to refresh the blacklists and forces the refresh
            # to happen in this thread before the updated list is shown
            configdb.run_action("refresh_blacklists")
            configdb.pull_actions()
            form.text.data = ""
            g.alert["bl_added"] = ("info", Markup("Entry added: %s. <a href='%s'>Undo</a>" % (text, url_for('blacklist.delete', category=category, text=text))))
        except BaseException as e:
            logging.exception(e)
            g.alert["bl_add_error"] = ("error", "Error adding entry.")
    return render_template('blacklist.html', blacklists=blacklists, form=form)
def complaint(file_id=None,file_name=None):
    ''' Shows the form to report links. '''
    form = ReportLinkForm(request.form)
    if request.method=='POST':
        if "file_id" in request.form:
            # First POST phase: prefill the form from the reported file.
            try:
                file_id = request.form["file_id"]
                file_name = request.form.get("file_name",None)
                data = filesdb.get_file(url2mid(file_id), bl = None)
                if data:
                    form.urlreported.data=url_for("files.download",file_id=file_id,file_name=file_name,_external=True).replace("%21","!")
                    # report the first available source url
                    form.linkreported.data=data["src"].itervalues().next()["url"]
            except BaseException as e:
                logging.exception(e)
        elif form.validate():
            # Second POST phase: store the complaint together with the client IP.
            pagesdb.create_complaint(dict([("ip",request.remote_addr)]+[(field.name,field.data) for field in form]))
            flash("message_sent")
            return redirect(url_for('index.home'))
    g.title+=_("complaint")
    return render_template('pages/complaint.html',page_title=_("complaint"),pagination=["privacy","legal",4,4],form=form,pname="complaint")
def contact():
    ''' Shows the contact form and emails submitted messages. '''
    g.cache_code = "S"
    # chained assignment: both sent_error and g.category end up False
    sent_error = g.category=False
    g.page_description = "Torrents.com is a free torrent search engine that offers users fast, simple, easy access to every torrent in one place."
    g.keywords.clear()
    g.keywords.update(["torrent search engine", "torrents", "free", "download", "popular", "torrents.com"])
    form = ContactForm(request.form)
    if request.method=='POST':
        if form.validate():
            to = current_app.config["CONTACT_EMAIL"]
            try:
                mail.send(Message("contact", sender=form.email.data, recipients=[to], html="<p>%s, %s</p><p>%s</p>"%(request.remote_addr, request.user_agent, form.message.data)))
                return empty_redirect(url_for('.home', _anchor="sent"))
            except BaseException as e:
                # mail failure: show an alert but keep the page working
                g.alert["mail_error"] = ("error", "The message has not been sent. Try again later or send mail to %s."%to)
                logging.exception(e)
    g.title.append("Contact form")
    return render_template('contact.html',form=form, sent_error=sent_error)
def handler():
    ''' Receives downloader error reports via GET or POST and logs them.

    The "records" field carries urlsafe-base64, zlib-compressed JSON.
    Replies 202/"OK" on success and 500/"ERROR" on any failure.
    '''
    try:
        method = request.method
        if method == "POST":
            data = request.form.to_dict()
        elif method == "GET":
            data = request.args.to_dict()
        else:
            abort(404)
        # decode the compressed payload ('-_' are the urlsafe altchars)
        packed = base64.b64decode(str(data["records"]), "-_")
        data["records"] = json.loads(zlib.decompress(packed))
        data["remote_addr"] = request.remote_addr
        logging.warn("Downloader error received", extra=data)
        response = make_response("OK")
        response.status_code = 202
    except BaseException as e:
        logging.exception(e)
        response = make_response("ERROR")
        response.status_code = 500
    response.mimetype = "text/plain"
    return response
def get_video_id(value):
    """
    Extracts the YouTube video id from a URL, or returns None.

    Examples:
    - http://youtu.be/SA2iWivDJiE
    - http://www.youtube.com/watch?v=_oPAwA_Udwc&feature=feedu
    - http://www.youtube.com/embed/SA2iWivDJiE
    - http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US
    """
    try:
        # Unescape HTML-encoded ampersands before parsing. The original
        # replace("&", "&") was a no-op; "&amp;" -> "&" is the intended fix.
        query = urlparse(value.replace("&amp;", "&"))
        if query.hostname == 'youtu.be':
            return query.path[1:]
        if query.hostname in ('www.youtube.com', 'youtube.com'):
            if query.path == '/watch':
                p = parse_qs(query.query)
                return p['v'][0]
            if query.path[:7] == '/embed/':
                return query.path.split('/')[2]
            if query.path[:3] == '/v/':
                return query.path.split('/')[2]
    except BaseException as e:
        logging.exception(e)
    return None
def pull_actions(self):
    ''' Downloads the actions configuration and runs the pending actions. '''
    query = {
        "lt": { "$gt": self._actions_lt },
        "target": {"$in": (self._appid, "*")},
        # only actions registered with the "unique" flag for the first pass
        "actionid": {"$in": [ action[0] for action in self.list_actions() if action[2] ]},
        }
    last = self._actions_lt
    # Tasks that must run only once
    while True:
        try:
            # Atomic operation: fetches and resets the timestamp so the action is not repeated
            action = self.config_conn.config.actions.find_and_modify(query, update={"$set":{"lt":0}})
        except pymongo.errors.AutoReconnect as e:
            action = None
            logging.exception("Can't access to config database.")
        if action is None:
            break
        if action["lt"] > last:
            last = action["lt"]
        actionid = action["actionid"]
        if actionid in self._action_handlers:
            fnc, unique, args, kwargs = self._action_handlers[actionid]
            fnc(*args, **kwargs)
    # Tasks to run on all instances
    # (drop the unique-actions filter so the broadcast pass matches every action)
    del query["actionid"]
    try:
        for action in self.config_conn.config.actions.find(query):
            if action["lt"] > last:
                last = action["lt"]
            actionid = action["actionid"]
            if actionid in self._action_handlers:
                fnc, unique, args, kwargs = self._action_handlers[actionid]
                fnc(*args, **kwargs)
        self._actions_lt = last
    except pymongo.errors.AutoReconnect as e:
        logging.exception("Can't access to config database.")
    except BaseException as e:
        logging.exception("Error running action.")
    finally:
        self.config_conn.end_request()
def init_g(app):
    ''' Initializes per-request globals (flask.g) for the torrents application. '''
    # secure?
    g.secure_request = request.headers.get("X-SSL-Active", "No")=="Yes"
    request.environ['wsgi.url_scheme'] = "https" if g.secure_request else "http"
    # default cache time (seconds)
    g.must_cache = 7200
    # client capabilities
    g.full_browser=is_full_browser()
    g.search_bot=is_search_bot()
    # requests in pre-production (beta) mode
    g.beta_request = request.url_root[request.url_root.index("//")+2:].startswith("beta.")
    # prefix for static content
    if g.beta_request:
        app_static_prefix = app.static_url_path
    else:
        app_static_prefix = app.config["STATIC_PREFIX"] or app.static_url_path
    g.static_prefix = app.assets.url = app_static_prefix
    # allows overriding practically the whole <head> if needed
    g.override_header = False
    # alerts system
    g.alert = {}
    g.keywords = {'torrents', 'download', 'files', 'search', 'audio', 'video', 'image', 'document', 'software'}
    g.show_blacklisted_content = app.config["SHOW_BLACKLISTED_CONTENT"]
    # category information
    g.categories = categories_cache.categories
    g.categories_by_url = categories_cache.categories_by_url
    g.categories_results = None
    g.featured = []
    # current page
    g.page_type = None
    # current search
    g.track = False
    g.query = g.clean_query = None
    g.category = None
    # cookie control
    g.must_accept_cookies = app.config["MUST_ACCEPT_COOKIES"]
    # images server
    g.images_server = app.config["IMAGES_SERVER"]
    g.is_adult_content = False
    # site domain
    g.domain = None
    g.domains_family = app.config["ALLOWED_DOMAINS"]
    for domain in g.domains_family:
        if domain in request.url_root:
            g.domain = domain
            break
    else:
        # unknown domain: bail out, leaving g only partially initialized
        return
    g.section = "torrents" if g.domain=="torrents.fm" else "downloader" if g.domain=="torrents.ms" else "news"
    g.domain_capitalized = g.domain.capitalize()
    # language selector
    g.langs = langs = app.config["LANGS"]
    g.translate_domains = app.config["TRANSLATE_DOMAINS"]
    # pick the language from a "<lang>." subdomain, defaulting to the first one
    g.lang = "".join(lang for lang in langs[1:] if lang+"."+g.domain in request.url_root) or langs[0]
    g.langs_switch = app.config["LANGS_SWITCH"]
    # IMPORTANT: DON'T USE URL_FOR BEFORE THIS POINT IN THIS FUNCTION!
    # RUM
    if "RUM_CODES" in app.config:
        rum_codes = app.config["RUM_CODES"]
        g.RUM_code = rum_codes[g.domain] if g.domain in rum_codes else rum_codes["torrents.com"]
    else:
        g.RUM_code = None
    # default page title
    g.title = [g.domain_capitalized]
    # cookies
    g.domain_cookies = [url_for('index.cookies', _domain=domain) for domain in g.domains_family if domain!=g.domain]
    # Search URL pattern, to avoid too many url_for calls
    g.url_search_base = url_for("files.search", query="___")
    g.url_adult_search_base = url_for("files.category", category="p**n", query="___")
    # allows offering the downloader on download links
    g.offer_downloader = True
    # downloader links
    g.downloader_properties = local_cache["downloader_properties"]
    g.user_build = current_app.config["DOWNLOADER_DEFAULT_BUILD"]
    # Find the best active build for the user
    for build, info in g.downloader_properties.iteritems():
        try:
            if build != "common" and info["active"] and info["length"] and info.get("check_user_agent", lambda x:False)(request.user_agent):
                g.user_build = build
        except BaseException as e:
            logging.exception(e)
    # banners
    g.banners = app.config["BANNERS"]
def format_metadata(f,text_cache, search_text_shown=False):
    ''' Formats the file's metadata for display: fills f['view']['md'] (display
    values), f['view']['searches'] (search links) and f['view']['mdh']
    (highlighted values). '''
    text = text_cache[2] if text_cache else None
    view_md = f['view']['md'] = {}
    view_searches = f["view"]["searches"]={}
    file_type = f['view']['file_type'] if 'file_type' in f['view'] else None
    if 'md' in f['file']:
        # if it comes in type:metadata format, the type prefix is stripped
        file_md = {(meta.split(":")[-1] if ":" in meta else meta): value for meta, value in f['file']['md'].iteritems()}

        # Duration for video and images
        seconds = get_float(file_md, "seconds")
        minutes = get_float(file_md, "minutes")
        hours = get_float(file_md, "hours")

        # If no duration was received another way, try length and duration
        if seconds==minutes==hours==None:
            seconds = get_float(file_md, "length") or get_float(file_md, "duration")

        duration = [hours or 0, minutes or 0, seconds or 0] # h, m, s
        if any(duration):
            # normalize with carry so each slot stays below 60
            carry = 0
            for i in xrange(len(duration)-1,-1,-1):
                unit = long(duration[i]) + carry
                duration[i] = unit%60
                carry = unit/60
            view_md["length"] = "%d:%02d:%02d" % tuple(duration) if duration[0] > 0 else "%02d:%02d" % tuple(duration[1:])

        # Dimensions for videos and images
        width = get_int(file_md, "width")
        height = get_int(file_md, "height")
        if width and height:
            view_md["size"] = "%dx%dpx" % (width, height)

        # Metadata that doesn't change
        try:
            view_md.update(
                (meta, file_md[meta]) for meta in (
                    "folders","description","fileversion","os","files","pages","format",
                    "seeds","leechs","composer","publisher","encoding","director","writer","starring","producer","released"
                    ) if meta in file_md
                )
            view_searches.update(
                (meta, seoize_text(file_md[meta],"_",False)) for meta in (
                    "folders","os","composer","publisher","director","writer","starring","producer"
                    ) if meta in file_md
                )
        except BaseException as e:
            logging.warn(e)

        # thumbnail
        if "thumbnail" in file_md:
            f["view"]["thumbnail"] = file_md["thumbnail"]

        # metadata known under other names
        try:
            view_md.update(("tags", file_md[meta]) for meta in ("keywords", "tags", "tag") if meta in file_md)
            if "tags" in view_md and isinstance(view_md["tags"], basestring):
                view_searches["tags"] = []
            view_md.update(("comments", file_md[meta]) for meta in ("comments", "comment") if meta in file_md)
            view_md.update(("track", file_md[meta]) for meta in ("track", "track_number") if meta in file_md)
            view_md.update(("created_by", file_md[meta]) for meta in ("created_by", "encodedby","encoder") if meta in file_md)
            view_md.update(("language", file_md[meta]) for meta in ("language", "lang") if meta in file_md)
            view_md.update(("date", file_md[meta]) for meta in ("published", "creationdate") if meta in file_md)
            view_md.update(("trackers", "\n".join(file_md[meta].split(" "))) for meta in ("trackers", "tracker") if meta in file_md and isinstance(file_md[meta], basestring))
            view_md.update(("hash", file_md[meta]) for meta in ("hash", "infohash") if meta in file_md)
            view_md.update(("visualizations", file_md[meta]) for meta in ("count", "viewCount") if meta in file_md)
            if "unpackedsize" in file_md:
                view_md["unpacked_size"]=file_md["unpackedsize"]
            if "privateflag" in file_md:
                view_md["private_file"]=file_md["privateflag"]
        except BaseException as e:
            logging.warn(e)

        # torrents -> filedir filesizes filepaths
        if "filepaths" in file_md:
            filepaths = {}
            for path, size in izip_longest(u(file_md["filepaths"]).split("///"), u(file_md.get("filesizes","")).split(" "), fillvalue=None):
                # doesn't allow sizes without a file
                if not path: break
                parts = path.strip("/").split("/")
                # creates subdirectories
                relative_path = filepaths
                for part in parts[:-1]:
                    if "/"+part not in relative_path:
                        relative_path["/"+part] = {}
                    relative_path = relative_path["/"+part]
                # if the directory already exists, do nothing
                if "/"+parts[-1] in relative_path:
                    pass
                # if the last level repeats, it is a directory (content error)
                elif parts[-1] in relative_path:
                    relative_path["/"+parts[-1]] = {}
                    del relative_path[parts[-1]]
                else:
                    relative_path[parts[-1]] = size
            if "filedir" in file_md:
                filepaths = {"/"+u(file_md["filedir"]).strip("/"):filepaths}
            if filepaths:
                view_md["filepaths"] = filepaths
                view_searches["filepaths"] = {}

        # Multimedia metadata
        try:
            # extraction of the video and/or audio codec
            if "video_codec" in file_md:
                # if video_codec exists, audio_codec is appended when available
                view_md["codec"]=file_md["video_codec"]+" "+file_md["audio_codec"] if "audio_codec" in file_md else file_md["video_codec"]
            else:
                # otherwise the codecs are stored directly
                view_md.update(("codec", file_md[meta]) for meta in ("audio_codec", "codec") if meta in file_md)
            if file_type in ("audio", "video", "image"):
                view_md.update((meta, file_md[meta]) for meta in ("genre", "track", "artist", "author", "colors") if meta in file_md)
                view_searches.update((meta, seoize_text(file_md[meta], "_", False)) for meta in ("artist", "author") if meta in file_md)
        except BaseException as e:
            logging.warn(e)

        # Doesn't show the title if it equals the file name
        if "name" in file_md:
            title = u(file_md["name"])
        elif "title" in file_md:
            title = u(file_md["title"])
        else:
            title = f['view']['nfn']
        if title:
            show_title = True
            text_longer = title
            text_shorter = f["view"]["fn"]
            if len(text_shorter)>len(text_longer):
                text_longer, text_shorter = text_shorter, text_longer
            if text_longer.startswith(text_shorter):
                # hide the title when it only differs by a known extension
                text_longer = text_longer[len(text_shorter):]
                if len(text_longer)==0 or (len(text_longer)>0 and text_longer.startswith(".") and text_longer[1:] in EXTENSIONS):
                    show_title = False
            if show_title:
                view_md["title"] = title
                view_searches["title"] = seoize_text(title, "_", False)

        # Metadata that changes or is specific to one type
        try:
            if "date" in view_md:
                # try to obtain a valid date
                try:
                    view_md["date"]=format_datetime(datetime.fromtimestamp(strtotime(view_md["date"])))
                except:
                    del view_md["date"]
            if file_type == 'audio': # album, year, bitrate, seconds, track, genre, length
                if 'album' in file_md:
                    album = u(file_md["album"])
                    year = get_int(file_md, "year")
                    if album:
                        view_md["album"] = album + (" (%d)"%year if year and 1900<year<2100 else "")
                        view_searches["album"] = seoize_text(album, "_", False)
                if 'bitrate' in file_md:
                    # bitrate, or bitrate - soundtype, or bitrate - soundtype - channels
                    bitrate = get_int(file_md, "bitrate")
                    if bitrate:
                        soundtype=" - %s" % file_md["soundtype"] if "soundtype" in file_md else ""
                        channels = get_float(file_md, "channels")
                        channels=" (%g %s)" % (round(channels,1),_("channels")) if channels else ""
                        view_md["quality"] = "%g kbps %s%s" % (bitrate,soundtype,channels)
            elif file_type == 'document': # title, author, pages, format, version
                if "format" in file_md:
                    view_md["format"] = "%s%s" % (file_md["format"]," %s" % file_md["formatversion"] if "formatversion" in file_md else "")
                version = []
                if "formatVersion" in file_md:
                    version.append(u(file_md["formatVersion"]))
                elif "version" in file_md:
                    version.append(u(file_md["version"]))
                if "revision" in file_md:
                    version.append(u(file_md["revision"]))
                if version:
                    view_md["version"] = " ".join(version)
            elif file_type == 'image': # title, artist, description, width, height, colors
                pass
            elif file_type == 'software': # title, version, fileversion, os
                if "title" in view_md and "version" in file_md:
                    view_md["title"] += " %s" % file_md["version"]
                    view_searches["title"] += " %s" % seoize_text(file_md["version"], "_", False)
            elif file_type == 'video':
                quality = []
                framerate = get_int(file_md, "framerate")
                if framerate:
                    quality.append("%d fps" % framerate)
                if 'codec' in view_md:
                    # if a codec already came, it is now shown only in quality
                    quality.append(u(view_md["codec"]))
                    del view_md["codec"]
                if quality:
                    view_md["quality"] = " - ".join(quality)
                if "series" in file_md:
                    series = u(file_md["series"])
                    if series:
                        safe_series = seoize_text(series, "_", False)
                        view_md["series"] = series
                        view_searches["series"]="%s_%s"%(safe_series,"(series)")
                        season = get_int(file_md, "season")
                        if season:
                            view_md["season"] = season
                            view_searches["season"]="%s_(s%d)"%(safe_series,season)
                            episode = get_int(file_md, "episode")
                            if episode:
                                view_md["episode"] = episode
                                view_searches["episode"]="%s_(s%de%d)"%(safe_series,season,episode)
        except BaseException as e:
            logging.exception("Error obteniendo metadatos especificos del tipo de contenido.")

    # final cleanup pass: sanitize strings and build highlighted versions
    view_mdh=f['view']['mdh']={}
    for metadata,value in view_md.items():
        if isinstance(value, basestring):
            value = clean_html(value)
            if not value:
                del view_md[metadata]
                continue
            view_md[metadata]=value
            # highlight contents matching the search, for not-too-long texts
            if len(value)<500:
                view_mdh[metadata]=highlight(text,value) if text and len(text)<100 else value
        elif isinstance(value, float): # there is no float-typed metadata
            view_md[metadata]=str(int(value))
        else:
            view_md[metadata]=value
def download_file(file_id,file_name=None):
    '''
    Returns the file to download plus its votes, comments and related data.

    @param file_id: url-encoded file id (or a legacy numeric id)
    @param file_name: optional file name from the url
    @return dict with keys "html", "play", "file_data" and "error";
            "error" is a (code, message_key) tuple, (None, "") on success
    '''
    error=(None,"") # holds the http code and the message key of an error
    file_data=None
    if file_id is not None: # an id was given, check that it is well formed
        if is_valid_url_fileid(file_id):
            try: # try to convert the id coming from the url into an internal id
                file_id=url2mid(file_id)
            except (bson.objectid.InvalidId, TypeError) as e:
                try: # check whether it is a legacy (numeric) id
                    possible_file_id = filesdb.get_newid(file_id)
                    if possible_file_id is None:
                        logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                        error=(404,"link_not_exist")
                    else:
                        logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                        # legacy id resolved: permanent redirect to the canonical url
                        return {"html": redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":(301,"")}
                except BaseException as e:
                    logging.exception(e)
                    error=(503,"")
                file_id=None
        else:
            abort(404)
    if file_id:
        # map each domain exception onto the (code, message_key) error protocol
        try:
            file_data=get_file_metadata(file_id, file_name)
        except DatabaseError:
            error=(503,"")
        except FileNotExist:
            error=(404,"link_not_exist")
        except (FileRemoved, FileFoofindRemoved, FileNoSources):
            error=(410,"error_link_removed")
        except FileUnknownBlock:
            error=(404,"")
    if error[0] is None and not file_data:
        # no error and no data: the file exists but could not be retrieved
        error=(503,"")
    if file_id is None or error[0] is not None:
        html=""
        if error[0] is not None: # on error return the rendered error page
            message_msgid="error_%s_message" % error[0]
            message_msgstr=_(message_msgid)
            # fall back to the generic 500 message when no translation exists
            g.title="%s %s" % (error[0], message_msgstr if message_msgstr!=message_msgid else _("error_500_message"))
            html=render_template('error.html',error=error,full_screen=True)
        return {"html": html,"play":None,"file_data":file_data,"error":error}
    else:
        save_visited([file_data])
        title = u(file_data['view']['fn'])
        g.title = u"%s \"%s\" - %s" % (
            _(file_data['view']['action']).capitalize(),
            title[:100],
            g.title)
        g.page_description = u"%s %s"%(_(file_data['view']['action']).capitalize(), seoize_text(title," ",True))

        # if the user is logged in, check whether he has voted this file for the
        # active language and whether he has marked it as favorite
        vote=None
        favorite = False
        if current_user.is_authenticated():
            vote=usersdb.get_file_vote(file_id,current_user,g.lang)
            favorite=any(file_id==favorite["id"] for favorite in usersdb.get_fav_files(current_user))

        # form used to post comments
        form = CommentForm(request.form)
        # only regular users (type None or 0) may comment
        if request.method=='POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type==0) and form.validate():
            usersdb.set_file_comment(file_id,current_user,g.lang,form.t.data)
            form.t.data=""
            flash("comment_published_succesfully")
            # update the file with the per-language comment counters
            filesdb.update_file({"_id":file_id,"cs":usersdb.get_file_comments_sum(file_id),"s":file_data["file"]["s"]},direct_connection=True)

        # when the file has comments, collect for each one its ordinal number,
        # the author, the comment itself and its votes
        comments=[]
        if "cs" in file_data["file"]:
            comments=[(i,usersdb.find_userid(comment["_id"].split("_")[0]),comment,comment_votes(file_id,comment)) for i,comment in enumerate(usersdb.get_file_comments(file_id,g.lang),1)]

        # on the download page, derive search words when none were given
        if g.args.get("q", None) is None:
            query = download_search(file_data, file_name, "foofind")
            if query:
                g.args["q"] = query.replace(":","")

        return {
            "html":render_template('files/download.html',file=file_data,vote={"k":0} if vote is None else vote,favorite=favorite,form=form,comments=comments),
            "play":file_data["view"]["play"] if "play" in file_data["view"] else "",
            "file_data":file_data,
            }
def init_file(afile):
    '''
    Prepares a file document for indexing: computes the derived "_"-prefixed
    fields (rating, size, length, filters, group...) in place on `afile`.

    @param afile: mongo file document (mutated in place)
    @return True when the file was initialized, False when it has no known
            source types or is not a P2P (torrent) file
    '''
    global current_id
    # gets file's id (kept in a global, presumably for error reporting elsewhere)
    current_id = file_id = str(afile["_id"])

    # apply content fixes; best-effort, a failure must not abort indexing
    try:
        content_fixes(afile)
    except BaseException as e:
        logging.exception("Error fixing content file %s."%file_id)

    # semantic entities: encode the main entity id plus its two relation lists
    # into the packed "_ntts" text field
    if "se" in afile and afile["se"] and "_id" in afile["se"]:
        try:
            entity = int(afile["se"]["_id"])
            if "rel" in afile["se"]:
                rels = afile["se"]["rel"]
                # entity ids are zero-padded to 4 digits; PHRASE_SEPARATOR runs
                # keep the two relation groups at fixed phrase distances
                afile["_ntts"] = u"%04d%s%s"%(entity,
                    PHRASE_SEPARATOR*10 + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[0]) if rels[0] else "",
                    PHRASE_SEPARATOR*(10-len(rels[0])/10) + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[1]) if rels[1] else "")
        except ValueError:
            logging.exception("Error parsing entity id %s for file %s."%(str(afile["se"]["_id"]), file_id))
            entity = 0
        except:
            logging.exception("Error generating entity metadata for file %s."%file_id)
            entity = 0
    else:
        entity = 0

    md = afile["md"]
    # metadata keys without their schema prefix ("torrent:title" -> "title")
    md_schemaless_keys = {key.split(":")[-1] for key in md.iterkeys()}

    # source types of the file; bail out when none is known
    src = afile["src"]
    types = {int(s["t"]) for uri, s in src.iteritems() if "t" in s and s["t"] in sources}
    if not types:
        return False
    isP2P = any(u"p" in sources[t]["g"] for t in types)

    # type-dependent values; only P2P (torrent) files are indexed here
    torrent_ct = None
    if not isP2P:
        return False

    # tracker count; may come as a space-separated string
    trackers = md["torrent:trackers"] if "torrent:trackers" in md else 1 if "torrent:tracker" in md else 0
    if isinstance(trackers, basestring):
        trackers = trackers.count(" ")

    # checks whether this is a Torrent file (type 7 only) or a Torrent hash
    main_type = 7 if 7 in types and len(types)==1 else 3

    rate = rate_torrent(afile)
    afile["_r"] = rate["rating"]*10
    afile["_d"] = rate["seeds"]

    inner_group = 0

    # secondary rating: files with images and/or description rank higher
    r2 = 1
    if "thumbnail" in md_schemaless_keys or ("i" in afile and isinstance(afile["i"],list)):
        r2+=2 # files with images
    if "description" in md_schemaless_keys:
        r2+=1 # files with description
    afile["_r2"] = r2

    # file uri, unpacked from the binary ObjectId into three integer attributes
    afile["_uri0"], afile["_uri1"], afile["_uri2"] = afile_struct.unpack(afile['_id'].binary)

    # first-seen timestamp; ignore timestamps in the future
    fs = afile["date"] if "date" in afile else afile["fs"]
    fs = long(mktime(fs.timetuple()))
    if fs<now:
        afile["_fs"] = fs

    # the five most-seen filenames (weighted by per-source counters)
    fns = nlargest(5, ((sum(sfn["fn"][crc]["m"] if "fn" in sfn and crc in sfn["fn"] else 0 for sfn in src.itervalues()), fn) for crc,fn in afile["fn"].iteritems()))
    afile["_fns"] = separator_join(f[1]["n"]+("."+(f[1].get("x",None) or "")) for f in fns)
    res = [[seoize_text(f[1]["n"], separator=" ", is_url=False, max_length=100, min_length=20)] for f in fns]

    # content information
    ct, file_tags, file_format = guess_doc_content_type(afile, sources)

    # file tags -> category resolution
    file_type = CONTENTS[ct].lower()
    file_category = []
    file_category_tag = file_category_type = None
    for category in config["TORRENTS_CATEGORIES"]:
        if category.tag in file_tags and (not file_category or category.tag=="p**n"): # always use adult when its present
            if category.content_main:
                file_category.append(category.cat_id)
            else:
                # non-main categories get priority at the front of the list
                file_category.insert(0,category.cat_id)
        if category.content_main and category.content==file_type:
            file_category_type = category.cat_id
            file_category_tag = category.tag
    afile["_ct"] = file_category[0] if file_category else file_category_type

    # file size (bytes); False when missing or unparsable
    try:
        z = float(afile["z"]) if "z" in afile and afile["z"] else False
    except:
        z = False

    if ct == CONTENT_VIDEO:
        # duration in seconds, trying several metadata fields; "length" may be
        # a "d:h:m:s"-style string, padded and folded with per-part multipliers
        try:
            l = int(float(md["video:duration"])) if "video:duration" in md else \
                int(float(md["video:length"])) if "video:length" in md else \
                sum(imap(mul, [int(float(part)) for part in ("0:0:0:" + str(md["length"])).split(":")[-4:]], [216000, 3600, 60, 1])) if "length" in md else \
                60*int(float(md["video:minutes"])) if "video:minutes" in md else \
                False
        except:
            l = False
        try:
            # "~" marks approximate bitrates; 1280 is the default video bitrate
            bitrate = int(str(md["bitrate"]).replace('~','')) if "bitrate" in md else 1280
        except:
            bitrate = False
    elif ct == CONTENT_AUDIO:
        try:
            l = int(float(md["audio:seconds"])) if "audio:seconds" in md else \
                sum(imap(mul, [int(float(part)) for part in ("0:0:0:" + str(md["length"])).split(":")[-4:]], [216000, 3600, 60, 1])) if "length" in md else \
                False
        except:
            l = False
        try:
            bitrate = int(str(md["bitrate"]).replace('~','')) if "bitrate" in md else 1280 # default bitrate (same as video)
        except:
            bitrate = False
    else:
        bitrate = l = False

    # normalize degenerate values
    if z<1: z = False
    if l<1: l = False
    # estimate the missing one of size/length from the bitrate
    if bitrate:
        if l and not z:
            z = l*(bitrate<<7) # kbps converted to bytes per second
        elif z and not l:
            l = z/(bitrate<<7)
    afile["_l"] = int(l) if 0<int(l)<0xFFFF else False
    afile["_z"] = log(z,2) if z else False

    # searchable metadata text: seoized filenames plus whitelisted short values
    mds = chain(chain(*res), chain(value for key,value in md.iteritems() if key in GOOD_MDS and isinstance(value, basestring) and len(value)<=GOOD_MDS[key]))
    afile["_md"] = separator_join(amd for amd in mds if amd)

    # source types as strings
    afile["_s"] = [unicode(t) for t in types]

    # text filters used by the search engine
    filters = {FILTER_PREFIX_CONTENT_TYPE+CONTENTS[ct]}
    filters.add("%storrent"%FILTER_PREFIX_SOURCE_GROUP)
    filters.update("%s%02d"%(prefix,int(md[key])) for key, prefix in numeric_filters.iteritems() if key in md and (isinstance(md[key], int) or isinstance(md[key], float) or (isinstance(md[key], basestring) and md[key].isdecimal())))

    # words extracted from the tag-relevant metadata values, plus the tags
    file_words = [word.strip().replace("-"," ") for key, value in md.iteritems() if value and isinstance(value, basestring) and any(key.endswith(dtag_md) for dtag_md in DYNAMIC_TAGS_METADATA) for word in SUBCATEGORIES_FINDER.findall(value.lower())] + file_tags
    dtags = {(tag, DYNAMIC_TAGS[tag][word]) for tag in file_tags if tag in DYNAMIC_TAGS for word in file_words if word in DYNAMIC_TAGS[tag]}
    # when the category tag is dynamic but not present among the file tags,
    # derive dynamic tags from it too
    if file_category_tag and file_category_tag in DYNAMIC_TAGS and file_category_tag not in file_tags and not any(category in file_tags for category in DYNAMIC_TAGS.iterkeys()):
        type_dtags = [(file_category_tag, DYNAMIC_TAGS[file_category_tag][word]) for word in file_words if word in DYNAMIC_TAGS[file_category_tag]]
        dtags.update(type_dtags)
        if type_dtags:
            file_tags.append(file_category_tag)
    filters.update("%s%s"%(FILTER_PREFIX_TAGS, tag) for tag in file_tags)
    filters.update("%s%s"%(FILTER_PREFIX_DYNAMIC_TAGS, dtag.replace(" ","")) for tag, dtag in dtags)
    if file_format:
        filters.add(FILTER_PREFIX_FORMAT+file_format[0])
    afile["_fil"] = " ".join(filters)
    afile["__dtags"] = dtags

    # group key: entity | category nibble | main type | inner group, bit-packed
    afile["_g"] = ((entity << 32) | (((afile["_ct"] or 0)&0xF) << 28) | ((main_type & 0xFFFF) << 12) | (inner_group & 0xFFF))
    return True
def get_file_metadata(file_id, file_name=None):
    '''
    Gets the file from the database and fills in its metadata.

    @type file_id: mongoid
    @param file_id: mongo id of the file
    @type file_name: basestring
    @param file_name: name of the file

    @rtype dict
    @return dictionary of file data with metadata

    @raise DatabaseError: if the database connection fails
    @raise FileNotExist: if the file does not exist or has been blocked
    @raise FileRemoved: if the file has been removed from its origin
    @raise FileFoofindRemoved: if the file has been blocked by foofind
    @raise FileUnknownBlock: if the file is blocked for an unknown reason
    @raise FileNoSources: if the file has no sources
    '''
    try:
        data = filesdb.get_file(file_id, bl = None)
    except BaseException as e:
        logging.exception(e)
        raise DatabaseError

    # tries to get the server id from sphinx,
    # resolving data inconsistencies
    if not data:
        sid = searchd.get_id_server_from_search(file_id, file_name)
        if sid:
            try:
                data = filesdb.get_file(file_id, sid = sid, bl = None)
                if feedbackdb.initialized:
                    # report the indirection so the data can be repaired
                    feedbackdb.notify_indir(file_id, sid)
            except BaseException as e:
                logging.exception(e)
                raise DatabaseError

    if data:
        # blocked files: "bl" may arrive as a numeric string
        bl = data.get("bl",None)
        if bl and isinstance(bl, (str, unicode)) and bl.isdigit():
            bl = int(bl)
        if bl:
            if bl == 1: raise FileFoofindRemoved
            elif bl == 3: raise FileRemoved
            logging.warn(
                "File with an unknown 'bl' value found: %s" % repr(bl),
                extra=data)
            raise FileUnknownBlock

        # main semantic entity of the file, when present
        file_se = data["se"] if "se" in data else None
        file_ntt = entitiesdb.get_entity(file_se["_id"]) if file_se and "_id" in file_se else None
        ntts = {file_se["_id"]:file_ntt} if file_ntt else {}

        # NOTE: disabled code kept below as a dead string literal
        ''' # trae entidades relacionadas
        if file_ntt and "r" in file_ntt:
            rel_ids = list(set(eid for eids in file_ntt["r"].itervalues() for eid in eids))
            ntts.update({int(ntt["_id"]):ntt for ntt in entitiesdb.get_entities(rel_ids, None, (False, [u"episode"]))})
        '''
    else:
        raise FileNotExist

    # fill in and return the file data
    return fill_data(data, file_name, ntts)
def save_visited(self, files):
    '''
    Best-effort logging of visited files.

    Publishes the hex ids of the given files, msgpack-encoded, to the
    VISITED_LINKS_CHANNEL Redis channel. Any failure is logged and
    swallowed so that logging can never break the caller.

    @param files: iterable of file dicts; falsy entries are skipped.
    '''
    try:
        self.searchd.get_redis_connection().publish(
            VISITED_LINKS_CHANNEL,
            msgpack.packb([mid2hex(f["file"]["_id"]) for f in files if f]))
    except Exception:
        # Was `except BaseException`, which also swallowed SystemExit and
        # KeyboardInterrupt; Exception preserves the best-effort intent
        # without hiding interpreter-level exit signals.
        logging.exception("Can't log visited files.")
def download(file_id, file_name=""):
    '''
    Torrent download page view: resolves the file, its related torrents and
    the SEO title/description, aborting with the proper http code on error.
    '''
    g.page_type = FILE_PAGE_TYPE

    # best-effort: extract and record the search query from the referrer url
    if request.referrer:
        try:
            posibles_queries = referrer_parser.match(request.referrer)
            if posibles_queries:
                query = posibles_queries.group(1) or posibles_queries.group(2) or ""
                if query:
                    get_query_info(u(urllib2.unquote_plus(query).decode("utf-8")))
        except:
            pass

    error = None
    file_data=None
    if file_id is not None: # an id was given, check that it is well formed
        try: # try to convert the id coming from the url into an internal id
            file_id=url2mid(file_id)
        except TypeError as e:
            try: # check whether it is a legacy (numeric) id
                possible_file_id = filesdb.get_newid(file_id)
                if possible_file_id is None:
                    logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                    error=404
                else:
                    logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                    # legacy id resolved: permanent redirect to the canonical url
                    return {"html": empty_redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":301}
            except BaseException as e:
                logging.exception(e)
                error=503
            file_id=None
    if file_id:
        # map each domain exception onto an http status code
        try:
            file_data=get_file_metadata(file_id, file_name.replace("-"," "))
        except DatabaseError:
            error=503
        except FileNotExist:
            error=404
        except (FileRemoved, FileFoofindRemoved, FileNoSources):
            error=410
        except FileUnknownBlock:
            error=404
    if error is None and not file_data:
        # no error and no data: the file exists but could not be retrieved
        error=503
    if error:
        abort(error)

    # complete the torrent-specific data
    file_data = torrents_data(file_data, True, g.category)
    if not file_data:
        abort(404)

    # resolve the page category; adult content gets flagged
    if file_data["view"]["category"]:
        g.category = file_data["view"]["category"]
        if file_data["view"]["category"].tag=="p**n":
            g.is_adult_content = True
    else:
        g.category = file_data["view"]["category_type"]

    # forbid access to files that should be blocked
    prepared_phrase = blacklists.prepare_phrase(file_data['view']['nfn'])
    if prepared_phrase in blacklists["forbidden"] or (prepared_phrase in blacklists["misconduct"] and prepared_phrase in blacklists["underage"]):
        g.blacklisted_content = "File"
        if not g.show_blacklisted_content:
            abort(404)

    # related torrents search, excluding this file and (unless already in an
    # adult context) the adult category
    query = download_search(file_data, file_name, "torrent").replace("-"," ")
    related = single_search(query,
        category=None,
        not_category=(None if g.is_adult_content else "p**n"),
        title=("Related torrents",3,None),
        zone="File / Related",
        last_items=[],
        limit=30,
        max_limit=15,
        ignore_ids=[mid2hex(file_id)],
        show_order=None)

    # choose the page title
    title = file_data['view']['fn']
    # trim the title back to the nearest separator within the first 101 chars
    if len(title)>101:
        for pos in xrange(101, 30, -1):
            if title[pos] in SEPPER:
                title = title[:pos].strip()
                break
        else:
            # no separator found: hard cut
            title = title[:101]
    g.title = [title]

    # build the page description: file description, else category/default
    # text, padded with related-files text when too short
    page_description = ""
    if "description" in file_data["view"]["md"]:
        page_description = file_data["view"]["md"]["description"].replace("\n", " ")
    if not page_description:
        if g.category:
            page_description = _("download_category_desc", category=singular_filter(g.category.title).lower(), categorys=g.category.title.lower()).capitalize()
        else:
            page_description = _("download_desc")
    if len(page_description)<50:
        if page_description:
            page_description += ". "
        page_description += " ".join(text.capitalize()+"." for text in related[1]["files_text"])
    if len(page_description)>180:
        # cut at the last sentence end, else at the last space, else at 180
        last_stop = page_description[:180].rindex(".") if "." in page_description[:180] else 0
        if last_stop<100:
            last_stop = page_description[:180].rindex(" ") if " " in page_description[:180] else 0
        if last_stop<100:
            last_stop = 180
        page_description = page_description[:last_stop]+"."
    g.page_description = page_description

    is_canonical_filename = file_data["view"]["seo-fn"]==file_name

    # register the visit to the file (bots are logged separately)
    if g.search_bot:
        searchd.log_bot_event(g.search_bot, True)
    else:
        save_visited([file_data])

    # allow caching only when there are related results
    if related[0]:
        g.must_cache = 3600

    # last-modified
    g.last_modified = file_data["file"]["ls"]

    return render_template('file.html',
        related_query = query,
        file_data=file_data,
        related_files=related,
        is_canonical_filename=is_canonical_filename,
        featured=get_featured(related[1]["count"]+len(file_data["view"]["md"]), 1))