def html_params(**kwargs):
    params = []
    for k, v in sorted(kwargs.iteritems()):
        if k in ('class_', 'class__', 'for_'):
            k = k[:-1]
        if v is True:
            params.append(k)
        else:
            params.append(u'%s="%s"' % (u(k), escape(u(v), quote=True)))
    return u" ".join(params)
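# Hedged usage sketch (not part of the original module). It assumes u() behaves
# like unicode() and escape() HTML-escapes its argument (e.g. markupsafe.escape).
# Reserved-word keys lose their trailing underscore; True renders as a bare
# boolean attribute.
assert html_params(class_="btn", disabled=True) == u'class="btn" disabled'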
def autocomplete():
    ''' Returns the list for the search autocomplete. '''
    query = u(request.args.get("term", "")).lower()
    if not query:
        return "[]"
    ct = u(request.args.get("t", "")).lower()
    tamingWeight = {"c": 1, "lang": 200}
    if ct:
        for cti in CONTENTS_CATEGORY[ct]:
            tamingWeight[TAMING_TYPES[cti]] = 200
    options = taming.tameText(query, tamingWeight, 5, 3, 0.2)
    if options is None:
        cache.cacheme = False
        return "[]"
    return json.dumps([result[2] for result in options])
def tag(_name, _children=None, separator="", children_type=None, **kwargs):
    if _children is False:
        return u""
    attr = (" " + space_join('%s="%s"' % (key, u(val))
                             for key, val in kwargs.iteritems() if val)) if kwargs else ""
    if _children:
        if children_type is list:
            return u"<%s%s>%s</%s>" % (_name, attr, separator.join(_children), _name)
        elif children_type is unicode:
            escaped_children = space_join(XML_ILLEGAL_CHARS_RE.split(u(_children)))
            if "&" in escaped_children:
                return u"<%s%s><![CDATA[%s]]></%s>" % (_name, attr, escaped_children, _name)
            else:
                return u"<%s%s>%s</%s>" % (_name, attr, escaped_children, _name)
        elif children_type is str:
            return u"<%s%s>%s</%s>" % (_name, attr, _children, _name)
        elif children_type is float:
            return u"<%s%s>%.8f</%s>" % (_name, attr, _children, _name)
        else:
            return u"<%s%s>%s</%s>" % (_name, attr, unicode(_children), _name)
    else:
        return u"<%s%s/>" % (_name, attr)
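# Hedged usage sketch (not part of the original module). For list children the
# pieces are joined verbatim with `separator`; with no children a self-closing
# tag is emitted.
assert tag("br") == u"<br/>"
assert tag("ul", [u"<li>a</li>", u"<li>b</li>"], children_type=list) == u"<ul><li>a</li><li>b</li></ul>"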
def before_request():
    # if the language in the URL is invalid, return page not found
    if g.url_lang and not g.url_lang in app.config["ALL_LANGS"]:
        abort(404)
    # strip all trailing slashes from the URL
    if request.path != '/' and request.path.endswith('/'):
        return redirect(u(request.url).replace(u(request.path), u(request.path)[:-1]), 301)
    # ignore requests without a blueprint
    if request.blueprint is None:
        return
    # if this is not the fallback language, add the fallback in case a message is missing
    if g.lang != "en":
        get_translations().add_fallback(fallback_lang)
    # site domain
    g.domain = request.url_root[7:-1] if "https" not in request.url_root else request.url_root[8:-1]
    # default page title
    g.title = g.domain
    # total files counter
    g.count_files = lastcount[0]
def before_request():
    # don't preprocess requests to static
    if request.path.startswith("/static"):
        g.accept_cookies = None
        return

    # default values for g object
    init_g()

    # check the request rate limit
    check_rate_limit(g.search_bot)

    # if the language in the URL is invalid, return page not found
    all_langs = current_app.config["ALL_LANGS"]
    if g.url_lang and not g.url_lang in all_langs:
        abort(404)

    # add the default language as fallback for messages without a translation
    # in the current language
    if g.lang != "en":
        add_translation_fallback("en")

    # if the language must be changed
    if request.args.get("setlang", None):
        session["lang"] = g.lang
        # if the language is allowed and the user is logged in, update it in the database
        if g.lang in all_langs and current_user.is_authenticated():
            current_user.set_lang(g.lang)
            usersdb.update_user({"_id": current_user.id, "lang": g.lang})
        return redirect(request.base_url)

    g.keywords = set(_(keyword) for keyword in
                     ['download', 'watch', 'files', 'submit_search', 'audio', 'video', 'image',
                      'document', 'software', 'P2P', 'direct_downloads'])
    descr = _("about_text")
    g.page_description = descr[:descr.find("<br")]

    # ignore requests without a blueprint
    if request.blueprint is None and request.path.endswith("/"):
        if "?" in request.url:
            root = request.url_root[:-1]
            path = request.path.rstrip("/")
            query = u(request.url)
            query = query[query.find(u"?"):]
            return redirect(root + path + query, 301)
        return redirect(request.url.rstrip("/"), 301)
def download_search(file_data, file_text, fallback):
    ''' Tries to find a string to search for when coming from a download page. '''
    search_texts = []
    if file_data:
        mds = file_data['file']['md']
        for key in ['audio:artist', 'audio:album', 'video:series', 'video:title',
                    'image:title', 'audio:title', 'application:name', 'application:title',
                    'book:title', 'torrent:title']:
            if key in mds and isinstance(mds[key], basestring) and len(mds[key]) > 1:
                search_texts.append((u(mds[key]), False))
        search_texts.append((file_data["view"]["fn"], True))
        if file_data['view']["tags"]:
            fallback = "(%s)" % file_data['view']["tags"][0]
        elif file_data['view']['file_type']:
            fallback = "(%s)" % file_data['view']['file_type']
    if file_text:
        search_texts.append((file_text, True))

    best_candidate = None
    best_points = 10000
    for main_position, (search_text, is_filename) in enumerate(search_texts):
        phrases = split_phrase(search_text, is_filename)
        for inner_position, phrase in enumerate(phrases):
            candidate = [part for part in phrase.split(" ") if part.strip()]
            count = sum(1 for word in candidate if len(word) > 1)
            is_numeric = len(candidate) == 1 and candidate[0].isdecimal()
            candidate_points = main_position + inner_position + (
                50 if count == 0 else 5 if count == 1 and is_numeric else 20 if count > 15 else 0)
            if candidate_points < best_points:
                best_points = candidate_points
                best_candidate = candidate
        if best_candidate and best_points < 2:
            break

    if best_candidate:
        return " ".join(best_candidate[:5])
    return fallback
def simple_pyon(x):
    ''' Converts a string representing a Python object into a Python object
        of the appropriate type. Very simplified. '''
    x = x.strip()
    if x.lower() in ("", "none", "null"):
        return None
    if x.replace("+", "", x.startswith("+")).replace("-", "", x.startswith("-")).replace(".", "", x.count(".") == 1).isdigit():
        if "." in x:
            return float(x)
        return int(x)
    if x.endswith("j"):
        return complex(x)
    if len(x) > 1:
        if x[0] == x[-1] and x[0] in "\"'":
            return x[1:-1]
        if x[0] == "u" and x[1] == x[-1] and x[1] in "\"'":
            return u(x[2:-1])
        if x[0] == "[" and x[-1] == "]":
            return list(simple_pyon(i) for i in x[1:-1].split(","))
        if x[0] == "(" and x[-1] == ")":
            return tuple(simple_pyon(i) for i in x[1:-1].split(","))
        if x[0] == "{" and x[-1] == "}":
            return dict((simple_pyon(i.split(":")[0]), simple_pyon(i.split(":")[1]))
                        for i in x[1:-1].split(","))
    return x
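# Hedged usage sketch (not part of the original module), showing the type
# coercions performed above:
assert simple_pyon("42") == 42
assert simple_pyon("3.5") == 3.5
assert simple_pyon("'hello'") == "hello"
assert simple_pyon("[1,2,3]") == [1, 2, 3]
assert simple_pyon("none") is None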
def build_source_links(f, prevsrc=False, unify_torrents=False):
    ''' Builds the links properly. '''
    def get_domain(src):
        ''' Returns the domain of a URL. '''
        url_parts = urlparse(src).netloc.split('.')
        i = len(url_parts) - 1
        if len(url_parts[i]) <= 2 and len(url_parts[i-1]) <= 3:
            return url_parts[i-2] + '.' + url_parts[i-1] + '.' + url_parts[i]
        else:
            return url_parts[i-1] + '.' + url_parts[i]

    if not "fn" in f['view']:
        choose_filename(f)
    f['view']['action'] = 'download'
    f['view']['sources'] = defaultdict(dict)
    srcs = f['file']['src']
    max_weight = 0
    icon = ""
    for hexuri, src in srcs.items():
        if not src.get('bl', None) in (0, None):
            continue
        join = False
        count = 0
        part = url = ""
        source_data = filesdb.get_source_by_id(src["t"])
        if source_data is None:
            # the file has a source that doesn't exist in the "sources" table
            logging.error('The file has a source missing from the "sources" table: %s' % src["t"], extra={"file": f})
            continue
        elif "crbl" in source_data and source_data["crbl"] == 1:
            # the source is blocked
            continue
        elif "w" in source_data["g"] or "f" in source_data["g"] or "s" in source_data["g"]:
            # direct download
            link_weight = 1
            tip = source_data["d"]
            icon = "web"
            url = src['url']
            if "f" in source_data["g"]:
                source = get_domain(src['url'])
            else:
                source = source_data["d"]
            # when in doubt, prefer streaming
            if "s" in source_data["g"]:
                f['view']['action'] = 'watch'
                link_weight *= 2
        # torrenthash before torrent because it's a more specific case
        elif source_data["d"] == "BitTorrentHash":
            link_weight = 0.7 if 'torrent:tracker' in f['file']['md'] or 'torrent:trackers' in f['file']['md'] else 0.1
            if unify_torrents:
                tip = source = icon = "torrent"
            else:
                tip = "Torrent MagnetLink"
                source = icon = "tmagnet"
            join = True
            count = int(src['m'])
            part = "xt=urn:btih:" + src['url']
            if 'torrent:tracker' in f['file']['md']:
                part += unicode('&tr=' + urllib.quote_plus(u(f['file']['md']['torrent:tracker']).encode("UTF-8")), "UTF-8")
            elif 'torrent:trackers' in f['file']['md']:
                trackers = f['file']['md']['torrent:trackers']
                if isinstance(trackers, basestring):
                    part += unicode("".join('&tr=' + urllib.quote_plus(tr) for tr in u(trackers).encode("UTF-8").split(" ")), "UTF-8")
        elif "t" in source_data["g"]:
            link_weight = 0.8
            url = src['url']
            icon = "torrent"
            if unify_torrents:
                tip = source = "torrent"
            else:
                tip = source = get_domain(src['url'])
        elif source_data["d"] == "Gnutella":
            link_weight = 0.2
            tip = "Gnutella"
            source = icon = "gnutella"
            part = "xt=urn:sha1:" + src['url']
            join = True
            count = int(src['m'])
        elif source_data["d"] == "ed2k":
            link_weight = 0.1
            tip = "ED2K"
            source = icon = "ed2k"
            url = "ed2k://|file|" + f['view']['efn'] + "|" + str(f['file']['z'] if "z" in f["file"] else 1) + "|" + src['url'] + "|/"
            count = int(src['m'])
        elif source_data["d"] == "Tiger":
            link_weight = 0
            tip = "Gnutella"
            source = icon = "gnutella"
            part = "xt=urn:tiger:" + src['url']
        elif source_data["d"] == "MD5":
            link_weight = 0
            tip = "Gnutella"
            source = icon = "gnutella"
            part = "xt=urn:md5:" + src['url']
        else:
            continue

        f['view']['sources'][source]['tip'] = tip
        f['view']['sources'][source]['icon'] = icon
        f['view']['sources'][source]['logo'] = "http://%s/favicon.ico" % tip
        f['view']['sources'][source]['join'] = join
        f['view']['sources'][source]['type'] = source_data["g"]
        # don't clobber the count when there are several files from the same source
        if not 'count' in f['view']['sources'][source] or count > 0:
            f['view']['sources'][source]['count'] = count
        if not "parts" in f['view']['sources'][source]:
            f['view']['sources'][source]['parts'] = []
        if not 'urls' in f['view']['sources'][source]:
            f['view']['sources'][source]['urls'] = []
        if part != "":
            f['view']['sources'][source]['parts'].append(part)
        if url != "":
            f['view']['sources'][source]['urls'].append(url)
            if source_data["d"] != "ed2k":
                f['view']['sources'][source]['count'] += 1
        if link_weight > max_weight:
            max_weight = link_weight
            f['view']['source'] = source

    if icon != "web":
        for src, info in f['view']['sources'].items():
            if 'join' in info:
                size = ""
                if 'z' in f['file']:
                    size = "&xl=" + str(f['file']['z'])
                f['view']['sources'][src]['urls'].append("magnet:?dn=" + f['view']['efn'] + size + "&" + "&".join(info['parts']))
            elif not 'urls' in info:
                del(f['view']['sources'][src])
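# Hedged illustration (not part of the original module) of how get_domain above
# collapses a netloc to its registrable domain, special-casing short
# second-level labels like "co.uk":
from urlparse import urlparse

def _get_domain(src):
    url_parts = urlparse(src).netloc.split('.')
    i = len(url_parts) - 1
    if len(url_parts[i]) <= 2 and len(url_parts[i-1]) <= 3:
        return url_parts[i-2] + '.' + url_parts[i-1] + '.' + url_parts[i]
    return url_parts[i-1] + '.' + url_parts[i]

assert _get_domain("http://www.example.co.uk/path") == "example.co.uk"
assert _get_domain("http://sub.example.com/path") == "example.com"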
def fix_sphinx_result(word):
    return u(SPHINX_WRONG_RANGE.sub(fixer, word))
def format_metadata(f, text_cache, search_text_shown=False):
    ''' Formats the files' metadata. '''
    text = text_cache[2] if text_cache else None
    view_md = f['view']['md'] = {}
    view_searches = f["view"]["searches"] = {}
    file_type = f['view']['file_type'] if 'file_type' in f['view'] else None
    if 'md' in f['file']:
        # if it comes in type:metadata format, the type is removed
        file_md = {(meta.split(":")[-1] if ":" in meta else meta): value
                   for meta, value in f['file']['md'].iteritems()}

        # duration for video and images
        seconds = get_float(file_md, "seconds")
        minutes = get_float(file_md, "minutes")
        hours = get_float(file_md, "hours")

        # if no duration was received any other way, try length and duration
        if seconds == minutes == hours == None:
            seconds = get_float(file_md, "length") or get_float(file_md, "duration")

        duration = [hours or 0, minutes or 0, seconds or 0]  # h, m, s
        if any(duration):
            carry = 0
            for i in xrange(len(duration) - 1, -1, -1):
                unit = long(duration[i]) + carry
                duration[i] = unit % 60
                carry = unit / 60
            view_md["length"] = "%d:%02d:%02d" % tuple(duration) if duration[0] > 0 else "%02d:%02d" % tuple(duration[1:])

        # size for videos and images
        width = get_int(file_md, "width")
        height = get_int(file_md, "height")
        if width and height:
            view_md["size"] = "%dx%dpx" % (width, height)

        # metadata that doesn't change
        try:
            view_md.update(
                (meta, file_md[meta]) for meta in (
                    "folders", "description", "fileversion", "os", "files", "pages", "format",
                    "seeds", "leechs", "composer", "publisher", "encoding", "director", "writer",
                    "starring", "producer", "released"
                ) if meta in file_md)
            view_searches.update(
                (meta, seoize_text(file_md[meta], "_", False)) for meta in (
                    "folders", "os", "composer", "publisher", "director", "writer", "starring", "producer"
                ) if meta in file_md)
        except BaseException as e:
            logging.warn(e)

        # thumbnail
        if "thumbnail" in file_md:
            f["view"]["thumbnail"] = file_md["thumbnail"]

        # metadata known under other names
        try:
            view_md.update(("tags", file_md[meta]) for meta in ("keywords", "tags", "tag") if meta in file_md)
            if "tags" in view_md and isinstance(view_md["tags"], basestring):
                view_searches["tags"] = []
            view_md.update(("comments", file_md[meta]) for meta in ("comments", "comment") if meta in file_md)
            view_md.update(("track", file_md[meta]) for meta in ("track", "track_number") if meta in file_md)
            view_md.update(("created_by", file_md[meta]) for meta in ("created_by", "encodedby", "encoder") if meta in file_md)
            view_md.update(("language", file_md[meta]) for meta in ("language", "lang") if meta in file_md)
            view_md.update(("date", file_md[meta]) for meta in ("published", "creationdate") if meta in file_md)
            view_md.update(("trackers", "\n".join(file_md[meta].split(" "))) for meta in ("trackers", "tracker") if meta in file_md and isinstance(file_md[meta], basestring))
            view_md.update(("hash", file_md[meta]) for meta in ("hash", "infohash") if meta in file_md)
            view_md.update(("visualizations", file_md[meta]) for meta in ("count", "viewCount") if meta in file_md)
            if "unpackedsize" in file_md:
                view_md["unpacked_size"] = file_md["unpackedsize"]
            if "privateflag" in file_md:
                view_md["private_file"] = file_md["privateflag"]
        except BaseException as e:
            logging.warn(e)

        # torrents -> filedir filesizes filepaths
        if "filepaths" in file_md:
            filepaths = {}
            for path, size in izip_longest(u(file_md["filepaths"]).split("///"),
                                           u(file_md.get("filesizes", "")).split(" "),
                                           fillvalue=None):
                # don't allow sizes without a file
                if not path:
                    break
                parts = path.strip("/").split("/")
                # create subdirectories
                relative_path = filepaths
                for part in parts[:-1]:
                    if "/" + part not in relative_path:
                        relative_path["/" + part] = {}
                    relative_path = relative_path["/" + part]
                # if the directory already exists, do nothing
                if "/" + parts[-1] in relative_path:
                    pass
                # if the last level repeats, it's a directory (content mistake)
                elif parts[-1] in relative_path:
                    relative_path["/" + parts[-1]] = {}
                    del relative_path[parts[-1]]
                else:
                    relative_path[parts[-1]] = size
            if "filedir" in file_md:
                filepaths = {"/" + u(file_md["filedir"]).strip("/"): filepaths}
            if filepaths:
                view_md["filepaths"] = filepaths
                view_searches["filepaths"] = {}

        # multimedia metadata
        try:
            # extraction of the video and/or audio codec
            if "video_codec" in file_md:
                # if video_codec exists, audio_codec is concatenated after it when needed
                view_md["codec"] = (file_md["video_codec"] + " " + file_md["audio_codec"]) if "audio_codec" in file_md else file_md["video_codec"]
            else:
                # otherwise they're stored directly
                view_md.update(("codec", file_md[meta]) for meta in ("audio_codec", "codec") if meta in file_md)
            if file_type in ("audio", "video", "image"):
                view_md.update((meta, file_md[meta]) for meta in ("genre", "track", "artist", "author", "colors") if meta in file_md)
                view_searches.update((meta, seoize_text(file_md[meta], "_", False)) for meta in ("artist", "author") if meta in file_md)
        except BaseException as e:
            logging.warn(e)

        # don't show the title if it equals the filename
        if "name" in file_md:
            title = u(file_md["name"])
        elif "title" in file_md:
            title = u(file_md["title"])
        else:
            title = f['view']['nfn']
        if title:
            show_title = True
            text_longer = title
            text_shorter = f["view"]["fn"]
            if len(text_shorter) > len(text_longer):
                text_longer, text_shorter = text_shorter, text_longer
            if text_longer.startswith(text_shorter):
                text_longer = text_longer[len(text_shorter):]
                if len(text_longer) == 0 or (len(text_longer) > 0 and text_longer.startswith(".") and text_longer[1:] in EXTENSIONS):
                    show_title = False
            if show_title:
                view_md["title"] = title
                view_searches["title"] = seoize_text(title, "_", False)

        # type-specific or changing metadata
        try:
            if "date" in view_md:
                # try to obtain a valid date
                try:
                    view_md["date"] = format_datetime(datetime.fromtimestamp(strtotime(view_md["date"])))
                except:
                    del view_md["date"]

            if file_type == 'audio':  # album, year, bitrate, seconds, track, genre, length
                if 'album' in file_md:
                    album = u(file_md["album"])
                    year = get_int(file_md, "year")
                    if album:
                        view_md["album"] = album + (" (%d)" % year if year and 1900 < year < 2100 else "")
                        view_searches["album"] = seoize_text(album, "_", False)
                if 'bitrate' in file_md:
                    # bitrate, or bitrate - soundtype, or bitrate - soundtype - channels
                    bitrate = get_int(file_md, "bitrate")
                    if bitrate:
                        soundtype = " - %s" % file_md["soundtype"] if "soundtype" in file_md else ""
                        channels = get_float(file_md, "channels")
                        channels = " (%g %s)" % (round(channels, 1), _("channels")) if channels else ""
                        view_md["quality"] = "%g kbps %s%s" % (bitrate, soundtype, channels)
            elif file_type == 'document':  # title, author, pages, format, version
                if "format" in file_md:
                    view_md["format"] = "%s%s" % (file_md["format"], " %s" % file_md["formatversion"] if "formatversion" in file_md else "")
                version = []
                if "formatVersion" in file_md:
                    version.append(u(file_md["formatVersion"]))
                elif "version" in file_md:
                    version.append(u(file_md["version"]))
                if "revision" in file_md:
                    version.append(u(file_md["revision"]))
                if version:
                    view_md["version"] = " ".join(version)
            elif file_type == 'image':  # title, artist, description, width, height, colors
                pass
            elif file_type == 'software':  # title, version, fileversion, os
                if "title" in view_md and "version" in file_md:
                    view_md["title"] += " %s" % file_md["version"]
                    view_searches["title"] += " %s" % seoize_text(file_md["version"], "_", False)
            elif file_type == 'video':
                quality = []
                framerate = get_int(file_md, "framerate")
                if framerate:
                    quality.append("%d fps" % framerate)
                if 'codec' in view_md:
                    # if a codec was already set, show it in quality only
                    quality.append(u(view_md["codec"]))
                    del view_md["codec"]
                if quality:
                    view_md["quality"] = " - ".join(quality)
                if "series" in file_md:
                    series = u(file_md["series"])
                    if series:
                        safe_series = seoize_text(series, "_", False)
                        view_md["series"] = series
                        view_searches["series"] = "%s_%s" % (safe_series, "(series)")
                        season = get_int(file_md, "season")
                        if season:
                            view_md["season"] = season
                            view_searches["season"] = "%s_(s%d)" % (safe_series, season)
                            episode = get_int(file_md, "episode")
                            if episode:
                                view_md["episode"] = episode
                                view_searches["episode"] = "%s_(s%de%d)" % (safe_series, season, episode)
        except BaseException as e:
            logging.exception("Error getting content-type specific metadata.")

    view_mdh = f['view']['mdh'] = {}
    for metadata, value in view_md.items():
        if isinstance(value, basestring):
            value = clean_html(value)
            if not value:
                del view_md[metadata]
                continue
            view_md[metadata] = value
            # highlight contents matching the search, for not-too-long texts
            if len(value) < 500:
                view_mdh[metadata] = highlight(text, value) if text and len(text) < 100 else value
        elif isinstance(value, float):  # no metadata is float type
            view_md[metadata] = str(int(value))
        else:
            view_md[metadata] = value
def choose_filename(f, text=False):
    ''' Chooses the right filename. '''
    text = slugify(text) if text else text
    srcs = f['file']['src']
    fns = f['file']['fn']
    chosen = None
    max_count = -1
    has_text = 0
    try:
        for hexuri, src in srcs.items():
            if 'bl' in src and src['bl'] != 0:
                continue
            this_has_text = 0
            for crc, srcfn in src['fn'].items():
                # if it has no name, it isn't taken into account
                m = srcfn['m'] if len(fns[crc]['n']) > 0 else 0
                if 'c' in fns[crc]:
                    fns[crc]['c'] += m
                else:
                    fns[crc]['c'] = m
                if text:
                    slugified_fn = slugify(fns[crc]['n']).strip()
                    if len(slugified_fn) > 0:
                        # TODO: do the same as when bolding the name, extracting the best
                        # text even without accents or when not joined by spaces
                        if slugified_fn.upper().find(text.upper()) != -1:
                            this_has_text = 2000
                        else:
                            matches = 0
                            for word in [re.escape(w) for w in text.split(" ")]:
                                matches += len(re.findall(r"(?:\b|_)%s(?:\b|_)" % word, slugified_fn, re.IGNORECASE))
                            if matches > 0:
                                this_has_text = 1000 + matches
                f['file']['fn'][crc]['tht'] = this_has_text
                better = fns[crc]['c'] > max_count
                if this_has_text > has_text or (better and this_has_text == has_text):
                    has_text = this_has_text
                    chosen = crc
                    max_count = fns[crc]['c']
    except KeyError as e:
        # for sources that have a name but aren't in the file
        logging.exception(e)

    f['view']['url'] = mid2url(hex2mid(f['file']['_id']))
    if chosen:
        filename = fns[chosen]['n']
        ext = fns[chosen]['x']
    else:
        # uses filename from src
        srcurl = ""
        for hexuri, src in srcs.items():
            if src['url'].find("/") != -1:
                srcurl = src['url']
        if srcurl == "":
            return
        srcurl = srcurl[srcurl.rfind("/") + 1:]
        ext = srcurl[srcurl.rfind(".") + 1:]
        filename = srcurl[0:srcurl.rfind(".")]

    filename = Markup(filename).striptags()[:512]
    if not ext in EXTENSIONS:
        filename += "." + ext  # rejoin the wrongly split extension with its dot
        ext = ""
        nfilename = filename
    else:
        # clean filename
        end = filename.upper().rfind("." + ext.upper())
        if end == -1:
            nfilename = filename
        else:
            nfilename = filename.strip()[0:end]

    f['view']['fn'] = filename  # TODO: for archive types, look for the real type in the name (mp3, avi...)
    f['view']['fnx'] = ext
    f['view']['efn'] = filename.replace(" ", "%20")

    # prettify the filename
    if nfilename.find(" ") == -1:
        nfilename = nfilename.replace(".", " ")
    f['view']['nfn'] = nfilename.replace("_", " ")

    # filename escaped to generate the download URLs
    f['view']['qfn'] = u(filename).encode("UTF-8")

    # filename with the words matching the search highlighted
    if not text:  # or not has_text:
        f['view']['fnh'] = f['view']['fnhs'] = filename
    else:
        f['view']['fnh'], f['view']['fnhs'] = highlight(text, filename, True)
    return has_text > 0
def d(self):
    if self._d is None:
        self._d = PyQuery(u(self.data))
    return self._d
def download_file(file_id, file_name=None):
    ''' Returns the file to download, votes, comments and related files. '''
    error = (None, "")  # stores the id and the text of an error
    file_data = None
    if file_id is not None:
        # if an id is given, check that it's correct
        if is_valid_url_fileid(file_id):
            try:
                # try to convert the id coming from the URL into an internal one
                file_id = url2mid(file_id)
            except (bson.objectid.InvalidId, TypeError) as e:
                try:
                    # check whether it's an old ID
                    possible_file_id = filesdb.get_newid(file_id)
                    if possible_file_id is None:
                        logging.warn("Unresolved old numeric identifiers: %s." % e, extra={"fileid": file_id})
                        error = (404, "link_not_exist")
                    else:
                        logging.warn("Old numeric identifiers found: %s." % e, extra={"fileid": file_id})
                        return {"html": redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301), "error": (301, "")}
                except BaseException as e:
                    logging.exception(e)
                    error = (503, "")
                file_id = None
        else:
            abort(404)

    if file_id:
        try:
            file_data = get_file_metadata(file_id, file_name)
        except DatabaseError:
            error = (503, "")
        except FileNotExist:
            error = (404, "link_not_exist")
        except (FileRemoved, FileFoofindRemoved, FileNoSources):
            error = (410, "error_link_removed")
        except FileUnknownBlock:
            error = (404, "")

        if error[0] is None and not file_data:
            # no errors but no data either: the file exists but couldn't be retrieved
            error = (503, "")

    if file_id is None or error[0] is not None:
        html = ""
        if error[0] is not None:
            # if there's an error, it's returned rendered
            message_msgid = "error_%s_message" % error[0]
            message_msgstr = _(message_msgid)
            g.title = "%s %s" % (error[0], message_msgstr if message_msgstr != message_msgid else _("error_500_message"))
            html = render_template('error.html', error=error, full_screen=True)
        return {"html": html, "play": None, "file_data": file_data, "error": error}
    else:
        save_visited([file_data])
        title = u(file_data['view']['fn'])
        g.title = u"%s \"%s\" - %s" % (
            _(file_data['view']['action']).capitalize(),
            title[:100],
            g.title)
        g.page_description = u"%s %s" % (_(file_data['view']['action']).capitalize(), seoize_text(title, " ", True))

        # if the user is logged in, check whether they voted the file for the
        # active language and whether they marked the file as a favorite
        vote = None
        favorite = False
        if current_user.is_authenticated():
            vote = usersdb.get_file_vote(file_id, current_user, g.lang)
            favorite = any(file_id == favorite["id"] for favorite in usersdb.get_fav_files(current_user))

        # form to send comments
        form = CommentForm(request.form)
        if request.method == 'POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type == 0) and form.validate():
            usersdb.set_file_comment(file_id, current_user, g.lang, form.t.data)
            form.t.data = ""
            flash("comment_published_succesfully")
            # update the file with the per-language comments sum
            filesdb.update_file({"_id": file_id, "cs": usersdb.get_file_comments_sum(file_id), "s": file_data["file"]["s"]}, direct_connection=True)

        # if it has comments, store the comment number, the user who wrote it,
        # the comment itself and its votes
        comments = []
        if "cs" in file_data["file"]:
            comments = [(i, usersdb.find_userid(comment["_id"].split("_")[0]), comment, comment_votes(file_id, comment))
                        for i, comment in enumerate(usersdb.get_file_comments(file_id, g.lang), 1)]

        # on the download page, try to obtain words to search for if there are none
        if g.args.get("q", None) is None:
            query = download_search(file_data, file_name, "foofind")
            if query:
                g.args["q"] = query.replace(":", "")

        return {
            "html": render_template('files/download.html', file=file_data, vote={"k": 0} if vote is None else vote, favorite=favorite, form=form, comments=comments),
            "play": file_data["view"]["play"] if "play" in file_data["view"] else "",
            "file_data": file_data,
        }
        files.append(afile)

    if len(ids) >= result["total_found"]:
        total_found = len(files)
    else:
        total_found = max(result["total_found"], len(files))

    # complete the page description
    if files:
        # initial description
        page_description = g.page_description + ". "
        # description from one of the first files
        for f in files[:3]:
            if "description" in f["view"]["md"]:
                phrase = u(f["view"]["md"]["description"]).capitalize()
                page_description += phrase + " " if phrase[-1] == "." else ". "
                break
        # look for phrases to complete the description up to a minimum of 100 characters
        page_description_len = len(page_description)
        if page_description_len < 100:
            phrases = []
            for f in files[1:]:  # skip the first file, which could be the current download
                phrase = f["view"]["nfn"].capitalize()
                if phrase not in phrases:
                    phrases.append(phrase)
                    page_description_len += len(phrase)
                    if page_description_len >= 100:
def format_metadata(f, details, text):
    ''' Formats the files' metadata. '''
    def searchable(value, details):
        ''' Adds a link to the search if needed. '''
        if details:
            return '<a href="%s">%s</a>' % (url_for("files.search", q=value), value)
        else:
            return value

    view_md = f['view']['md'] = {}
    file_type = f['view']['file_type'] if 'file_type' in f['view'] else None
    if 'md' in f['file']:
        # if it comes in type:metadata format, the type is removed
        file_md = {(meta.split(":")[-1] if ":" in meta else meta): value
                   for meta, value in f['file']['md'].iteritems()}

        # duration for video and images
        put_duration = False
        duration = [0, 0, 0]  # h, m, s
        try:
            if "seconds" in file_md:
                put_duration = True
                duration[-1] = float(file_md["seconds"])
            if "minutes" in file_md:
                put_duration = True
                duration[-2] = float(file_md["minutes"])
            if "hours" in file_md:
                put_duration = True
                duration[-3] = float(file_md["hours"])
        except BaseException as e:
            logging.warn(e, extra=file_md)

        if not put_duration and "length" in file_md:
            # length received and no duration obtained any other way
            try:
                duration[-1] = to_seconds(file_md["length"])
                put_duration = True
            except BaseException as e:
                logging.error("Problem parsing duration: 'length'", extra=file_md)
        if not put_duration and "duration" in file_md:
            # duration received and not obtained any other way
            try:
                duration[-1] = to_seconds(file_md["duration"])
                put_duration = True
            except BaseException as e:
                logging.error("Problem parsing duration: 'duration'", extra=file_md)

        if put_duration:
            carry = 0
            for i in xrange(len(duration) - 1, -1, -1):
                unit = long(duration[i]) + carry
                duration[i] = unit % 60
                carry = unit / 60
            view_md["length"] = "%d:%02d:%02d" % tuple(duration) if duration[-3] > 0 else "%02d:%02d" % tuple(duration[-2:])

        # size for videos and images
        if "width" in file_md and 'height' in file_md:
            try:
                width = (int(file_md["width"].replace("pixels", "").replace("px", ""))
                         if isinstance(file_md["width"], basestring) else int(file_md["width"]))
                height = (int(file_md["height"].replace("pixels", "").replace("px", ""))
                          if isinstance(file_md["height"], basestring) else int(file_md["height"]))
                view_md["size"] = "%dx%dpx" % (width, height)
            except BaseException as e:
                logging.warn(e)

        # metadata that doesn't change
        try:
            view_md.update((meta, file_md[meta]) for meta in (
                ("folders", "description", "fileversion", "os", "files", "pages", "format")
                if details else ("files", "pages", "format")) if meta in file_md)
        except BaseException as e:
            logging.warn(e)

        # multimedia metadata
        try:
            if file_type in ("audio", "video", "image"):
                view_md.update((meta, file_md[meta]) for meta in ("genre", "track", "artist", "author", "colors") if meta in file_md)
        except BaseException as e:
            logging.warn(e)

        # don't show the title if it equals the filename
        title = None
        if "name" in file_md:
            title = u(file_md["name"])
        elif "title" in file_md:
            title = u(file_md["title"])
        if title:
            show_title = True
            text_longer = title
            text_shorter = f["view"]["fn"]
            if len(text_shorter) > len(text_longer):
                text_longer, text_shorter = text_shorter, text_longer
            if text_longer.startswith(text_shorter):
                text_longer = text_longer[len(text_shorter):]
                if len(text_longer) == 0 or (len(text_longer) > 0 and text_longer.startswith(".") and text_longer[1:] in EXTENSIONS):
                    show_title = False
            if show_title:
                view_md["title"] = title

        # type-specific or changing metadata
        try:
            if file_type == 'audio':  # album, year, bitrate, seconds, track, genre, length
                if 'album' in file_md:
                    year = 0
                    if "year" in file_md:
                        md_year = u(file_md["year"]).strip().split()
                        for i in md_year:
                            if i.isdigit() and len(i) == 4:
                                year = int(i)
                                break
                    album = file_md["album"]
                    view_md["album"] = ("%s (%d)" % (album, year)) if 1900 < year < 2100 else album
                if 'bitrate' in file_md:
                    bitrate = "%s kbps" % u(file_md["bitrate"]).replace("~", "")
                    view_md["quality"] = (
                        # bitrate, or bitrate - soundtype
                        ("%s - %s" % (bitrate, file_md["soundtype"]))
                        if details and "soundtype" in file_md else bitrate)
            elif file_type == 'archive':  # title, name, unpackedsize, folders, files
                if "unpackedsize" in file_md:
                    view_md["unpackedsize"] = file_md["unpackedsize"]
            elif file_type == 'document':  # title, author, pages, format, version
                if details:
                    if "format" in file_md:
                        view_md["format"] = "%s%s" % (file_md["format"], " %s" % file_md["formatversion"] if "formatversion" in file_md else "")
                    version = []
                    if "version" in file_md:
                        version.append(u(file_md["version"]))
                    if "revision" in file_md:
                        version.append(u(file_md["revision"]))
                    if version:
                        view_md["version"] = " ".join(version)
            elif file_type == 'image':  # title, artist, description, width, height, colors
                pass
            elif file_type == 'software':  # title, version, fileversion, os
                if "title" in view_md and "version" in file_md:
                    view_md["title"] += " %s" % file_md["version"]
            elif file_type == 'video':
                if details:
                    quality = []
                    try:
                        if 'framerate' in file_md:
                            quality.append("%d fps" % int(float(file_md["framerate"])))
                    except BaseException as e:
                        logging.warn(e)
                    if 'codec' in file_md:
                        quality.append(u(file_md["codec"]))
                    if quality:
                        view_md["quality"] = " - ".join(quality)
                view_md.update((i, file_md[i]) for i in ("series", "episode", "season") if i in file_md)
        except BaseException as e:
            logging.warn("%s\n\t%s\n\t%s" % (e, f, view_md))

    # if len(view_md)>0:
    f['view']['mdh'] = {}
    for metadata, value in view_md.items():
        if isinstance(value, basestring):
            final_value = Markup(value).striptags()
            final_value = searchable(highlight(text, final_value) if text else final_value, details)
        else:
            final_value = value
        f['view']['mdh'][metadata] = final_value
def download(file_id, file_name=None):
    ''' Shows the file to download, votes, comments and related files. '''
    def choose_filename_related(file_data):
        ''' Returns the chosen filename. '''
        f = init_data(file_data)
        choose_filename(f)
        return f

    def comment_votes(file_id, comment):
        ''' Gets the votes of comments. '''
        comment_votes = {}
        if "vs" in comment:
            for i, comment_vote in enumerate(usersdb.get_file_comment_votes(file_id)):
                if not comment_vote["_id"][0:40] in comment_votes:
                    comment_votes[comment_vote["_id"][0:40]] = [0, 0, 0]
                if comment_vote["k"] > 0:
                    comment_votes[comment_vote["_id"][0:40]][0] += 1
                else:
                    comment_votes[comment_vote["_id"][0:40]][1] += 1
                # if the user is logged in and has voted, store it to show it active
                if current_user.is_authenticated() and comment_vote["u"] == current_user.id:
                    comment_votes[comment_vote["_id"][0:40]][2] = comment_vote["k"]
        return comment_votes

    # store the parameters the search was made from, if applicable
    args = {}
    if request.referrer:
        querystring = urlparse(request.referrer).query
        if querystring:
            for params in u(querystring).encode("UTF-8").split("&"):
                param = params.split("=")
                if len(param) == 2:
                    args[param[0]] = u(urllib.unquote_plus(param[1]))

    try:
        file_id = url2mid(file_id)
    except Exception as e:
        # check whether it's an old ID
        try:
            possible_file_id = filesdb.get_newid(file_id)
            if possible_file_id is None:
                logging.warn("%s - %s" % (e, file_id))
                flash("link_not_exist", "error")
                abort(404)
            return redirect(
                url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name),
                code=301)
        except filesdb.BogusMongoException as e:
            logging.exception(e)
            abort(503)

    try:
        data = filesdb.get_file(file_id, bl=None)
    except filesdb.BogusMongoException as e:
        logging.exception(e)
        abort(503)

    # try to get the id from the sphinx server,
    # resolves data inconsistencies
    if not data:
        sid = get_id_server_from_search(file_id, file_name)
        if sid:
            try:
                data = filesdb.get_file(file_id, sid=sid, bl=None)
            except filesdb.BogusMongoException as e:
                logging.exception(e)
                abort(503)

    if data:
        if not data["bl"] in (0, None):
            if data["bl"] == 1:
                flash("link_not_exist", "error")
            elif data["bl"] == 3:
                flash("error_link_removed", "error")
                goback = True
            # block_files( mongo_ids=(data["_id"],) )
            abort(404)
    else:
        flash("link_not_exist", "error")
        abort(404)

    # get the data
    file_data = fill_data(data, True, file_name)
    if file_data["view"]["sources"] == {}:
        # all its sources are blocked
        flash("error_link_removed", "error")
        abort(404)
    save_visited([file_data])

    # title
    title = u(file_data['view']['fn'])
    g.title = u"%s \"%s%s\"%s%s" % (
        _(file_data['view']['action']).capitalize(),
        title[:50],
        "..." if len(title) > 50 else "",
        " - " if g.title else "",
        g.title)

    # get the related files
    related_files = search_related(split_file(file_data["file"])[0][:10])
    bin_file_id = mid2bin(file_id)
    ids = sorted({fid[0:3] for related in related_files for fid in get_ids(related) if fid[0] != bin_file_id})[:5]
    files_related = [choose_filename_related(data) for data in get_files(ids)]

    # if the user is logged in, check whether they voted the file for the active language
    vote = None
    if current_user.is_authenticated():
        vote = usersdb.get_file_vote(file_id, current_user, g.lang)
    if vote is None:
        vote = {"k": 0}

    # form to send comments
    form = CommentForm(request.form)
    if request.method == 'POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type == 0) and form.validate():
        usersdb.set_file_comment(file_id, current_user, g.lang, form.t.data)
        form.t.data = ""
        flash("comment_published_succesfully")
        # update the file with the per-language comments sum
        filesdb.update_file({"_id": file_id, "cs": usersdb.get_file_comments_sum(file_id), "s": file_data["file"]["s"]}, direct_connection=True)

    # if it has comments, store the comment number, the user who wrote it,
    # the comment itself and its votes
    comments = []
    if "cs" in file_data["file"]:
        comments = [(i, usersdb.find_userid(comment["_id"][0:24]), comment, comment_votes(file_id, comment))
                    for i, comment in enumerate(usersdb.get_file_comments(file_id, g.lang), 1)]

    return render_template('files/download.html', file=file_data, args=args, vote=vote, files_related=files_related, comments=comments, form=form)
def innergroup_hash(field_path, afile):
    return hash(u(reduce(dictget, field_path, afile)) or "")
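# Hedged illustration (not part of the original module): dictget is assumed to
# be a None-safe nested getter, so innergroup_hash hashes the value found at a
# nested path inside the file document.
def _dictget(d, key):
    return d.get(key) if isinstance(d, dict) else None

_afile = {"md": {"audio:artist": u"someone"}}
assert reduce(_dictget, ("md", "audio:artist"), _afile) == u"someone"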
def download(file_id, file_name=None):
    ''' Shows the file to download, votes, comments and related files. '''
    def choose_filename_related(file_data):
        ''' Returns the chosen filename. '''
        f = init_data(file_data)
        choose_filename(f)
        return f

    def comment_votes(file_id, comment):
        ''' Gets the votes of comments. '''
        comment_votes = {}
        if "vs" in comment:
            for i, comment_vote in enumerate(usersdb.get_file_comment_votes(file_id)):
                if not comment_vote["_id"][0:40] in comment_votes:
                    comment_votes[comment_vote["_id"][0:40]] = [0, 0, 0]
                if comment_vote["k"] > 0:
                    comment_votes[comment_vote["_id"][0:40]][0] += 1
                else:
                    comment_votes[comment_vote["_id"][0:40]][1] += 1
                # if the user is logged in and has voted, store it to show it active
                if current_user.is_authenticated() and comment_vote["u"] == current_user.id:
                    comment_votes[comment_vote["_id"][0:40]][2] = comment_vote["k"]
        return comment_votes

    if file_name is not None:
        g.title = _("download").capitalize() + " " + file_name + " - " + g.title
    else:
        g.title = _("download").capitalize()

    # store the parameters the search was made from, if applicable
    args = {}
    if request.referrer:
        querystring = urlparse(request.referrer).query
        if querystring:
            for params in querystring.split("&"):
                param = params.split("=")
                if len(param) == 2:
                    args[param[0]] = u(urllib.unquote_plus(param[1]))

    try:
        file_id = url2mid(file_id)
    except BaseException as e:
        logging.warn((e, file_id))
        abort(404)

    data = filesdb.get_file(file_id, bl=None)
    if data:
        if not data["bl"] in (0, None):
            if data["bl"] == 1:
                flash("link_not_exist", "error")
            elif data["bl"] == 3:
                flash("error_link_removed", "error")
                goback = True
            # block_files( mongo_ids=(data["_id"],) )
            abort(404)
    else:
        flash("link_not_exist", "error")
        abort(404)

    # get the data
    file_data = fill_data(data, True, file_name)
    save_visited([file_data])

    # get the related files
    related_files = search_related(split_file(file_data["file"])[0][0:50])
    bin_file_id = mid2bin(file_id)
    ids = sorted({fid for related in related_files for fid in get_ids(related) if fid[0] != bin_file_id})[:5]
    files_related = [choose_filename_related(data) for data in get_files(ids)]

    # if the user is logged in, check whether they voted the file for the active language
    vote = None
    if current_user.is_authenticated():
        vote = usersdb.get_file_vote(file_id, current_user, g.lang)
    if vote is None:
        vote = {"k": 0}

    # form to send comments
    form = CommentForm(request.form)
    if request.method == 'POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type == 0) and form.validate():
        usersdb.set_file_comment(file_id, current_user, g.lang, form.t.data)
        form.t.data = ""
        flash("comment_published_succesfully")
        # update the file with the per-language comments sum
        filesdb.update_file({"_id": file_id, "cs": usersdb.get_file_comments_sum(file_id), "s": file_data["file"]["s"]}, direct_connection=True)

    # if it has comments, store the comment number, the user who wrote it,
    # the comment itself and its votes
    comments = []
    if "cs" in file_data["file"]:
        comments = [(i, usersdb.find_userid(comment["_id"][0:24]), comment, comment_votes(file_id, comment))
                    for i, comment in enumerate(usersdb.get_file_comments(file_id, g.lang), 1)]

    return render_template('files/download.html', file=file_data, args=args, vote=vote, files_related=files_related, comments=comments, form=form)
def build_source_links(f):
    ''' Builds the links properly. '''
    def get_domain(src):
        ''' Returns the domain of a URL. '''
        url_parts = urlparse(src).netloc.split('.')
        i = len(url_parts) - 1
        if len(url_parts[i]) <= 2 and len(url_parts[i-1]) <= 3:
            return url_parts[i-2] + '.' + url_parts[i-1] + '.' + url_parts[i]
        else:
            return url_parts[i-1] + '.' + url_parts[i]

    f['view']['action'] = 'download'
    f['view']['sources'] = {}
    max_weight = 0
    icon = ""

    # source grouping
    source_groups = {}
    file_sources = f['file']['src'].items()
    file_sources.sort(key=lambda x: x[1]["t"])

    for hexuri, src in file_sources:
        if not src.get('bl', None) in (0, None):
            continue

        url_pattern = downloader = join = False
        count = 0
        part = url = ""
        source_data = g.sources[src["t"]] if "t" in src and src["t"] in g.sources else None

        if source_data is None:
            # the file has a source that doesn't exist in the "sources" table
            logging.error('The file has a source missing from the "sources" table: %s' % src["t"], extra={"file": f})
            if feedbackdb.initialized:
                feedbackdb.notify_source_error(f['file']["_id"], f['file']["s"])
            continue
        elif "crbl" in source_data and int(source_data["crbl"]) == 1:
            # the source is blocked
            continue
        elif "w" in source_data["g"] or "f" in source_data["g"] or "s" in source_data["g"]:
            # direct download or streaming
            link_weight = 1
            tip = source_data["d"]
            icon = "web"
            source_groups[icon] = tip
            source = get_domain(src['url']) if "f" in source_data["g"] else source_data["d"]
            url = src['url']
            if "url_pattern" in source_data and not url.startswith(("https://", "http://", "ftp://")):
                url_pattern = True
            # when in doubt, prefer streaming
            if "s" in source_data["g"]:
                f['view']['action'] = "listen" if f['view']['ct'] == CONTENT_AUDIO else 'watch'
                link_weight *= 2
        # torrenthash before torrent because it's a more specific case
        elif source_data["d"] == "BitTorrentHash":
            downloader = True
            link_weight = 0.7 if 'torrent:tracker' in f['file']['md'] or 'torrent:trackers' in f['file']['md'] else 0.1
            tip = "Torrent MagnetLink"
            source = "tmagnet"
            icon = "torrent"
            if not icon in source_groups:
                source_groups[icon] = tip  # the magnet link has less priority for the text
            join = True
            count = int(src['m'])
            part = "xt=urn:btih:" + src['url']
            if 'torrent:tracker' in f['file']['md']:
                part += unicode('&tr=' + urllib.quote_plus(u(f['file']['md']['torrent:tracker']).encode("UTF-8")), "UTF-8")
            elif 'torrent:trackers' in f['file']['md']:
                trackers = f['file']['md']['torrent:trackers']
                if isinstance(trackers, basestring):
                    part += unicode("".join('&tr=' + urllib.quote_plus(tr) for tr in u(trackers).encode("UTF-8").split(" ")), "UTF-8")
        elif "t" in source_data["g"]:
            downloader = True
            link_weight = 0.8
            url = src['url']
            if "url_pattern" in source_data and not url.startswith(("https://", "http://", "ftp://")):
                url_pattern = True
                tip = source = get_domain(source_data["url_pattern"] % url)
            else:
                tip = source = get_domain(src['url'])
            icon = "torrent"
            source_groups[icon] = tip
        elif source_data["d"] == "Gnutella":
            link_weight = 0.2
            tip = "Gnutella"
            source = icon = "gnutella"
            part = "xt=urn:sha1:" + src['url']
            join = True
            count = int(src['m'])
            source_groups[icon] = tip
        elif source_data["d"] == "eD2k":
            downloader = True
            link_weight = 0.1
            tip = "eD2k"
            source = icon = "ed2k"
            url = "ed2k://|file|" + f['view']['pfn'] + "|" + str(f['file']['z'] if "z" in f["file"] else 1) + "|" + src['url'] + "|/"
            count = int(src['m'])
            source_groups[icon] = tip
        elif source_data["d"] == "Tiger":
            link_weight = 0
            tip = "Gnutella"
            source = icon = "gnutella"
            part = "xt=urn:tiger:" + src['url']
            join = True
        elif source_data["d"] == "MD5":
            link_weight = 0
            tip = "Gnutella"
            source = icon = "gnutella"
            part = "xt=urn:md5:" + src['url']
            source_groups[icon] = tip
            join = True
        else:
            continue

        if source in f['view']['sources']:
            view_source = f['view']['sources'][source]
        else:
            view_source = f['view']['sources'][source] = {}
        view_source.update(source_data)

        if 'downloader' in view_source:
            if downloader:
                view_source['downloader'] = 1
        else:
            view_source['downloader'] = 1 if downloader else 0

        view_source['tip'] = tip
        view_source['icon'] = icon
        view_source['icons'] = source_data.get("icons", False)
        view_source['join'] = join
        view_source['source'] = "streaming" if "s" in source_data["g"] else "direct_download" if "w" in source_data["g"] else "P2P" if "p" in source_data["g"] else ""

        # don't clobber the count when there are several files from the same source
        if not 'count' in view_source or count > 0:
            view_source['count'] = count
        if not "parts" in view_source:
            view_source['parts'] = []
        if not 'urls' in view_source:
            view_source['urls'] = []
        if part:
            view_source['parts'].append(part)
        if url:
            if url_pattern:
                view_source['urls'] = [source_data["url_pattern"] % url]
                f['view']['source_id'] = url
                view_source["pattern_used"] = True
            elif not "pattern_used" in view_source:
                view_source['urls'].append(url)
            if source_data["d"] != "eD2k":
                view_source['count'] += 1
        if link_weight > max_weight:
            max_weight = link_weight
            f['view']['source'] = source

    f['view']['source_groups'] = sorted(source_groups.items())

    if "source" not in f["view"]:
        raise FileNoSources

    if icon != "web":
        for src, info in f['view']['sources'].items():
            if info['join']:
                f['view']['sources'][src]['urls'].append(
                    "magnet:?" + "&".join(info['parts']) + "&dn=" + f['view']['pfn']
                    + ("&xl=" + str(f['file']['z']) if 'z' in f['file'] else ""))
            elif not 'urls' in info:
                del(f['view']['sources'][src])
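# Hedged, self-contained sketch (not part of the original module) of the magnet
# URI assembled above from the collected "parts"; urllib.quote_plus is the same
# stdlib call the code uses for trackers.
import urllib

def build_magnet(btih, display_name, size=None, trackers=()):
    parts = ["xt=urn:btih:" + btih]
    parts.extend("tr=" + urllib.quote_plus(tr) for tr in trackers)
    return ("magnet:?" + "&".join(parts) + "&dn=" + display_name
            + ("&xl=%s" % size if size else ""))

assert build_magnet("c12fe1c06bba254a9dc9f519b335aa7c1367a88a", "file.ogg", 1234) == \
    "magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a&dn=file.ogg&xl=1234"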
def download(file_id, file_name=""): g.page_type = FILE_PAGE_TYPE if request.referrer: try: posibles_queries = referrer_parser.match(request.referrer) if posibles_queries: query = posibles_queries.group(1) or posibles_queries.group(2) or "" if query: get_query_info(u(urllib2.unquote_plus(query).decode("utf-8"))) except: pass error = None file_data=None if file_id is not None: #si viene un id se comprueba que sea correcto try: #intentar convertir el id que viene de la url a uno interno file_id=url2mid(file_id) except TypeError as e: try: #comprueba si se trate de un ID antiguo possible_file_id = filesdb.get_newid(file_id) if possible_file_id is None: logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id}) error=404 else: logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id}) return {"html": empty_redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":301} except BaseException as e: logging.exception(e) error=503 file_id=None if file_id: try: file_data=get_file_metadata(file_id, file_name.replace("-"," ")) except DatabaseError: error=503 except FileNotExist: error=404 except (FileRemoved, FileFoofindRemoved, FileNoSources): error=410 except FileUnknownBlock: error=404 if error is None and not file_data: #si no ha habido errores ni hay datos, es porque existe y no se ha podido recuperar error=503 if error: abort(error) # completa datos de torrent file_data = torrents_data(file_data, True, g.category) if not file_data: abort(404) if file_data["view"]["category"]: g.category = file_data["view"]["category"] if file_data["view"]["category"].tag=="p**n": g.is_adult_content = True else: g.category = file_data["view"]["category_type"] # no permite acceder ficheros que deberian ser bloqueados prepared_phrase = blacklists.prepare_phrase(file_data['view']['nfn']) if prepared_phrase in blacklists["forbidden"] or (prepared_phrase in blacklists["misconduct"] and prepared_phrase in blacklists["underage"]): g.blacklisted_content = "File" if not g.show_blacklisted_content: abort(404) query = download_search(file_data, file_name, "torrent").replace("-"," ") related = single_search(query, category=None, not_category=(None if g.is_adult_content else "p**n"), title=("Related torrents",3,None), zone="File / Related", last_items=[], limit=30, max_limit=15, ignore_ids=[mid2hex(file_id)], show_order=None) # elige el titulo de la página title = file_data['view']['fn'] # recorta el titulo hasta el proximo separador if len(title)>101: for pos in xrange(101, 30, -1): if title[pos] in SEPPER: title = title[:pos].strip() break else: title = title[:101] g.title = [title] page_description = "" if "description" in file_data["view"]["md"]: page_description = file_data["view"]["md"]["description"].replace("\n", " ") if not page_description: if g.category: page_description = _("download_category_desc", category=singular_filter(g.category.title).lower(), categorys=g.category.title.lower()).capitalize() else: page_description = _("download_desc") if len(page_description)<50: if page_description: page_description += ". " page_description += " ".join(text.capitalize()+"." for text in related[1]["files_text"]) if len(page_description)>180: last_stop = page_description[:180].rindex(".") if "." 
in page_description[:180] else 0 if last_stop<100: last_stop = page_description[:180].rindex(" ") if " " in page_description[:180] else 0 if last_stop<100: last_stop = 180 page_description = page_description[:last_stop]+"." g.page_description = page_description is_canonical_filename = file_data["view"]["seo-fn"]==file_name # registra visita al fichero if g.search_bot: searchd.log_bot_event(g.search_bot, True) else: save_visited([file_data]) if related[0]: g.must_cache = 3600 # last-modified g.last_modified = file_data["file"]["ls"] return render_template('file.html', related_query = query, file_data=file_data, related_files=related, is_canonical_filename=is_canonical_filename, featured=get_featured(related[1]["count"]+len(file_data["view"]["md"]), 1))
def choose_filename(f, text_cache=None):
    ''' Chooses the right filename. '''
    srcs = f['file']['src']
    fns = f['file']['fn']
    chosen = None
    max_count = -1
    current_weight = -1
    if text_cache and text_cache[0] in fns:
        # text is actually an fn ID
        chosen = text_cache[0]
    else:
        for hexuri, src in srcs.items():
            if 'bl' in src and src['bl'] != 0:
                continue
            for crc, srcfn in src['fn'].items():
                if crc not in fns:
                    # for sources that have a name but aren't in the file
                    continue
                # if it has no name, it isn't taken into account
                m = srcfn['m'] if len(fns[crc]['n']) > 0 else 0
                if 'c' in fns[crc]:
                    fns[crc]['c'] += m
                else:
                    fns[crc]['c'] = m
                text_weight = 0
                if text_cache:
                    fn_parts = slugify(fns[crc]['n']).strip().split(" ")
                    if len(fn_parts) > 0:
                        text_words = slugify(text_cache[0]).split(" ")
                        # scores the number and order of matches
                        last_pos = -1
                        max_length = length = 0
                        occurrences = [0] * len(text_words)
                        for part in fn_parts:
                            pos = text_words.index(part) if part in text_words else -1
                            if pos != -1 and (last_pos == -1 or pos == last_pos + 1):
                                length += 1
                            else:
                                if length > max_length:
                                    max_length = length
                                length = 0
                            if pos != -1:
                                occurrences[pos] = 1
                            last_pos = pos
                        if length > max_length:
                            max_length = length
                        text_weight = sum(occurrences) * 100 + max_length
                f['file']['fn'][crc]['tht'] = text_weight
                better = fns[crc]['c'] > max_count
                if text_weight > current_weight or (better and text_weight == current_weight):
                    current_weight = text_weight
                    chosen = crc
                    max_count = fns[crc]['c']

    f['view']['url'] = mid2url(hex2mid(f['file']['_id']))
    f['view']['fnid'] = chosen
    if chosen:
        filename = fns[chosen]['n']
        ext = fns[chosen]['x']
    else:
        # uses filename from src
        filename = ""
        for hexuri, src in srcs.items():
            if src['url'].find("/") != -1:
                filename = src['url']
        if filename == "":
            return
        filename = filename[filename.rfind("/") + 1:]
        ext = filename[filename.rfind(".") + 1:]
        filename = filename[0:filename.rfind(".")]
        # TODO: if no filename is given, build one from the metadata
        # (for example, series - episode title)

    filename = extension_filename(filename, ext)
    f['view']['fn'] = filename.replace("?", "")
    f['view']['qfn'] = qfn = u(filename).encode("UTF-8")  # filename escaped to generate the download URLs
    f['view']['pfn'] = urllib.quote(qfn).replace(" ", "%20")  # P2P filename
    nfilename = seoize_text(filename, " ", True, 0)
    f['view']['nfn'] = nfilename
    # add the filename as a keyword
    g.keywords.update(set(keyword for keyword in nfilename.split(" ") if len(keyword) > 1))

    # filename with the words matching the search highlighted
    if text_cache:
        f['view']['fnh'], f['view']['fnhs'] = highlight(text_cache[2], filename, True)
    else:
        f['view']['fnh'] = filename  # this is only for download, which never has text
    return current_weight > 0  # indicates whether the searched text was found
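# Hedged, self-contained sketch (not part of the original module) of the
# name-vs-query scoring used above: each distinct query word found in the
# filename adds 100, and the longest run of query words appearing in order adds
# up to len(text_words) more.
def _text_weight(fn_parts, text_words):
    last_pos = -1
    max_length = length = 0
    occurrences = [0] * len(text_words)
    for part in fn_parts:
        pos = text_words.index(part) if part in text_words else -1
        if pos != -1 and (last_pos == -1 or pos == last_pos + 1):
            length += 1
        else:
            if length > max_length:
                max_length = length
            length = 0
        if pos != -1:
            occurrences[pos] = 1
        last_pos = pos
    if length > max_length:
        max_length = length
    return sum(occurrences) * 100 + max_length

# both query words matched (200) plus an in-order run of length 2:
assert _text_weight(["foo", "bar", "x"], ["foo", "bar"]) == 202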
def url_search_filter(new_params, args=None, delete_params=[]):
    ''' Returns the parameters to generate a search URL. '''
    # current filters, minus deleted parameters
    filters = ({key: value for key, value in args.iteritems() if key not in delete_params}
               if "all" not in delete_params
               else {"q": args["q"]} if "q" in args else {})

    # add new parameters
    if "query" in new_params:
        filters["q"] = u(new_params["query"])

    if 'src' in new_params:
        active_srcs = g.active_srcs
        new_src = new_params["src"]
        # The new link is generated taking into account the active sources per
        # source type:
        #  - clicking on a source type activates or deactivates it completely
        #  - clicking on a source activates or deactivates it, considering whether
        #    the source type was active or whether the whole type must be activated
        #  - only the sources visible in the filter are taken into account

        # streaming
        has_streaming, toggle_streaming = "streaming" in active_srcs, "streaming" == new_src
        streamings = ([] if has_streaming and toggle_streaming
                      else ["streaming"] if toggle_streaming
                      else [value for value in g.visible_sources_streaming if (value == new_src) ^ (value in active_srcs)])
        if streamings == g.visible_sources_streaming:
            streamings = ["streaming"]  # activates the whole source type

        # download
        has_download, toggle_download = "download" in active_srcs, "download" == new_src
        downloads = ([] if has_download and toggle_download
                     else ["download"] if toggle_download
                     else [value for value in g.visible_sources_download if (value == new_src) ^ (value in active_srcs)])
        if downloads == g.visible_sources_download:
            downloads = ["download"]  # activates the whole source type

        # p2p
        has_p2p, toggle_p2p = "p2p" in active_srcs, "p2p" == new_src
        p2ps = ([] if has_p2p and toggle_p2p
                else ["p2p"] if toggle_p2p
                else [value for value in g.sources_p2p if (value == new_src) ^ (value in active_srcs)])
        if p2ps == g.sources_p2p:
            p2ps = ["p2p"]  # activates the whole source type

        filters["src"] = streamings + downloads + p2ps

    if 'type' in new_params:
        filters["type"] = ([value for value in FILTERS["type"] if (value == new_params["type"]) ^ (value in filters["type"])]
                           if "type" in filters else [new_params["type"]])
    if 'size' in new_params:
        filters["size"] = new_params["size"]

    # having all types active in type or src is the same as having none
    if "type" in filters and (not filters["type"] or all(atype in filters["type"] for atype in FILTERS["type"])):
        del filters["type"]
    if "src" in filters and (not filters["src"] or all(src in filters["src"] for src in FILTERS["src"].iterkeys())):
        del filters["src"]

    # split off the query
    if "q" in filters:
        query = filters["q"].replace(" ", "_") if filters["q"] else u""
        del filters["q"]
    else:
        query = u""

    # generate the output URL
    if filters:
        return (g.search_url + quote_plus(query.encode('utf8')) + "/"
                + "/".join(param + ":" + ",".join(filters[param]) for param in ["type", "src", "size"] if param in filters))
    else:
        return g.search_url + quote_plus(query.encode('utf8'))
def generate(server, part, afilter, batch_size, output):
    if not output:
        output = dirname(abspath(__file__)) + "/gen/" + str(part) + "/"
    ff = FilesFetcher(server, afilter, batch_size)
    ff.start()
    suffix = "." + str(part)
    count = error_count = 0
    logging.info("Sitemap generation starts on server %s." % server)
    for afile in ff:
        try:
            count += 1
            # check that it's not blocked
            if int(float(afile.get("bl", 0))) != 0:
                continue
            # check that it has valid sources
            for src in afile["src"].itervalues():
                if "t" in src and src["t"] in {3, 7, 79, 80, 81, 82, 83, 90} and int(float(src.get("bl", 0))) == 0:
                    main_src = src
                    break
            else:
                continue
            filename = None
            # choose some interesting filename
            for fn in afile.get("fn", {}).itervalues():
                filename = fn["n"]
                if filename == "download" or IS_BTIH.match(filename) or filename.startswith("[TorrentDownloads"):
                    continue
                extension = fn.get("x", None)
                if extension and not filename.endswith("." + extension):
                    filename += "." + extension
                break
            else:
                md = afile.get("md", {})
                for possible_name in ("torrent:name", "torrent:title", "video:title", "video:name"):
                    if possible_name in md:
                        filename = u(md[possible_name])
                        break
                if not filename:
                    filename = u(main_src["url"].rsplit("/", 1)[-1])
            if filename:
                first_seen = afile["fs"]
                get_writer(first_seen, count, output, suffix).write(
                    "<url><lastmod>%s</lastmod><loc>%%s%s-%s</loc></url>\n" % (
                        first_seen.strftime("%Y-%m-%dT%H:%M:%SZ"),
                        seoize_text(filename, "-", True),
                        mid2url(afile["_id"])))
        except BaseException as e:
            error_count += 1
            if error_count > 100:
                # after more than 100 errors, stop the indexing with an error
                raise e
        if count % 10000 == 0:
            logging.info("Sitemap generation progress on server %s." % server, extra={"count": count, "error_count": error_count})
    close_writers()
    sort_files(output)
    logging.info("Sitemap generation finished on server %s." % server)