def get_query_info(query=None, category=None, subcategory=None, check_qs=True):
    """
    Publish the current query, category and subcategory on ``g``.

    Values that are not given explicitly are looked up in the request query
    string (arguments "q" and "c") when ``check_qs`` is true.

    Returns True when the query or the category was taken from the query
    string, False otherwise.
    """
    from_query_string = False

    if check_qs:
        if not query:
            query = request.args.get("q", None)
            if query:
                from_query_string = True
        if not category:
            category = request.args.get("c", None)
            if category:
                from_query_string = True

    if query:
        g.clean_query = clean_query(query)
        g.query = g.clean_query.replace("_", " ")
        g.safe_query = seoize_text(query, " ").lower()

    # only categories known by their url are accepted
    if category and category in g.categories_by_url:
        g.category = g.categories_by_url[category]
        if g.category.adult_content:
            g.is_adult_content = True

    if g.category and subcategory:
        # subcategories arrive with underscores instead of spaces
        subcategory = subcategory.replace("_", " ")
        if subcategory in g.category.all_subcategories:
            g.subcategory = subcategory

    return from_query_string
def seoize_filter(text, separator, is_url, max_length=None):
    """Forward the arguments, unchanged and in order, to ``seoize_text``."""
    seo_text = seoize_text(text, separator, is_url, max_length)
    return seo_text
def format_metadata(f,text_cache, search_text_shown=False):
    '''
    Format the metadata of a file for display.

    Fills three dicts inside f['view']:
      - 'md': displayable metadata values.
      - 'searches': values prepared for search links.
      - 'mdh': string values shorter than 500 characters, passed through
        highlight() when there is a searched text shorter than 100 characters.
    The thumbnail, when present, is copied to f['view']['thumbnail'].

    f: file dict. Reads f['file']['md'] (optional) and, from f['view'],
       'file_type' (optional), 'fn' and 'nfn'.
    text_cache: sequence whose third item is the text handed to highlight(),
       or a false value when there is none.
    search_text_shown: not used by this function.

    Returns None; f['view'] is modified in place.
    '''
    text = text_cache[2] if text_cache else None
    view_md = f['view']['md'] = {}
    view_searches = f["view"]["searches"]={}
    file_type = f['view']['file_type'] if 'file_type' in f['view'] else None

    if 'md' in f['file']:
        # keys may come as "type:name"; only the part after the last colon is kept
        file_md = {meta.split(":")[-1]: value for meta, value in f['file']['md'].iteritems()}

        # duration for video and images
        seconds = get_float(file_md, "seconds")
        minutes = get_float(file_md, "minutes")
        hours = get_float(file_md, "hours")

        # if no duration was received, try with length and duration
        if seconds is None and minutes is None and hours is None:
            seconds = get_float(file_md, "length") or get_float(file_md, "duration")

        duration = [hours or 0, minutes or 0, seconds or 0] # h, m, s
        if any(duration):
            # carry seconds into minutes and minutes into hours; hours only
            # receive the carry and are never wrapped, so long durations
            # (60 hours or more) are not truncated
            carry = 0
            for i in (2, 1):
                unit = long(duration[i]) + carry
                duration[i] = unit%60
                carry = unit//60
            duration[0] = long(duration[0]) + carry

            if duration[0] > 0:
                view_md["length"] = "%d:%02d:%02d" % tuple(duration)
            else:
                view_md["length"] = "%02d:%02d" % tuple(duration[1:])

        # size for videos and images
        width = get_int(file_md, "width")
        height = get_int(file_md, "height")
        if width and height:
            view_md["size"] = "%dx%dpx" % (width, height)

        # metadata copied as is
        try:
            view_md.update(
                (meta, file_md[meta]) for meta in
                (
                    "folders","description","fileversion","os","files","pages","format",
                    "seeds","leechs","composer","publisher","encoding","director","writer","starring","producer","released"
                ) if meta in file_md
            )
            view_searches.update(
                (meta, seoize_text(file_md[meta],"_",False)) for meta in
                (
                    "folders","os","composer","publisher","director","writer","starring","producer"
                ) if meta in file_md
            )
        except BaseException as e:
            logging.warning(e)

        # thumbnail
        if "thumbnail" in file_md:
            f["view"]["thumbnail"] = file_md["thumbnail"]

        # metadata stored under alternative names; when several aliases are
        # present the last one listed wins
        try:
            view_md.update(("tags", file_md[meta]) for meta in ("keywords", "tags", "tag") if meta in file_md)
            if "tags" in view_md and isinstance(view_md["tags"], basestring):
                view_searches["tags"] = []
            view_md.update(("comments", file_md[meta]) for meta in ("comments", "comment") if meta in file_md)
            view_md.update(("track", file_md[meta]) for meta in ("track", "track_number") if meta in file_md)
            view_md.update(("created_by", file_md[meta]) for meta in ("created_by", "encodedby","encoder") if meta in file_md)
            view_md.update(("language", file_md[meta]) for meta in ("language", "lang") if meta in file_md)
            view_md.update(("date", file_md[meta]) for meta in ("published", "creationdate") if meta in file_md)
            view_md.update(("trackers", "\n".join(file_md[meta].split(" "))) for meta in ("trackers", "tracker") if meta in file_md and isinstance(file_md[meta], basestring))
            view_md.update(("hash", file_md[meta]) for meta in ("hash", "infohash") if meta in file_md)
            view_md.update(("visualizations", file_md[meta]) for meta in ("count", "viewCount") if meta in file_md)
            if "unpackedsize" in file_md:
                view_md["unpacked_size"]=file_md["unpackedsize"]
            if "privateflag" in file_md:
                view_md["private_file"]=file_md["privateflag"]
        except BaseException as e:
            logging.warning(e)

        # torrents -> filedir filesizes filepaths
        if "filepaths" in file_md:
            filepaths = {}
            for path, size in izip_longest(u(file_md["filepaths"]).split("///"), u(file_md.get("filesizes","")).split(" "), fillvalue=None):
                # sizes without a file are not allowed
                if not path:
                    break

                parts = path.strip("/").split("/")

                # create the subdirectories (directory keys start with "/")
                relative_path = filepaths
                for part in parts[:-1]:
                    if "/"+part not in relative_path:
                        relative_path["/"+part] = {}
                    relative_path = relative_path["/"+part]

                if "/"+parts[-1] in relative_path:
                    # the directory already exists: nothing to do
                    pass
                elif parts[-1] in relative_path:
                    # a repeated last level is a directory (content error)
                    relative_path["/"+parts[-1]] = {}
                    del relative_path[parts[-1]]
                else:
                    relative_path[parts[-1]] = size

            if "filedir" in file_md:
                filepaths = {"/"+u(file_md["filedir"]).strip("/"):filepaths}

            if filepaths:
                view_md["filepaths"] = filepaths
                view_searches["filepaths"] = {}

        # multimedia metadata
        try:
            # video and/or audio codec
            if "video_codec" in file_md:
                # the audio codec, when present, is appended after the video codec
                if "audio_codec" in file_md:
                    view_md["codec"] = file_md["video_codec"]+" "+file_md["audio_codec"]
                else:
                    view_md["codec"] = file_md["video_codec"]
            else:
                view_md.update(("codec", file_md[meta]) for meta in ("audio_codec", "codec") if meta in file_md)

            if file_type in ("audio", "video", "image"):
                view_md.update((meta, file_md[meta]) for meta in ("genre", "track", "artist", "author", "colors") if meta in file_md)
                view_searches.update((meta, seoize_text(file_md[meta], "_", False)) for meta in ("artist", "author") if meta in file_md)
        except BaseException as e:
            logging.warning(e)

        # the title is not shown when it equals the filename
        if "name" in file_md:
            title = u(file_md["name"])
        elif "title" in file_md:
            title = u(file_md["title"])
        else:
            title = f['view']['nfn']

        if title:
            show_title = True
            text_longer = title
            text_shorter = f["view"]["fn"]
            if len(text_shorter)>len(text_longer):
                text_longer, text_shorter = text_shorter, text_longer

            if text_longer.startswith(text_shorter):
                # hide the title when both are equal or differ only by a known extension
                text_longer = text_longer[len(text_shorter):]
                if not text_longer or (text_longer.startswith(".") and text_longer[1:] in EXTENSIONS):
                    show_title = False

            if show_title:
                view_md["title"] = title
                view_searches["title"] = seoize_text(title, "_", False)

        # metadata that changes or is specific to a file type
        try:
            if "date" in view_md:
                # try to get a valid date; drop the value when it can not be parsed
                try:
                    view_md["date"]=format_datetime(datetime.fromtimestamp(strtotime(view_md["date"])))
                except BaseException:
                    del view_md["date"]

            if file_type == 'audio': # album, year, bitrate, seconds, track, genre, length
                if 'album' in file_md:
                    album = u(file_md["album"])
                    year = get_int(file_md, "year")
                    if album:
                        view_md["album"] = album + (" (%d)"%year if year and 1900<year<2100 else "")
                        view_searches["album"] = seoize_text(album, "_", False)
                if 'bitrate' in file_md:
                    # bitrate, bitrate - soundtype or bitrate - soundtype - channels
                    bitrate = get_int(file_md, "bitrate")
                    if bitrate:
                        soundtype=" - %s" % file_md["soundtype"] if "soundtype" in file_md else ""
                        channels = get_float(file_md, "channels")
                        channels=" (%g %s)" % (round(channels,1),_("channels")) if channels else ""
                        view_md["quality"] = "%g kbps %s%s" % (bitrate,soundtype,channels)

            elif file_type == 'document': # title, author, pages, format, version
                if "format" in file_md:
                    view_md["format"] = "%s%s" % (file_md["format"]," %s" % file_md["formatversion"] if "formatversion" in file_md else "")
                version = []
                if "formatVersion" in file_md:
                    version.append(u(file_md["formatVersion"]))
                elif "version" in file_md:
                    version.append(u(file_md["version"]))
                if "revision" in file_md:
                    version.append(u(file_md["revision"]))
                if version:
                    view_md["version"] = " ".join(version)

            elif file_type == 'image': # title, artist, description, width, height, colors
                pass

            elif file_type == 'software': # title, version, fileversion, os
                if "title" in view_md and "version" in file_md:
                    view_md["title"] += " %s" % file_md["version"]
                    view_searches["title"] += " %s" % seoize_text(file_md["version"], "_", False)

            elif file_type == 'video':
                quality = []

                framerate = get_int(file_md, "framerate")
                if framerate:
                    quality.append("%d fps" % framerate)

                if 'codec' in view_md:
                    # the codec is shown only as part of the quality
                    quality.append(u(view_md["codec"]))
                    del view_md["codec"]

                if quality:
                    view_md["quality"] = " - ".join(quality)

                if "series" in file_md:
                    series = u(file_md["series"])
                    if series:
                        safe_series = seoize_text(series, "_", False)
                        view_md["series"] = series
                        view_searches["series"]="%s_%s"%(safe_series,"(series)")

                        season = get_int(file_md, "season")
                        if season:
                            view_md["season"] = season
                            view_searches["season"]="%s_(s%d)"%(safe_series,season)

                            episode = get_int(file_md, "episode")
                            if episode:
                                view_md["episode"] = episode
                                view_searches["episode"]="%s_(s%de%d)"%(safe_series,season,episode)

        except BaseException as e:
            logging.exception("Error obteniendo metadatos especificos del tipo de contenido.")

    view_mdh=f['view']['mdh']={}
    # iterate over a snapshot because empty values are deleted on the way
    for metadata,value in list(view_md.items()):
        if isinstance(value, basestring):
            value = clean_html(value)
            if not value:
                del view_md[metadata]
                continue

            view_md[metadata]=value

            # highlight the contents that match the search, only for texts that are not too long
            if len(value)<500:
                view_mdh[metadata]=highlight(text,value) if text and len(text)<100 else value
        elif isinstance(value, float):
            # float values are shown as integers
            view_md[metadata]=str(int(value))
        else:
            view_md[metadata]=value
def _filename_text_weight(fn_parts, text_words):
    '''
    Score how well the words of a filename match the searched words.

    The score is 100 per distinct searched word found in the filename plus the
    length of the longest run of filename words that follow the order of the
    searched words.
    '''
    last_pos = -1
    max_length = length = 0
    occurrences = [0]*len(text_words)
    for part in fn_parts:
        pos = text_words.index(part) if part in text_words else -1
        if pos != -1 and (last_pos==-1 or pos==last_pos+1):
            length += 1
        else:
            if length > max_length:
                max_length = length
            length = 0
        if pos != -1:
            occurrences[pos]=1
        last_pos = pos
    if length > max_length:
        max_length = length
    return sum(occurrences)*100 + max_length

def choose_filename(f,text_cache=None):
    '''
    Choose the filename to show for a file and fill the related view fields.

    f: file dict; reads f['file']['src'], f['file']['fn'] and f['file']['_id'].
       The 'c' (accumulated count) and 'tht' (text weight) fields of the
       candidate filenames are updated in place, and f['view'] receives 'url',
       'fnid', 'fn', 'qfn', 'pfn', 'nfn', 'fnh' and, when there is a searched
       text, 'fnhs'.
    text_cache: optional sequence; item 0 is either a filename id or the
       searched text and item 2 is the text handed to highlight().

    Returns True when a filename matching the searched text was found and
    False otherwise. Returns None, leaving the filename fields of f['view']
    unset, when no filename could be obtained at all.
    '''
    srcs = f['file']['src']
    fns = f['file']['fn']
    chosen = None
    max_count = -1
    current_weight = -1
    if text_cache and text_cache[0] in fns:
        # the text is really a filename id
        chosen = text_cache[0]
    else:
        for src in srcs.values():
            # blocked sources are ignored
            if 'bl' in src and src['bl']!=0:
                continue

            for crc,srcfn in src['fn'].items():
                # sources may have names that are not in the file
                if crc not in fns:
                    continue

                # filenames without a name do not add to the count
                m = srcfn['m'] if len(fns[crc]['n'])>0 else 0
                if 'c' in fns[crc]:
                    fns[crc]['c']+=m
                else:
                    fns[crc]['c']=m

                text_weight = 0
                if text_cache:
                    fn_parts = slugify(fns[crc]['n']).strip().split(" ")
                    if len(fn_parts)>0:
                        text_words = slugify(text_cache[0]).split(" ")
                        # rates number and order of the matches
                        text_weight = _filename_text_weight(fn_parts, text_words)
                        f['file']['fn'][crc]['tht'] = text_weight

                # best text match wins; ties are broken by the accumulated count
                better = fns[crc]['c']>max_count
                if text_weight > current_weight or (better and text_weight==current_weight):
                    current_weight = text_weight
                    chosen = crc
                    max_count = fns[crc]['c']

    f['view']['url'] = mid2url(hex2mid(f['file']['_id']))
    f['view']['fnid'] = chosen
    if chosen:
        filename = fns[chosen]['n']
        ext = fns[chosen]['x']
    else:
        # uses the filename from the last source whose url contains a slash
        filename = ""
        for src in srcs.values():
            if src['url'].find("/")!=-1:
                filename = src['url']

        if filename=="":
            return

        filename = filename[filename.rfind("/")+1:]
        # split name and extension; a name without a dot has no extension
        # (slicing with rfind()==-1 would take the whole name as extension and
        # drop the last character of the name)
        name, dot, ext = filename.rpartition(".")
        if dot:
            filename = name
        else:
            ext = ""

    #TODO when there is no filename, build one from the metadata (for example series - episode title)
    filename = extension_filename(filename,ext)
    f['view']['fn'] = filename.replace("?", "")
    f['view']['qfn'] = qfn = u(filename).encode("UTF-8")  # escaped filename used to build the download urls
    f['view']['pfn'] = urllib.quote(qfn).replace(" ", "%20")  # P2P filename

    nfilename = seoize_text(filename, " ",True, 0)
    f['view']['nfn'] = nfilename
    # adds the words of the filename as keywords
    g.keywords.update(set(keyword for keyword in nfilename.split(" ") if len(keyword)>1))

    # filename with the words that match the search highlighted
    if text_cache:
        f['view']['fnh'], f['view']['fnhs'] = highlight(text_cache[2],filename,True)
    else:
        f['view']['fnh'] = filename # only for download, which never has text

    return current_weight>0 # tells whether the searched text was found
def init_file(afile):
    '''
    Compute the derived fields of a file document, storing them in afile under
    keys that start with an underscore ("_r", "_d", "_r2", "_uri0".."_uri2",
    "_fs", "_fns", "_ct", "_l", "_z", "_md", "_s", "_fil", "__dtags", "_g" and,
    when the file has entity relations, "_ntts").

    afile: file document (dict); reads "_id", "md", "src", "fn", "fs" and the
        optional "se", "date", "i" and "z" keys.

    Returns False, before computing most fields, when none of the sources has
    a type present in `sources` or when no source type belongs to a group
    containing "p"; returns True otherwise.

    Side effect: sets the module global `current_id` to the file id.
    '''
    global current_id

    # gets file's id
    current_id = file_id = str(afile["_id"])

    # content fixes; a failure is logged and the file is processed anyway
    try:
        content_fixes(afile)
    except BaseException as e:
        logging.exception("Error fixing content file %s."%file_id)

    # semantic entities
    if "se" in afile and afile["se"] and "_id" in afile["se"]:
        try:
            entity = int(afile["se"]["_id"])
            if "rel" in afile["se"]:
                rels = afile["se"]["rel"]
                # entity id followed by the two relation lists (rels[0] and
                # rels[1]); each present list is prefixed by repeated PHRASE_SEPARATORs
                afile["_ntts"] = u"%04d%s%s"%(entity, PHRASE_SEPARATOR*10 + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[0]) if rels[0] else "", PHRASE_SEPARATOR*(10-len(rels[0])/10) + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[1]) if rels[1] else "")
        except ValueError:
            logging.exception("Error parsing entity id %s for file %s."%(str(afile["se"]["_id"]), file_id))
            entity = 0
        except:
            logging.exception("Error generating entity metadata for file %s."%file_id)
            entity = 0
    else:
        entity = 0

    md = afile["md"]
    # metadata names without their "type:" prefix
    md_schemaless_keys = {key.split(":")[-1] for key in md.iterkeys()}

    # file source types
    src = afile["src"]
    types = {int(s["t"]) for uri, s in src.iteritems() if "t" in s and s["t"] in sources}
    if not types:
        return False

    isP2P = any(u"p" in sources[t]["g"] for t in types)

    # values that depend on the types
    torrent_ct = None
    if not isP2P:
        return False

    # NOTE(review): trackers and torrent_ct are computed but not used later in this function
    trackers = md["torrent:trackers"] if "torrent:trackers" in md else 1 if "torrent:tracker" in md else 0
    if isinstance(trackers, basestring):
        trackers = trackers.count(" ")

    # checks whether it is a Torrent file or a Torrent Hash:
    # main type is 7 only when 7 is the single source type, 3 otherwise
    main_type = 7 if 7 in types and len(types)==1 else 3
    rate = rate_torrent(afile)
    afile["_r"] = rate["rating"]*10
    afile["_d"] = rate["seeds"]
    inner_group = 0

    # secondary rating
    r2 = 1
    if "thumbnail" in md_schemaless_keys or ("i" in afile and isinstance(afile["i"],list)):
        r2+=2 # files with images
    if "description" in md_schemaless_keys:
        r2+=1 # files with description
    afile["_r2"] = r2

    # file uri: the binary id unpacked into three values
    afile["_uri0"], afile["_uri1"], afile["_uri2"] = afile_struct.unpack(afile['_id'].binary)

    # "date" is preferred over "fs"; "_fs" is only stored when it is earlier than `now`
    fs = afile["date"] if "date" in afile else afile["fs"]
    fs = long(mktime(fs.timetuple()))
    if fs<now:
        afile["_fs"] = fs

    # the five filenames with the largest sum of "m" over all the sources
    fns = nlargest(5, ((sum(sfn["fn"][crc]["m"] if "fn" in sfn and crc in sfn["fn"] else 0 for sfn in src.itervalues()), fn) for crc,fn in afile["fn"].iteritems()))
    afile["_fns"] = separator_join(f[1]["n"]+("."+(f[1].get("x",None) or "")) for f in fns)
    res = [[seoize_text(f[1]["n"], separator=" ", is_url=False, max_length=100, min_length=20)] for f in fns]

    # content information
    ct, file_tags, file_format = guess_doc_content_type(afile, sources)

    # file tags
    file_type = CONTENTS[ct].lower()
    file_category = []
    file_category_tag = file_category_type = None
    for category in config["TORRENTS_CATEGORIES"]:
        if category.tag in file_tags and (not file_category or category.tag=="p**n"): # always use adult when it is present
            # main-content categories go last, the others first
            if category.content_main:
                file_category.append(category.cat_id)
            else:
                file_category.insert(0,category.cat_id)
        if category.content_main and category.content==file_type:
            file_category_type = category.cat_id
            file_category_tag = category.tag

    # category by tag when there is one, otherwise the category of the content type
    afile["_ct"] = file_category[0] if file_category else file_category_type

    # size
    try:
        z = float(afile["z"]) if "z" in afile and afile["z"] else False
    except:
        z = False

    # length (l) and bitrate, read from the first metadata field available;
    # "length" is split by ":" and its last four parts are weighted by
    # 216000, 3600, 60 and 1
    if ct == CONTENT_VIDEO:
        try:
            l = int(float(md["video:duration"])) if "video:duration" in md else \
                int(float(md["video:length"])) if "video:length" in md else \
                sum(imap(mul, [int(float(part)) for part in ("0:0:0:" + str(md["length"])).split(":")[-4:]], [216000, 3600, 60, 1])) if "length" in md else \
                60*int(float(md["video:minutes"])) if "video:minutes" in md else \
                False
        except:
            l = False

        try:
            bitrate = int(str(md["bitrate"]).replace('~','')) if "bitrate" in md else 1280 # default bitrate when the metadata has none
        except:
            bitrate = False

    elif ct == CONTENT_AUDIO:
        try:
            l = int(float(md["audio:seconds"])) if "audio:seconds" in md else \
                sum(imap(mul, [int(float(part)) for part in ("0:0:0:" + str(md["length"])).split(":")[-4:]], [216000, 3600, 60, 1])) if "length" in md else \
                False
        except:
            l = False

        try:
            bitrate = int(str(md["bitrate"]).replace('~','')) if "bitrate" in md else 1280 # default bitrate when the metadata has none (same value as for video)
        except:
            bitrate = False

    else:
        bitrate = l = False

    # values below 1 are treated as unknown
    if z<1:
        z = False
    if l<1:
        l = False

    # estimate the missing one of size and length from the other and the bitrate
    if bitrate:
        if l and not z:
            z = l*(bitrate<<7) # bitrate<<7 multiplies by 128 (original note: kbps converted to Kbps)
        elif z and not l:
            l = z/(bitrate<<7)

    afile["_l"] = int(l) if 0<int(l)<0xFFFF else False
    afile["_z"] = log(z,2) if z else False

    # metadata: filename texts plus the string values of the GOOD_MDS keys
    # whose length does not exceed the limit stored in GOOD_MDS
    mds = chain(chain(*res), chain(value for key,value in md.iteritems() if key in GOOD_MDS and isinstance(value, basestring) and len(value)<=GOOD_MDS[key]))
    afile["_md"] = separator_join(amd for amd in mds if amd)

    # sources
    afile["_s"] = [unicode(t) for t in types]

    # text filters
    filters = {FILTER_PREFIX_CONTENT_TYPE+CONTENTS[ct]}
    filters.add("%storrent"%FILTER_PREFIX_SOURCE_GROUP)
    filters.update("%s%02d"%(prefix,int(md[key])) for key, prefix in numeric_filters.iteritems() if key in md and (isinstance(md[key], int) or isinstance(md[key], float) or (isinstance(md[key], basestring) and md[key].isdecimal())))

    # words found in the metadata fields listed in DYNAMIC_TAGS_METADATA, plus the file tags
    file_words = [word.strip().replace("-"," ") for key, value in md.iteritems() if value and isinstance(value, basestring) and any(key.endswith(dtag_md) for dtag_md in DYNAMIC_TAGS_METADATA) for word in SUBCATEGORIES_FINDER.findall(value.lower())] + file_tags
    dtags = {(tag, DYNAMIC_TAGS[tag][word]) for tag in file_tags if tag in DYNAMIC_TAGS for word in file_words if word in DYNAMIC_TAGS[tag]}

    # when no tag of the file has dynamic tags, try with the tag of the content type category
    if file_category_tag and file_category_tag in DYNAMIC_TAGS and file_category_tag not in file_tags and not any(category in file_tags for category in DYNAMIC_TAGS.iterkeys()):
        type_dtags = [(file_category_tag, DYNAMIC_TAGS[file_category_tag][word]) for word in file_words if word in DYNAMIC_TAGS[file_category_tag]]
        dtags.update(type_dtags)
        if type_dtags:
            file_tags.append(file_category_tag)

    filters.update("%s%s"%(FILTER_PREFIX_TAGS, tag) for tag in file_tags)
    filters.update("%s%s"%(FILTER_PREFIX_DYNAMIC_TAGS, dtag.replace(" ","")) for tag, dtag in dtags)

    if file_format:
        filters.add(FILTER_PREFIX_FORMAT+file_format[0])
    afile["_fil"] = " ".join(filters)
    afile["__dtags"] = dtags

    # groups: entity from bit 32 up, 4 bits of category at bit 28,
    # main type at bit 12 and 12 bits of inner group
    afile["_g"] = ((entity << 32) | (((afile["_ct"] or 0)&0xF) << 28) | ((main_type & 0xFFFF) << 12) | (inner_group & 0xFFF))

    return True
def download_file(file_id,file_name=None):
    '''
    Build the download page of a file.

    file_id: file id as it comes in the url, or None.
    file_name: optional filename that comes in the url.

    Returns a dict. On error, or when there is no file id, its keys are "html"
    (the rendered error page, or "" when there is no error), "play" (None),
    "file_data" and "error" (status code, message id). For an old numeric id
    that can be resolved it holds "html" (a 301 redirect) and "error". On
    success it holds "html" (the rendered download page), "play" and
    "file_data". An invalid url id aborts the request with a 404.
    '''
    error = (None, "")  # status code and message id of the error, if any
    file_data = None

    if file_id is not None:
        # an id was given: check that it is right
        if not is_valid_url_fileid(file_id):
            abort(404)
        else:
            try:
                # try to convert the url id to an internal one
                file_id = url2mid(file_id)
            except (bson.objectid.InvalidId, TypeError) as e:
                try:
                    # check whether it is an old id
                    possible_file_id = filesdb.get_newid(file_id)
                    if possible_file_id is not None:
                        logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                        new_url = url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name)
                        return {"html": redirect(new_url, 301),"error":(301,"")}

                    logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                    error = (404,"link_not_exist")
                except BaseException as e:
                    logging.exception(e)
                    error = (503,"")
                file_id = None

        if file_id:
            try:
                file_data = get_file_metadata(file_id, file_name)
            except DatabaseError:
                error = (503,"")
            except FileNotExist:
                error = (404,"link_not_exist")
            except (FileRemoved, FileFoofindRemoved, FileNoSources):
                error = (410,"error_link_removed")
            except FileUnknownBlock:
                error = (404,"")

            # no errors and no data: the file exists but could not be retrieved
            if error[0] is None and not file_data:
                error = (503,"")

    has_error = error[0] is not None
    if file_id is None or has_error:
        html = ""
        if has_error:
            # errors are returned already rendered
            message_msgid = "error_%s_message" % error[0]
            message_msgstr = _(message_msgid)
            if message_msgstr == message_msgid:
                # no specific message for this code
                message_msgstr = _("error_500_message")
            g.title = "%s %s" % (error[0], message_msgstr)
            html = render_template('error.html',error=error,full_screen=True)
        return {"html": html,"play":None,"file_data":file_data,"error":error}

    save_visited([file_data])
    view = file_data['view']
    title = u(view['fn'])
    g.title = u"%s \"%s\" - %s" % (_(view['action']).capitalize(), title[:100], g.title)
    g.page_description = u"%s %s"%(_(view['action']).capitalize(), seoize_text(title," ",True))

    # for logged users, get their vote for the active language and whether the file is a favorite
    vote = None
    favorite = False
    if current_user.is_authenticated():
        vote = usersdb.get_file_vote(file_id,current_user,g.lang)
        favorite = any(file_id==fav_file["id"] for fav_file in usersdb.get_fav_files(current_user))

    # form to send comments
    form = CommentForm(request.form)
    if request.method=='POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type==0) and form.validate():
        usersdb.set_file_comment(file_id,current_user,g.lang,form.t.data)
        form.t.data = ""
        flash("comment_published_succesfully")
        # update the file with the sum of comments by language
        comments_sum = usersdb.get_file_comments_sum(file_id)
        filesdb.update_file({"_id":file_id,"cs":comments_sum,"s":file_data["file"]["s"]},direct_connection=True)

    # for each comment: its number, the user who wrote it, the comment and its votes
    comments = []
    if "cs" in file_data["file"]:
        for i,comment in enumerate(usersdb.get_file_comments(file_id,g.lang),1):
            author = usersdb.find_userid(comment["_id"].split("_")[0])
            comments.append((i,author,comment,comment_votes(file_id,comment)))

    # the download page tries to get words to search when there are none
    if g.args.get("q", None) is None:
        query = download_search(file_data, file_name, "foofind")
        if query:
            g.args["q"] = query.replace(":","")

    if vote is None:
        vote = {"k":0}
    html = render_template('files/download.html',file=file_data,vote=vote,favorite=favorite,form=form,comments=comments)
    return {
        "html":html,
        "play":file_data["view"]["play"] if "play" in file_data["view"] else "",
        "file_data":file_data,
    }
def generate(server, part, afilter, batch_size, output):
    '''
    Generate the sitemap entries for the files of a server.

    server: server whose files are fetched.
    part: part number; used for the default output folder and as file suffix.
    afilter, batch_size: passed to FilesFetcher together with server.
    output: output folder; when empty, "gen/<part>/" next to this module is used.

    Blocked files and files without a valid, unblocked source are skipped.
    Errors in single files are logged and counted; after more than 100 errors
    the last one is raised again and the generation stops.
    '''
    if not output:
        output = dirname(abspath(__file__)) + "/gen/" + str(part) + "/"

    ff = FilesFetcher(server, afilter, batch_size)
    ff.start()

    suffix = "."+str(part)
    count = error_count = 0
    valid_source_types = {3, 7, 79, 80, 81, 82, 83, 90}
    logging.info("Comienza generación de sitemap en servidor %s."%server)

    for afile in ff:
        try:
            count += 1

            # checks that the file is not blocked
            if int(float(afile.get("bl", 0)))!=0:
                continue

            # checks that it has valid sources
            for src in afile["src"].itervalues():
                if "t" in src and src["t"] in valid_source_types and int(float(src.get("bl",0)))==0:
                    main_src = src
                    break
            else:
                continue

            # chooses some interesting filename
            filename = None
            for fn in afile.get("fn",{}).itervalues():
                filename = fn["n"]
                if filename=="download" or IS_BTIH.match(filename) or filename.startswith("[TorrentDownloads"):
                    continue
                extension = fn.get("x",None)
                if extension and not filename.endswith("."+extension):
                    filename += "." + extension
                break
            else:
                # every name was rejected: forget the last one so that the
                # metadata and url fallbacks below are really used
                filename = None
                md = afile.get("md",{})
                for possible_name in ("torrent:name", "torrent:title", "video:title", "video:name"):
                    if possible_name in md:
                        filename = u(md[possible_name])
                        break

                if not filename:
                    filename = u(main_src["url"].rsplit("/",1)[-1])

            if filename:
                first_seen = afile["fs"]
                # "%%s" leaves a "%s" placeholder at the start of <loc> in the written line
                get_writer(first_seen, count, output, suffix).write("<url><lastmod>%s</lastmod><loc>%%s%s-%s</loc></url>\n"%(first_seen.strftime("%Y-%m-%dT%H:%M:%SZ"), seoize_text(filename, "-", True), mid2url(afile["_id"])))
        except Exception:
            # KeyboardInterrupt and SystemExit are not caught, so the process can be stopped
            error_count += 1
            logging.exception("Error generating sitemap entry on server %s."%server)
            if error_count>100:
                raise # after more than 100 errors, stops with the original traceback

        if count%10000==0:
            logging.info("Progreso de generación de sitemap del servidor %s."%(server), extra={"count":count, "error_count":error_count})

    close_writers()
    sort_files(output)

    logging.info("Finaliza la generación de sitemap en servidor %s."%server)
def _has_long_char_run(line):
    """Tell whether some character is repeated more than six times in a row."""
    previous = run = 0
    for char in line:
        run = run + 1 if previous == char else 0
        if run > 5:
            return True
        previous = char
    return False

def torrents_data(data, details=False):
    """
    Complete the view information of a torrent file.

    data: file data dict; data["view"] is modified in place.
    details: when true, the description is split into a short and a long
        version and scanned for trailer and imdb links.

    Returns data, or None when there is no data, no "sources" in the view or
    no source that is a torrent.
    """
    if not data or not "sources" in data["view"]:
        return None

    view = data["view"]
    sources = view["sources"]
    view_md = view["md"]

    valid_torrent = False
    providers = []
    for source in list(sources.keys()):
        if source == "tmagnet":
            valid_torrent = True
            continue
        if sources[source]["icon"] != "torrent":
            continue
        valid_torrent = True
        providers.append(source)
        # the source is also published under a fixed key
        alias = "download_ind" if u"i" in sources[source]["g"] else "download"
        sources[alias] = sources[source]

    # it has no valid sources
    if not valid_torrent:
        return None

    desc = None

    # downloader: disabled for platforms other than windows
    main_source = sources[view["source"]]
    if main_source["downloader"] == 1 and request.user_agent.platform != "windows":
        main_source["downloader"] = 0

    # arranges the file description
    if details and "description" in view_md:
        # takes the original description out of the metadata
        desc = view_md["description"]
        del view_md["description"]

        long_desc = False
        short_desc = None
        pending = []  # lines of the paragraph being built

        for line in desc.split("\n"):
            # removes links
            line = URL_DETECTOR.sub("", line)

            # reaching a new line with text pending means the long description must be shown
            if pending:
                long_desc = True

            # ignores lines with many repeated characters
            if _has_long_char_run(line):
                line = ""

            # long lines are accumulated
            if len(line) >= 50:
                pending.append(line)
                continue

            # a "short" line ends the paragraph, if there is one
            if not pending:
                continue
            if line:
                pending.append(line)

            paragraph = " ".join(pending)
            pending = []  # restarts before going on
            if len(paragraph) > 90:
                short_desc = paragraph
                if len(paragraph) > 140:
                    # long enough: stop searching
                    break

        # processes the last paragraph
        if pending:
            paragraph = " ".join(pending)
            if len(paragraph) > 90:
                short_desc = paragraph

        # shows the short description, if any, and decides about the long one
        if short_desc:
            view_md["short_desc"] = short_desc
            long_desc = long_desc or len(short_desc) > 400
        else:
            long_desc = True

        if not long_desc and "nfo" in data["file"]["md"]:
            desc = data["file"]["md"]["nfo"]
            long_desc = True

        if long_desc and short_desc != desc:
            linked_desc = URL_DETECTOR.sub(r'<a rel="nofollow" href="\1">\1</a>', desc)
            if len(desc) > 400:
                view_md["long_desc"] = linked_desc
            else:
                view_md["description"] = linked_desc

    # file tags
    file_tags = view["tags"] if "tags" in view else []
    file_category = file_category_type = None
    file_categories = []
    for category in g.categories:
        if category.tag in file_tags:
            if not file_category:
                file_category = category
            file_categories.append(category)

        if not file_category_type and category.content_main and category.content == view["file_type"]:
            file_category_type = category

    view["category"] = file_category
    view["categories"] = file_categories
    view["category_type"] = file_category_type

    has_trailer = view["has_trailer"] = file_category and (file_category.url in ["movies", "games"])

    if desc:
        if has_trailer:
            trailer = TRAILER_DETECTOR.findall(desc)
            youtube_id = get_video_id(trailer[0]) if trailer else None
            if youtube_id:
                view["trailer_link"] = "http://www.youtube.com/embed/%s?autoplay=1" % youtube_id

        imdb = IMDB_DETECTOR.findall(desc)
        if imdb:
            view["imdb_link"] = imdb[0]

    # torrent health
    try:
        seeds = int(float(view_md["seeds"])) if "seeds" in view_md else 0
    except:
        seeds = 0

    try:
        leechs = int(float(view_md["leechs"])) if "leechs" in view_md else 0
    except:
        leechs = 0

    if seeds == 0:
        view["health"] = int(2 / (leechs + 1.0))
    else:
        view["health"] = min(10, int(seeds / (leechs + 1.0) * 5))
    view["rating"] = int((view["health"] + 1) / 2)

    view["icon"] = file_category or file_category_type or CATEGORY_UNKNOWN
    view["providers"] = providers
    view["seo-fn"] = seoize_text(view["fn"], "-", True)

    return data