def init_data(file_data, ntts=None):
    '''
    Initialize the data dictionary of a file.

    Runs content_fixes on file_data, stores the url form of its "_id" under
    "id" and the "url" of the first source yielded by file_data["src"] under
    "name". When the file has a semantic entity ("se") whose "_id" is
    found in ntts, the entity record is attached as se["info"] and the
    related entities that are also present in ntts as se["rel"].

    file_data -- file document; modified in place.
    ntts -- collection of entity records indexed by entity id; defaults to
            an empty mapping.

    Returns a dictionary with the file document under "file" and an empty
    dictionary under "view".
    '''
    # a fresh container per call instead of a shared mutable default
    if ntts is None:
        ntts = {}

    content_fixes(file_data)
    file_data["id"] = mid2url(file_data['_id'])
    file_data['name'] = file_data['src'].itervalues().next()['url']

    file_se = file_data["se"] if "se" in file_data else None

    # membership is tested with the raw id, the lookup uses its integer form
    ntt = None
    if file_se and "_id" in file_se and file_se["_id"] in ntts:
        ntt = ntts[int(float(file_se["_id"]))]

    if ntt:
        file_se["info"] = ntt
        if "r" in ntt:
            # keep only related entities whose record is available
            file_se["rel"] = [ntts[relid] for relids in ntt["r"].itervalues() for relid in relids if relid in ntts]
        else:
            file_se["rel"] = []

    return {"file":file_data,"view":{}}
def fill_data(file_data, text=None, ntts=None):
    '''
    Add the data needed to display a file.

    file_data -- file document; it is modified in place by the helpers
                 called here.
    text -- optional searched text. When given, it is expanded into a tuple
            (text, slugified text, frozenset of the slug words) before being
            handed to choose_filename and format_metadata.
    ntts -- entity records forwarded to init_data; defaults to an empty
            mapping.

    Returns the dictionary built by init_data after the remaining helpers
    have processed it.
    '''
    # a fresh container per call instead of a shared mutable default
    if ntts is None:
        ntts = {}

    if text:
        slug_text = slugify(text)
        text = (text, slug_text, frozenset(slug_text.split(" ")))

    # make sure sources and image server data are loaded before starting
    fetch_global_data()

    f = init_data(file_data, ntts)

    # NOTE(review): init_data already runs content_fixes on this same
    # dictionary; the second call is kept to preserve behaviour.
    content_fixes(f["file"])
    choose_file_type(f)

    # while choosing the file name, find out whether the searched text shows up
    search_text_shown = choose_filename(f, text)
    build_source_links(f)
    embed_info(f)
    get_images(f)

    # when needed, show extra metadata containing the searched text
    format_metadata(f, text, search_text_shown)
    return f
def _md_length_value(md):
    '''
    Fold the colon separated md["length"] value into a single number.

    The value is left-padded with zero fields; its last four fields are
    truncated to integers and weighted by 216000, 3600, 60 and 1.
    '''
    fields = ("0:0:0:" + str(md["length"])).split(":")[-4:]
    return sum(imap(mul, [int(float(field)) for field in fields], [216000, 3600, 60, 1]))


def _md_bitrate(md):
    '''
    Return md["bitrate"] as an int (any '~' removed), 1280 when the key is
    missing, or False when the stored value cannot be parsed.
    '''
    if "bitrate" not in md:
        return 1280  # default bitrate
    try:
        return int(str(md["bitrate"]).replace('~',''))
    except Exception:
        return False


def init_file(afile):
    '''
    Compute the derived fields of a file document, in place.

    Sets the module global current_id to the string form of afile["_id"].
    Depending on the data available, the following keys are written to
    afile: "_ntts", "_r", "_d", "_r2", "_uri0", "_uri1", "_uri2", "_fs",
    "_fns", "_ct", "_l", "_z", "_md", "_s", "_fil", "__dtags" and "_g".

    Returns False, before writing any of the rating and later fields, when
    none of the file sources has a known type or when none of those types
    belongs to a source group containing u"p". Returns True otherwise.

    Errors raised while fixing contents or while building the entity
    metadata are logged and do not abort the processing.
    '''
    global current_id

    # gets file's id
    current_id = file_id = str(afile["_id"])

    # content fixes (best effort)
    try:
        content_fixes(afile)
    except Exception:
        logging.exception("Error fixing content file %s."%file_id)

    # semantic entities
    entity = 0
    if "se" in afile and afile["se"] and "_id" in afile["se"]:
        try:
            entity = int(afile["se"]["_id"])
            if "rel" in afile["se"]:
                rels = afile["se"]["rel"]
                first_rels = (PHRASE_SEPARATOR*10 + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[0])) if rels[0] else ""
                # explicit floor division: the repeat count must be an integer
                second_rels = (PHRASE_SEPARATOR*(10-len(rels[0])//10) + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[1])) if rels[1] else ""
                afile["_ntts"] = u"%04d%s%s"%(entity, first_rels, second_rels)
        except ValueError:
            logging.exception("Error parsing entity id %s for file %s."%(str(afile["se"]["_id"]), file_id))
            entity = 0
        except Exception:
            logging.exception("Error generating entity metadata for file %s."%file_id)
            entity = 0

    md = afile["md"]
    # metadata keys without their "prefix:" part
    md_schemaless_keys = {key.split(":")[-1] for key in md.iterkeys()}

    # file types
    src = afile["src"]
    types = {int(s["t"]) for s in src.itervalues() if "t" in s and s["t"] in sources}
    if not types:
        return False

    # only files with at least one source whose group contains u"p" go on
    isP2P = any(u"p" in sources[t]["g"] for t in types)
    if not isP2P:
        return False

    # checks whether it is a Torrent file or a Torrent Hash
    main_type = 7 if 7 in types and len(types)==1 else 3

    rate = rate_torrent(afile)
    afile["_r"] = rate["rating"]*10
    afile["_d"] = rate["seeds"]

    inner_group = 0

    # secondary rating
    r2 = 1
    if "thumbnail" in md_schemaless_keys or ("i" in afile and isinstance(afile["i"],list)):
        r2 += 2  # files with images
    if "description" in md_schemaless_keys:
        r2 += 1  # files with description
    afile["_r2"] = r2

    # file uri
    afile["_uri0"], afile["_uri1"], afile["_uri2"] = afile_struct.unpack(afile['_id'].binary)

    # timestamp, stored only when it is earlier than the module level `now`
    fs = afile["date"] if "date" in afile else afile["fs"]
    fs = long(mktime(fs.timetuple()))
    if fs<now:
        afile["_fs"] = fs

    # up to five file names, ranked by the sum of their "m" values across sources
    fns = nlargest(5, ((sum(sfn["fn"][crc]["m"] if "fn" in sfn and crc in sfn["fn"] else 0 for sfn in src.itervalues()), fn) for crc,fn in afile["fn"].iteritems()))
    afile["_fns"] = separator_join(f[1]["n"]+("."+(f[1].get("x",None) or "")) for f in fns)
    res = [[seoize_text(f[1]["n"], separator=" ", is_url=False, max_length=100, min_length=20)] for f in fns]

    # content information
    ct, file_tags, file_format = guess_doc_content_type(afile, sources)

    # file tags
    file_type = CONTENTS[ct].lower()
    file_category = []
    file_category_tag = file_category_type = None
    for category in config["TORRENTS_CATEGORIES"]:
        # always use adult when its present
        if category.tag in file_tags and (not file_category or category.tag=="p**n"):
            if category.content_main:
                file_category.append(category.cat_id)
            else:
                file_category.insert(0,category.cat_id)

        # main category matching the content type, used as fallback below
        if category.content_main and category.content==file_type:
            file_category_type = category.cat_id
            file_category_tag = category.tag

    afile["_ct"] = file_category[0] if file_category else file_category_type

    # size
    try:
        size = float(afile["z"]) if "z" in afile and afile["z"] else False
    except Exception:
        size = False

    # length and bitrate; a failure parsing the first key found gives
    # False, later keys are not tried
    if ct == CONTENT_VIDEO:
        try:
            if "video:duration" in md:
                length = int(float(md["video:duration"]))
            elif "video:length" in md:
                length = int(float(md["video:length"]))
            elif "length" in md:
                length = _md_length_value(md)
            elif "video:minutes" in md:
                length = 60*int(float(md["video:minutes"]))
            else:
                length = False
        except Exception:
            length = False
        bitrate = _md_bitrate(md)
    elif ct == CONTENT_AUDIO:
        try:
            if "audio:seconds" in md:
                length = int(float(md["audio:seconds"]))
            elif "length" in md:
                length = _md_length_value(md)
            else:
                length = False
        except Exception:
            length = False
        bitrate = _md_bitrate(md)
    else:
        bitrate = length = False

    if size<1:
        size = False
    if length<1:
        length = False

    # derive the missing value from the other one; bitrate<<7 multiplies by
    # 128 (presumably kilobits per second to bytes per second)
    if bitrate:
        if length and not size:
            size = length*(bitrate<<7)
        elif size and not length:
            length = size/(bitrate<<7)

    afile["_l"] = int(length) if 0<int(length)<0xFFFF else False
    afile["_z"] = log(size,2) if size else False

    # metadata
    mds = chain(chain(*res), chain(value for key,value in md.iteritems() if key in GOOD_MDS and isinstance(value, basestring) and len(value)<=GOOD_MDS[key]))
    afile["_md"] = separator_join(amd for amd in mds if amd)

    # sources
    afile["_s"] = [unicode(t) for t in types]

    # text filters
    filters = {FILTER_PREFIX_CONTENT_TYPE+CONTENTS[ct]}
    filters.add("%storrent"%FILTER_PREFIX_SOURCE_GROUP)
    # NOTE(review): isdecimal() only exists on unicode in Python 2;
    # presumably string metadata is always unicode -- confirm.
    filters.update("%s%02d"%(prefix,int(md[key])) for key, prefix in numeric_filters.iteritems() if key in md and (isinstance(md[key], int) or isinstance(md[key], float) or (isinstance(md[key], basestring) and md[key].isdecimal())))

    # candidate words for dynamic tags: words found in selected metadata plus the file tags
    file_words = [word.strip().replace("-"," ") for key, value in md.iteritems() if value and isinstance(value, basestring) and any(key.endswith(dtag_md) for dtag_md in DYNAMIC_TAGS_METADATA) for word in SUBCATEGORIES_FINDER.findall(value.lower())] + file_tags
    dtags = {(tag, DYNAMIC_TAGS[tag][word]) for tag in file_tags if tag in DYNAMIC_TAGS for word in file_words if word in DYNAMIC_TAGS[tag]}

    # no file tag has dynamic tags: try the ones of the content type category
    if file_category_tag and file_category_tag in DYNAMIC_TAGS and file_category_tag not in file_tags and not any(category in file_tags for category in DYNAMIC_TAGS.iterkeys()):
        type_dtags = [(file_category_tag, DYNAMIC_TAGS[file_category_tag][word]) for word in file_words if word in DYNAMIC_TAGS[file_category_tag]]
        dtags.update(type_dtags)
        if type_dtags:
            file_tags.append(file_category_tag)

    filters.update("%s%s"%(FILTER_PREFIX_TAGS, tag) for tag in file_tags)
    filters.update("%s%s"%(FILTER_PREFIX_DYNAMIC_TAGS, dtag.replace(" ","")) for tag, dtag in dtags)

    if file_format:
        filters.add(FILTER_PREFIX_FORMAT+file_format[0])
    afile["_fil"] = " ".join(filters)
    afile["__dtags"] = dtags

    # groups: entity, category, main type and inner group packed in one integer
    afile["_g"] = ((entity << 32) |
                   (((afile["_ct"] or 0)&0xF) << 28) |
                   ((main_type & 0xFFFF) << 12) |
                   (inner_group & 0xFFF))

    return True