Python content_fixes Exemples, foofind.datafixes.content_fixes Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : fill_data.py Projet : Saectar/foofind-web

def init_data(file_data, ntts=None):
    '''
    Initialize the file data dictionary.

    Applies content_fixes to file_data, fills its "id" key (mid2url of
    "_id") and its "name" key (the "url" of the first value in "src") and,
    when the file carries a semantic entity ("se") whose id is found in
    ntts, attaches that entity as file_data["se"]["info"] and the related
    entities found in ntts as file_data["se"]["rel"].

    file_data -- file document; must contain "_id" and a non-empty "src".
    ntts -- optional container of entities indexed by numeric id; when
            omitted, no entity information is attached.

    Returns {"file": file_data, "view": {}}; file_data is modified in place.
    '''
    # A fresh container per call avoids sharing one mutable default object
    # between unrelated calls.
    if ntts is None:
        ntts = {}

    content_fixes(file_data)
    file_data["id"] = mid2url(file_data['_id'])
    file_data['name'] = file_data['src'].itervalues().next()['url']

    file_se = file_data["se"] if "se" in file_data else None

    # Normalize the entity id once so that the membership test and the
    # lookup use the very same key. Testing the raw value and indexing with
    # the converted one could either miss an existing entity or raise
    # KeyError when the two disagree.
    ntt = None
    if file_se and "_id" in file_se:
        try:
            ntt_id = int(float(file_se["_id"]))
        except (TypeError, ValueError, OverflowError):
            # Non numeric id: treat the file as having no known entity.
            ntt_id = None
        if ntt_id is not None and ntt_id in ntts:
            ntt = ntts[ntt_id]

    if ntt:
        file_se["info"] = ntt

        # "r" holds groups of related entity ids; keep only those that are
        # available in ntts.
        if "r" in ntt:
            file_se["rel"] = [ntts[relid] for relids in ntt["r"].itervalues() for relid in relids if relid in ntts]
        else:
            file_se["rel"] = []

    return {"file":file_data,"view":{}}

Exemple #2

0

Afficher le fichier

Fichier : fill_data.py Projet : Hermy/foofind-web

def fill_data(file_data, text=None, ntts=None):
    '''
    Add the data needed to display a file.

    file_data -- file document, handed to init_data.
    text -- optional search text. When truthy, the helpers that pick the
            file name and format the metadata receive the tuple
            (text, slugified text, frozenset of the slug's words) instead
            of the raw value.
    ntts -- optional entities container handed to init_data; defaults to
            an empty dict.

    Returns the structure produced by init_data after every fill step has
    been applied to it.
    '''
    # A fresh dict per call avoids sharing one mutable default object
    # between unrelated calls.
    if ntts is None:
        ntts = {}

    # Keep the caller's value untouched and build the search descriptor
    # under its own name; falsy values are passed through unchanged.
    search_text = text
    if text:
        slug_text = slugify(text)
        search_text = (text, slug_text, frozenset(slug_text.split(" ")))

    # make sure sources and image server data are loaded before starting
    fetch_global_data()
    f = init_data(file_data, ntts)
    content_fixes(f["file"])

    choose_file_type(f)
    # while choosing the file name, find out whether the searched text shows up in it
    search_text_shown = choose_filename(f, search_text)
    build_source_links(f)
    embed_info(f)
    get_images(f)
    # if needed, show extra metadata containing the searched text
    format_metadata(f, search_text, search_text_shown)
    return f

Exemple #3

0

Afficher le fichier

Fichier : sphinx.py Projet : Weej1/www

def init_file(afile):
    '''
    Fill the derived, underscore-prefixed fields of a file document in place.

    afile -- file document; the code reads "_id", "md", "src" and "fn"
             unconditionally, and "se", "i", "date"/"fs" and "z" when
             present.

    Returns False (before most derived fields are written) when none of the
    file sources has a type known in `sources`, or when none of those types
    belongs to a source group containing u"p". Returns True otherwise, after
    setting "_r", "_d", "_r2", "_uri0".."_uri2", "_fns", "_ct", "_l", "_z",
    "_md", "_s", "_fil", "__dtags" and "_g" (plus "_ntts" and "_fs" when
    their conditions hold).

    Side effect: stores the file id string in the module global current_id.
    '''
    global current_id

    # gets file's id
    current_id = file_id = str(afile["_id"])

    # content fixes
    # NOTE(review): BaseException also covers KeyboardInterrupt/SystemExit,
    # so those are logged and swallowed here as well; `e` is unused.
    try:
        content_fixes(afile)
    except BaseException as e:
        logging.exception("Error fixing content file %s."%file_id)

    # semantic entities
    # `entity` ends up as the integer entity id, or 0 when the file has no
    # entity or anything fails while processing it.
    if "se" in afile and afile["se"] and "_id" in afile["se"]:
        try:
            entity = int(afile["se"]["_id"])
            if "rel" in afile["se"]:
                rels = afile["se"]["rel"]
                # "_ntts": zero-padded entity id followed by the two relation
                # lists (rels[0] and rels[1]); every relation is rendered as
                # zero-padded id + relation name, and each non-empty list is
                # preceded by a run of PHRASE_SEPARATOR.
                afile["_ntts"] = u"%04d%s%s"%(entity, PHRASE_SEPARATOR*10 + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[0]) if rels[0] else "", PHRASE_SEPARATOR*(10-len(rels[0])/10) + space_join(str(ntt_id).rjust(4,"0")+ntt_rel for ntt_id, ntt_rel in rels[1]) if rels[1] else "")
        except ValueError:
            logging.exception("Error parsing entity id %s for file %s."%(str(afile["se"]["_id"]), file_id))
            entity = 0
        except:
            logging.exception("Error generating entity metadata for file %s."%file_id)
            entity = 0
    else:
        entity = 0

    md = afile["md"]
    # metadata key names without their "prefix:" part
    md_schemaless_keys = {key.split(":")[-1] for key in md.iterkeys()}

    # file types
    # integer types of the sources whose "t" value is a key of `sources`
    src = afile["src"]
    types = {int(s["t"]) for uri, s in src.iteritems() if "t" in s and s["t"] in sources}
    if not types: return False
    # true when any of the types belongs to a source group containing u"p"
    isP2P = any(u"p" in sources[t]["g"] for t in types)

    # type-dependent values
    # NOTE(review): torrent_ct is assigned but never read in this function.
    torrent_ct = None
    if not isP2P:
        return False

    # NOTE(review): `trackers` is computed here but not read again within
    # this function.
    trackers = md["torrent:trackers"] if "torrent:trackers" in md else 1 if "torrent:tracker" in md else 0
    if isinstance(trackers, basestring): trackers = trackers.count(" ")

    # check whether it is a Torrent file or a Torrent Hash
    # (7 only when it is the sole type of the file, 3 otherwise)
    main_type = 7 if 7 in types and len(types)==1 else 3

    rate = rate_torrent(afile)
    afile["_r"] = rate["rating"]*10
    afile["_d"] = rate["seeds"]

    inner_group = 0

    # secondary rating
    r2 = 1
    if "thumbnail" in md_schemaless_keys or ("i" in afile and isinstance(afile["i"],list)): r2+=2  # files with images
    if "description" in md_schemaless_keys: r2+=1  # files with description
    afile["_r2"] = r2

    # file uri
    # the binary id is unpacked into three values with afile_struct
    afile["_uri0"], afile["_uri1"], afile["_uri2"] = afile_struct.unpack(afile['_id'].binary)

    # "date" wins over "fs"; the value is converted to epoch seconds and
    # only stored when it is earlier than `now`
    fs = afile["date"] if "date" in afile else afile["fs"]
    fs = long(mktime(fs.timetuple()))
    if fs<now: afile["_fs"] = fs

    # up to 5 file names, ranked by the sum over all sources of the "m"
    # value that each source holds for the name's crc
    fns = nlargest(5, ((sum(sfn["fn"][crc]["m"] if "fn" in sfn and crc in sfn["fn"] else 0 for sfn in src.itervalues()), fn) for crc,fn in afile["fn"].iteritems()))
    # name + "." + extension for each of them; note that the dot is appended
    # even when the extension "x" is missing or empty
    afile["_fns"] = separator_join(f[1]["n"]+("."+(f[1].get("x",None) or "")) for f in fns)

    # one single-element list per selected name, holding its seoize_text result
    res = [[seoize_text(f[1]["n"], separator=" ", is_url=False, max_length=100, min_length=20)] for f in fns]

    # content information
    ct, file_tags, file_format = guess_doc_content_type(afile, sources)

    # file tags
    file_type = CONTENTS[ct].lower()
    file_category = []
    file_category_tag = file_category_type = None
    for category in config["TORRENTS_CATEGORIES"]:
        if category.tag in file_tags and (not file_category or category.tag=="p**n"): # always use adult when its present
            if category.content_main:
                file_category.append(category.cat_id)
            else:
                file_category.insert(0,category.cat_id)

        # main category matching the guessed content type, used as fallback
        if category.content_main and category.content==file_type:
            file_category_type = category.cat_id
            file_category_tag = category.tag
    afile["_ct"] = file_category[0] if file_category else file_category_type

    # size
    try:
        z = float(afile["z"]) if "z" in afile and afile["z"] else False
    except:
        z = False

    # length `l` and bitrate, taken from the metadata keys that apply to the
    # content type; any parsing error leaves the value as False
    if ct == CONTENT_VIDEO:
        # "length" is split on ":" and its last four parts are weighted by
        # [216000, 3600, 60, 1] and summed
        try:
            l = int(float(md["video:duration"])) if "video:duration" in md else \
                int(float(md["video:length"])) if "video:length" in md else \
                sum(imap(mul, [int(float(part)) for part in ("0:0:0:" + str(md["length"])).split(":")[-4:]], [216000, 3600, 60, 1])) if "length" in md else \
                60*int(float(md["video:minutes"])) if "video:minutes" in md else \
                False
        except:
            l = False
        try:
            bitrate = int(str(md["bitrate"]).replace('~','')) if "bitrate" in md else 1280 # default bitrate for video
        except:
            bitrate = False

    elif ct == CONTENT_AUDIO:
        try:
            l = int(float(md["audio:seconds"])) if "audio:seconds" in md else \
                sum(imap(mul, [int(float(part)) for part in ("0:0:0:" + str(md["length"])).split(":")[-4:]], [216000, 3600, 60, 1])) if "length" in md else \
                False
        except:
            l = False
        try:
            bitrate = int(str(md["bitrate"]).replace('~','')) if "bitrate" in md else 1280 # default bitrate for video (original wording; this is the audio branch)
        except:
            bitrate = False

    else:
        bitrate = l = False

    # values below 1 are treated as unknown
    if z<1: z = False
    if l<1: l = False

    # when only one of size and length is known, the other one is estimated
    # from the bitrate (bitrate<<7 multiplies the bitrate by 128)
    if bitrate:
        if l and not z: z = l*(bitrate<<7) # bitrate in kbps converted to Kbps
        elif z and not l: l = z/(bitrate<<7)

    # the length is stored only when it is strictly between 0 and 0xFFFF;
    # the size is stored as its base 2 logarithm
    afile["_l"] = int(l) if 0<int(l)<0xFFFF else False
    afile["_z"] = log(z,2) if z else False

    # metadata
    # selected file names plus the string metadata values whose key is in
    # GOOD_MDS and whose length does not exceed GOOD_MDS[key]
    mds = chain(chain(*res), chain(value for key,value in md.iteritems() if key in GOOD_MDS and isinstance(value, basestring) and len(value)<=GOOD_MDS[key]))
    afile["_md"] = separator_join(amd for amd in mds if amd)

    # sources
    afile["_s"] = [unicode(t) for t in types]

    # text filters
    filters = {FILTER_PREFIX_CONTENT_TYPE+CONTENTS[ct]}
    filters.add("%storrent"%FILTER_PREFIX_SOURCE_GROUP)
    # one filter per numeric_filters key present in md with a numeric value
    # (int, float or decimal string), formatted as prefix + two-digit number
    filters.update("%s%02d"%(prefix,int(md[key])) for key, prefix in numeric_filters.iteritems() if key in md and (isinstance(md[key], int) or isinstance(md[key], float) or (isinstance(md[key], basestring) and md[key].isdecimal())))

    # words found by SUBCATEGORIES_FINDER in the lowercased string metadata
    # whose key ends with one of DYNAMIC_TAGS_METADATA, plus the file tags
    file_words = [word.strip().replace("-"," ") for key, value in md.iteritems() if value and isinstance(value, basestring) and any(key.endswith(dtag_md) for dtag_md in DYNAMIC_TAGS_METADATA) for word in SUBCATEGORIES_FINDER.findall(value.lower())] + file_tags

    # (tag, dynamic tag) pairs for every file tag known in DYNAMIC_TAGS and
    # every file word registered under that tag
    dtags = {(tag, DYNAMIC_TAGS[tag][word]) for tag in file_tags if tag in DYNAMIC_TAGS for word in file_words if word in DYNAMIC_TAGS[tag]}

    # when no file tag is a DYNAMIC_TAGS category, fall back to the tag of
    # the category matching the content type; that tag is added to file_tags
    # only if it yields at least one dynamic tag
    if file_category_tag and file_category_tag in DYNAMIC_TAGS and file_category_tag not in file_tags and not any(category in file_tags for category in DYNAMIC_TAGS.iterkeys()):
        type_dtags = [(file_category_tag, DYNAMIC_TAGS[file_category_tag][word]) for word in file_words if word in DYNAMIC_TAGS[file_category_tag]]
        dtags.update(type_dtags)
        if type_dtags:
            file_tags.append(file_category_tag)

    filters.update("%s%s"%(FILTER_PREFIX_TAGS, tag) for tag in file_tags)
    filters.update("%s%s"%(FILTER_PREFIX_DYNAMIC_TAGS, dtag.replace(" ","")) for tag, dtag in dtags)

    if file_format: filters.add(FILTER_PREFIX_FORMAT+file_format[0])

    afile["_fil"] = " ".join(filters)
    afile["__dtags"] = dtags

    # groups
    # packed integer: entity id from bit 32 up, low 4 bits of "_ct" in bits
    # 28-31, main_type (masked to 16 bits) shifted left by 12 and the low
    # 12 bits of inner_group
    afile["_g"] = ((entity << 32) |
                   (((afile["_ct"] or 0)&0xF) << 28) |
                   ((main_type & 0xFFFF) << 12) |
                   (inner_group & 0xFFF))

    return True