class _DefaultSearchSchema(SchemaClass):
    """General search schema."""

    object_key = ID(stored=True, unique=True)
    id = NUMERIC(numtype=int, bits=64, signed=False, stored=True, unique=False)
    object_type = ID(stored=True, unique=False)
    creator = ID(stored=True)
    owner = ID(stored=True)

    #: security index. This lists the roles and user/group ids allowed to
    #: *see* this content
    allowed_roles_and_users = KEYWORD(stored=True)

    #: tags indexing
    tag_ids = KEYWORD(stored=True)
    tag_text = TEXT(stored=False, analyzer=accent_folder)

    # hierarchical index of id paths ('/' is the separator)
    parent_ids = FieldType(format=Existence(), analyzer=PathTokenizer(),
                           stored=True, unique=False)

    name = TEXT(stored=True, analyzer=accent_folder)
    slug = ID(stored=True)
    description = TEXT(stored=True, analyzer=accent_folder)
    text = TEXT(stored=False, analyzer=accent_folder)
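# A minimal usage sketch for the schema above (an assumption, not part of the
# original module): it shows how allowed_roles_and_users can act as a security
# filter. The index directory and principal list are illustrative.
from whoosh import index, query

def security_filtered_search(idx_dir, word, principals):
    """Search `text` for `word`, restricted to documents visible to `principals`."""
    ix = index.open_dir(idx_dir)
    with ix.searcher() as searcher:
        # a document is visible if its security index names any caller principal
        allow = query.Or([query.Term("allowed_roles_and_users", p)
                          for p in principals])
        q = query.And([query.Term("text", word), allow])
        return [hit["name"] for hit in searcher.search(q, limit=20)]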
def esquema_equipo(listaEquipos):
    # define the schema for the data
    schem = Schema(codigo=TEXT(stored=True),
                   nombre=KEYWORD(stored=True),
                   urlEquipo=TEXT(stored=True),
                   categoria=KEYWORD(stored=True))
    # if the index directory already exists, remove it
    if os.path.exists("Index"):
        shutil.rmtree("Index")
    os.mkdir("Index")
    # create the index
    ix = create_in("Index", schema=schem)
    # create a writer so we can add documents to the index
    writer = ix.writer()
    for equipo in listaEquipos:
        # add each element of the list of retrieved teams to the index
        writer.add_document(codigo=str(equipo[0]),
                            nombre=str(equipo[1]),
                            urlEquipo=str(equipo[2]),
                            categoria=str(equipo[3]))
    writer.commit()
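# A short follow-up sketch (assumed, not in the original code): once
# esquema_equipo() has built "Index", teams can be looked up by category.
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

def buscar_equipos_por_categoria(categoria):
    ix = open_dir("Index")
    with ix.searcher() as searcher:
        q = QueryParser("categoria", ix.schema).parse(str(categoria))
        # limit=None returns every matching team's stored fields
        return [hit.fields() for hit in searcher.search(q, limit=None)]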
def esquema_detalles_equipo(datos):
    # We create two schemas: one for the team data and another for the players

    # TEAM DATA SCHEMA
    schem = Schema(codEquipo=TEXT(stored=True),
                   nombre=KEYWORD(stored=True),
                   domicilio=KEYWORD(stored=True),
                   localidad=TEXT(stored=True),
                   provincia=TEXT(stored=True),
                   codPostal=TEXT(stored=True),
                   email=TEXT(stored=True),
                   key=TEXT(stored=True))
    if os.path.exists("Index_equipo"):
        shutil.rmtree("Index_equipo")
    os.mkdir("Index_equipo")
    ix = create_in("Index_equipo", schema=schem)
    writer = ix.writer()
    writer.add_document(codEquipo=str(datos[6]),
                        nombre=str(datos[0]),
                        domicilio=str(datos[1]),
                        localidad=str(datos[2]),
                        provincia=str(datos[3]),
                        codPostal=str(datos[4]),
                        email=str(datos[5]),
                        key="equipo")
    writer.commit()

    # PLAYER SCHEMA
    # The name is KEYWORD because it may be a compound name
    schemJugadores = Schema(nombre=KEYWORD(stored=True),
                            apellidos=KEYWORD(stored=True),
                            equipo=TEXT(stored=True))
    jugadores = datos[7]
    nombre = ""
    apellidos = ""
    if os.path.exists("Index_jugadores"):
        shutil.rmtree("Index_jugadores")
    os.mkdir("Index_jugadores")
    ix = create_in("Index_jugadores", schema=schemJugadores)
    writerJugadores = ix.writer()
    for jugador in jugadores:
        # entries come as "apellidos,nombre"
        jugadorApellidosNombre = jugador.split(",")
        nombre = jugadorApellidosNombre[1]
        apellidos = jugadorApellidosNombre[0]
        writerJugadores.add_document(nombre=str(nombre),
                                     apellidos=str(apellidos),
                                     equipo=str(datos[6]))
    writerJugadores.commit()
def make_schema():
    return Schema(
        paper_field=KEYWORD(stored=True, lowercase=True, scorable=True),
        title=TEXT(stored=True, analyzer=StemmingAnalyzer()),
        authors=KEYWORD(stored=True, lowercase=True),
        pdf=ID(stored=True),
        abstract=TEXT(stored=True, analyzer=StemmingAnalyzer()),
        date=DATETIME(stored=True),
    )
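# A hedged usage sketch for make_schema(): DATETIME fields take Python
# datetime objects at index time and bracketed ranges at query time. The
# directory and document values below are illustrative assumptions.
import os
from datetime import datetime
from whoosh.index import create_in
from whoosh.qparser import QueryParser

def demo_date_range(index_dir):
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, make_schema())
    with ix.writer() as w:
        w.add_document(title=u"Neural ranking", authors=u"doe,j",
                       pdf=u"doe2020.pdf", abstract=u"ranking with nets",
                       date=datetime(2020, 5, 1))
    with ix.searcher() as s:
        # the default parser understands ranges on DATETIME fields
        q = QueryParser("title", ix.schema).parse(u"date:[2020 to 2021]")
        return [hit["title"] for hit in s.search(q)]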
def jugadoresList(request, id_equipo):
    if not request.user.is_authenticated:
        return redirect('/login')

    schemJugadores = Schema(id=NUMERIC(int, stored=True),
                            nombre=KEYWORD(stored=True),
                            apellidos=KEYWORD(stored=True),
                            posicion=KEYWORD(stored=True),
                            equipo=TEXT(stored=True))
    eq = get_object_or_404(Equipo, pk=id_equipo)
    jugadores = Jugador.objects.filter(equipo=id_equipo)

    if os.path.exists("Index_jugadores"):
        shutil.rmtree("Index_jugadores")
    os.mkdir("Index_jugadores")
    ix = create_in("Index_jugadores", schema=schemJugadores)
    writerJugadores = ix.writer()
    for jugador in jugadores:
        writerJugadores.add_document(id=int(jugador.id),
                                     nombre=str(jugador.nombre),
                                     apellidos=str(jugador.apellidos),
                                     posicion=str(jugador.posicionPrincipal),
                                     equipo=str(jugador.equipo))
    writerJugadores.commit()

    if request.method == 'POST':
        posicion = request.POST['posicion']
        ix_jugadores = open_dir("Index_jugadores")
        jugadores = []
        with ix_jugadores.searcher() as searcherJugadores:
            consulta = str(posicion) + " " + str(eq)
            query = MultifieldParser(["posicion", "equipo"],
                                     ix_jugadores.schema,
                                     group=AndGroup).parse(str(consulta))
            # limit=None so that all results are returned
            diccionariosJugadores = searcherJugadores.search(query, limit=None)
            for dicJugadores in diccionariosJugadores:
                auxJugadores = {
                    'id': dicJugadores['id'],
                    'nombre': dicJugadores['nombre'],
                    'apellidos': dicJugadores['apellidos'],
                    'posicionPrincipal': dicJugadores['posicion']
                }
                jugadores.append(auxJugadores)
    return render(request, 'principal/jugadores.html', {'jugadores': jugadores, 'eq': eq})
def getSchema():
    """Create and return a schema for the search."""
    return Schema(jornada=KEYWORD(stored=True),
                  equipos=TEXT(stored=True),
                  resultado=TEXT(stored=True),
                  fecha=KEYWORD(stored=True),
                  autor=TEXT(stored=True),
                  titular=TEXT(stored=True),
                  titulo=KEYWORD(stored=True),
                  cuerpo=TEXT())
def schemaSerie():
    schem = Schema(idSerie=ID(stored=True, unique=True),
                   titulo=TEXT(stored=True),
                   tituloOriginal=TEXT(stored=True),
                   imdb=TEXT(stored=True),
                   fechaEstreno=TEXT(stored=True),
                   poster=TEXT(stored=True),
                   temporadas=TEXT(stored=True),
                   generos=KEYWORD(stored=True, commas=True),
                   plataformas=KEYWORD(stored=True, commas=True),
                   links=KEYWORD(stored=True, commas=True))
    return schem
class CardIndexSchema(SchemaClass):
    id = NUMERIC(unique=True)
    name = KEYWORD()
    type = KEYWORD()
    layout = TEXT()
    text = TEXT  # a bare field class is also valid in a declarative schema
    colors = KEYWORD(commas=True)
    costs = NUMERIC()
    power = TEXT()
    toughness = TEXT()
    availability = NUMERIC()
    card = STORED
def _get_schema(self, language):
    lang_analyzer = LanguageAnalyzer(language)
    return Schema(
        key=ID(stored=True, unique=True),
        assignee=ID(stored=True),
        reporter=ID(stored=True),
        status=ID(stored=True),
        summary=TEXT(analyzer=lang_analyzer, field_boost=2.0),
        description=TEXT(analyzer=lang_analyzer),
        comments_str=TEXT(analyzer=lang_analyzer),
        labels=KEYWORD(stored=True, lowercase=True),
        components=KEYWORD(stored=True, lowercase=True),
    )
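# A hedged sketch of querying the boosted schema above: an equivalent match in
# summary (field_boost=2.0) should outrank one in description. The English
# analyzer, index directory, and issue keys are assumptions.
import os
from whoosh.index import create_in
from whoosh.qparser import MultifieldParser

def demo_field_boost(tracker, index_dir):
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, tracker._get_schema("en"))
    with ix.writer() as w:
        w.add_document(key=u"A-1", summary=u"login crash", description=u"")
        w.add_document(key=u"A-2", summary=u"", description=u"login crash")
    with ix.searcher() as s:
        q = MultifieldParser(["summary", "description"], ix.schema).parse(u"crash")
        return [hit["key"] for hit in s.search(q)]  # expect A-1 first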
def get_schema():
    return Schema(id=ID(stored=True),
                  name=NGRAMWORDS(stored=True, minsize=2, maxsize=12,
                                  at='start', queryor=True),
                  display=TEXT(stored=True),
                  zvalue=NUMERIC(stored=True),
                  kind=KEYWORD(stored=True),
                  sumlevel=KEYWORD(stored=True),
                  is_stem=NUMERIC(stored=True),
                  url_name=TEXT(stored=True))
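# An assumed autocomplete sketch for get_schema(): because `name` is
# NGRAMWORDS with at='start', any prefix of two or more characters matches
# directly, which is what makes type-ahead lookups cheap.
import os
from whoosh.index import create_in
from whoosh.qparser import QueryParser

def demo_typeahead(index_dir):
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, get_schema())
    with ix.writer() as w:
        w.add_document(id=u"1", name=u"chicago", display=u"Chicago, IL")
    with ix.searcher() as s:
        q = QueryParser("name", ix.schema).parse(u"chi")  # prefix of "chicago"
        return [hit["display"] for hit in s.search(q)]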
def __get_index_schema(self):
    """:return: organization index schema"""
    return Schema(id=NUMERIC(stored=True),
                  url=ID(stored=True),
                  external_id=ID(stored=True),
                  name=ID(stored=True),
                  domain_names=KEYWORD(stored=True, commas=True),
                  created_at=ID(stored=True),
                  details=ID(stored=True),
                  shared_tickets=BOOLEAN(stored=True),
                  tags=KEYWORD(stored=True, commas=True))
def schemaPelicula():
    schem = Schema(idPelicula=ID(stored=True, unique=True),
                   titulo=TEXT(stored=True),
                   tituloOriginal=TEXT(stored=True),
                   imdb=TEXT(stored=True),
                   fechaEstreno=TEXT(stored=True),
                   poster=TEXT(stored=True),
                   duracion=TEXT(stored=True),
                   director=TEXT(stored=True),
                   generos=KEYWORD(stored=True, commas=True),
                   plataformas=KEYWORD(stored=True, commas=True),
                   links=KEYWORD(stored=True, commas=True))
    return schem
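# A hedged search sketch shared by schemaSerie() and schemaPelicula(): with
# commas=True, multi-word genres are indexed as single tokens, so an exact
# Term query is the safest lookup. The index directory name is hypothetical.
from whoosh import query
from whoosh.index import open_dir

def peliculas_por_genero(genero):
    ix = open_dir("IndexPeliculas")  # hypothetical index location
    with ix.searcher() as s:
        q = query.Term("generos", genero)  # e.g. "Ciencia ficcion"
        return [hit["titulo"] for hit in s.search(q, limit=None)]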
def create_index(self, offering):
    """Create a document entry for the offering in the search index."""
    # Check if the index already exists to avoid overwriting it
    if not os.path.exists(self._index_path) or os.listdir(self._index_path) == []:
        # Create dir if needed
        if not os.path.exists(self._index_path):
            os.makedirs(self._index_path)

        # Create schema; decimal_places requires numtype=Decimal
        # (a Decimal value is indexed below)
        schema = Schema(id=KEYWORD(stored=True, unique=True),
                        owner=KEYWORD,
                        content=TEXT,
                        name=KEYWORD(sortable=True),
                        popularity=NUMERIC(Decimal, decimal_places=2,
                                           sortable=True, signed=False),
                        date=DATETIME(sortable=True),
                        state=KEYWORD,
                        purchaser=KEYWORD(stored=True, commas=True))

        # Create index
        index = create_in(self._index_path, schema)
    else:
        # Open the index
        index = open_dir(self._index_path)

    index_writer = index.writer()

    # Aggregate all the information included in the USDL document in a single
    # string in order to add a new document to the index
    text = self._aggregate_text(offering)
    purchasers_text = self._aggregate_purchasers(offering)

    # Add the new document (Python 2 unicode())
    index_writer.add_document(id=unicode(offering.pk),
                              owner=unicode(offering.owner_organization.pk),
                              content=unicode(text),
                              name=unicode(offering.name),
                              popularity=Decimal(offering.rating),
                              date=offering.creation_date,
                              state=unicode(offering.state),
                              purchaser=purchasers_text)
    index_writer.commit()
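# A hedged companion sketch (not from the original class): the sortable
# name/popularity/date fields above allow ordering results without scoring.
# `index_path` and the searched text are illustrative.
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

def search_sorted_by_popularity(index_path, text):
    ix = open_dir(index_path)
    with ix.searcher() as s:
        q = QueryParser("content", ix.schema).parse(text)
        # sortedby takes a sortable field name; reverse=True puts highest first
        hits = s.search(q, sortedby="popularity", reverse=True)
        return [hit["id"] for hit in hits]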
def _mail_schema(self):
    return Schema(ident=ID(stored=True, unique=True),
                  sender=ID(stored=False),
                  to=KEYWORD(stored=False, commas=True),
                  cc=KEYWORD(stored=False, commas=True),
                  bcc=KEYWORD(stored=False, commas=True),
                  subject=NGRAMWORDS(stored=False),
                  date=NUMERIC(stored=False, sortable=True, bits=64, signed=False),
                  body=NGRAMWORDS(stored=False),
                  tag=KEYWORD(stored=True, commas=True),
                  flags=KEYWORD(stored=True, commas=True),
                  raw=TEXT(stored=False))
def create_searchable_database(root, txt_files=True):
    """
    Load all files in the corpus into the search index.

    :param root (string): Directory of the corpus
    :param txt_files (bool): If True, index the .txt corpus; otherwise the JSON corpus
    """
    index_dir = os.path.join(root, "index_dir")
    if txt_files:
        corpus_dir = os.path.join(root, "txt_dir")
    else:
        corpus_dir = os.path.join(root, "json_dir")

    while True:
        inpt = input(
            'Warning! This will remove the current index_dir. Type "ok" to continue,'
            ' or "exit" to abort: \n')
        if inpt == "ok":
            break
        elif inpt == "exit":
            exit()

    if os.path.exists(index_dir):
        shutil.rmtree(index_dir)
    os.mkdir(index_dir)

    schema = Schema(title=TEXT(stored=True),
                    keywords=KEYWORD(stored=True, scorable=True, commas=True),
                    content=TEXT(stored=True))
    ix = create_in(index_dir, schema)
    writer = ix.writer()
    if txt_files:
        add_txt_documents(writer, corpus_dir)
    else:
        add_json_documents(writer, corpus_dir)
    writer.commit()
def minaceSchema():
    """
    Defines a basic schema for the index.

    Fields:
        image: path
        ccompsHead: components in the middle of a transcribed word, or
                    single-grams if they are not in ('s', 'e', 'l', 'm')
        ccompsTail: ending tokens
        ccompsHeadTrace: positional index of the tokens and their compounds
    """
    return Schema(
        image=ID(stored=True, unique=True),
        ccompsHead=KEYWORD(stored=True, sortable=True),
        ccompsTail=KEYWORD(stored=True, sortable=True),
        ccompsHeadTrace=STORED,
    )
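# A hedged retrieval sketch: ccompsHeadTrace is STORED only, so it cannot be
# searched but is returned with each document; searcher.document() fetches a
# record by its unique image id. The index directory is illustrative.
from whoosh.index import open_dir

def trace_for_image(index_dir, image_path):
    ix = open_dir(index_dir)
    with ix.searcher() as s:
        doc = s.document(image=image_path)  # exact match on the unique ID field
        return doc["ccompsHeadTrace"] if doc else None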
def get_schema(self):
    return Schema(nid=ID(unique=True, stored=True),
                  url=ID(unique=True, stored=True),
                  title=TEXT(phrase=False),
                  tags=KEYWORD(lowercase=True, commas=True, scorable=True),
                  note=TEXT(analyzer=analyzer),
                  content=TEXT(stored=True, analyzer=analyzer))
def __init__(self, config):
    self.schema = Schema(
        id=ID(unique=True),
        title=TEXT(stored=True, field_boost=3.0,
                   analyzer=StandardAnalyzer() | NgramFilter(minsize=2, maxsize=3)),
        author=TEXT(stored=True),
        creation_date=DATETIME(stored=True),
        pages=STORED,
        content=TEXT(stored=True, analyzer=StandardAnalyzer(stoplist=None)),
        lang=TEXT(stored=True),
        size=STORED,
        tags=KEYWORD(stored=True, commas=True),
    )
    self.index_path = config['WHOOSH_INDEX']
    if not os.path.exists(self.index_path):
        os.mkdir(self.index_path)
        create_in(self.index_path, self.schema)
    self.indexer = open_dir(self.index_path)
    self.parser_content = MultifieldParser(["title", "content"], schema=self.schema)
    self.parser_content.add_plugin(DateParserPlugin())
    self.date_format = {
        'last_24h': u'-24h to now',
        'last_week': u'last week',
        'last_month_to_now': u'-1mo to now',
        'last_year_to_now': u"[-2yrs to now]",
    }
def get_whoosh_index(force_create=False):
    from whoosh.index import create_in, exists_in, open_dir
    from whoosh.fields import Schema, TEXT, KEYWORD, ID, STORED
    from whoosh.analysis import CharsetFilter, StemmingAnalyzer, NgramWordAnalyzer
    from whoosh.support.charset import accent_map

    analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
    ngramAnalyzer = NgramWordAnalyzer(minsize=2, maxsize=4)

    schema = Schema(
        title=TEXT(analyzer=analyzer, spelling=True, stored=True, field_boost=3.0),
        abstract=TEXT(analyzer=analyzer, stored=True, field_boost=2.0),
        path=ID(unique=True, stored=True),
        authors=TEXT(analyzer=analyzer, sortable=True, field_boost=1.5),
        content=TEXT(analyzer=analyzer, stored=True),
        tags=KEYWORD(sortable=True, commas=True, field_boost=1.5, lowercase=True),
        status=KEYWORD,
        classname=KEYWORD,
        typeahead=TEXT(spelling=True, stored=True, phrase=False),
    )

    if not os.path.exists(settings.WHOOSH_ROOT):
        os.mkdir(settings.WHOOSH_ROOT)

    if not exists_in(settings.WHOOSH_ROOT) or force_create:
        index = create_in(settings.WHOOSH_ROOT, schema)
    else:
        index = open_dir(settings.WHOOSH_ROOT)
    return index
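# A hedged companion for get_whoosh_index(): fields declared with
# spelling=True keep the word graph needed for "did you mean" suggestions.
def suggest_titles(word, limit=3):
    ix = get_whoosh_index()
    with ix.searcher() as searcher:
        corrector = searcher.corrector("title")
        return corrector.suggest(word, limit=limit)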
def get_schema_temas():
    return Schema(titulo=TEXT(stored=True),
                  link_tema=ID(unique=True, stored=True),
                  autor=KEYWORD(stored=True),
                  fecha=DATETIME(stored=True),
                  n_respuestas=STORED,
                  n_visitas=STORED)
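# An assumed query sketch for the forum-topic schema above: adding
# DateParserPlugin lets the fecha DATETIME field accept relative dates such
# as 'last week'. The index directory is an assumption.
from whoosh.index import open_dir
from whoosh.qparser import QueryParser
from whoosh.qparser.dateparse import DateParserPlugin

def temas_recientes(index_dir, texto):
    ix = open_dir(index_dir)
    qp = QueryParser("titulo", ix.schema)
    qp.add_plugin(DateParserPlugin())
    q = qp.parse(u"%s fecha:'last week'" % texto)
    with ix.searcher() as s:
        return [hit["link_tema"] for hit in s.search(q)]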
def indexar_datos_ttl():
    # isbn kept as text because it exceeds Whoosh's numeric range
    esquema = Schema(id=NUMERIC(stored=True),
                     isbn=TEXT,
                     titulo=TEXT,
                     autor=TEXT,
                     genero=KEYWORD(commas=True),
                     descripcion=TEXT,
                     fechapublicacion=DATETIME,
                     precio=NUMERIC(numtype=float))
    if os.path.exists("IndexTtl"):
        shutil.rmtree("IndexTtl")
    os.mkdir("IndexTtl")
    ix = create_in("IndexTtl", schema=esquema)
    writer = ix.writer()
    it = 0
    for libro in TodosTusLibros.objects.all():
        writer.add_document(id=libro.pk,
                            isbn=str(libro.isbn),
                            titulo=libro.titulo,  # was missing; assumes the model exposes `titulo`
                            autor=libro.autor,
                            genero=libro.categorias.replace(" ", ""),
                            descripcion=libro.descripcion,
                            fechapublicacion=libro.fechapublicacion,
                            precio=float(libro.precio.replace("€", "")))
        it += 1
    writer.commit()
    return it
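# A hedged follow-up sketch: NUMERIC(float) fields support range syntax in
# the default parser, so the index built above can be filtered by price.
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

def libros_hasta(precio_max):
    ix = open_dir("IndexTtl")
    with ix.searcher() as s:
        q = QueryParser("descripcion", ix.schema).parse(
            u"precio:[0 to %s]" % precio_max)
        return [hit["id"] for hit in s.search(q, limit=None)]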
def search_terms(self, keyword, definition, flag):
    if not os.path.exists("indexdir"):
        os.mkdir("indexdir")
    schema = Schema(title=TEXT(stored=True),
                    content=TEXT(stored=True),
                    subjective=KEYWORD(stored=True, lowercase=True, scorable=True))
    ix = index.create_in("indexdir", schema)

    with open('./data/sample.txt') as f:
        texts = list(f)
    with open('./data/sample-title.txt') as f2:
        titles = list(f2)
    with open('./data/subs.txt') as f3:
        subs = list(f3)

    writer = ix.writer()
    for i in range(len(titles)):
        writer.add_document(title=titles[i], content=texts[i][1:-2], subjective=subs[i])
    writer.commit()

    s = ix.searcher()
    if flag:
        query = QueryParser("content", ix.schema).parse(keyword)
    else:
        query = QueryParser("subjective", ix.schema).parse(definition)
    results = s.search(query, terms=True, limit=20)
    return results
class OntologyContentSchema(SchemaClass):
    repo = ID(stored=True)
    spreadsheet = ID(stored=True)
    class_id = ID(stored=True)
    label = TEXT(stored=True)
    definition = TEXT(stored=True)
    parent = KEYWORD(stored=True)
def __init__(self, search_term: str):
    self.schema = Schema(
        educational_requirements=TEXT(),
        employment_type=ID(),
        experience_requirements=TEXT(),
        industry=KEYWORD(),
        organization=ID(stored=True),
        title=TEXT(stored=True),
        url=STORED(),
        parent_identifier=NUMERIC(stored=True),
        # Paragraph data (children)
        type=ID(stored=True),
        parent=NUMERIC(),
        paragraph_number=NUMERIC(stored=True),
        paragraph_heading=TEXT(analyzer=Analyzing.ImprovedTokenizer(), stored=True),
        paragraph_content=TEXT(analyzer=Analyzing.ImprovedTokenizer(), stored=True))
    self.index_path: str = os.path.join(definitions.MAIN_PATH, "Storage",
                                        "Indexe", search_term)
    FileHandler.if_folder_not_existent_create(self.index_path)
    self.ix: Index = None
    self.writer: IndexWriter = None
def test_groupedby_empty_field(self):
    schema = Schema(
        unique_id=ID(stored=True, unique=True),
        id=ID(stored=True),
        type=ID(stored=True),
        status=KEYWORD(stored=True),
        content=TEXT(stored=True),
    )
    ix = index.create_in(self.index_dir, schema=schema)
    with ix.writer() as w:
        w.add_document(unique_id=u"1", type=u"type1")
        w.add_document(unique_id=u"2", type=u"type2", status=u"New")

    facet_fields = (u"type", u"status")
    groupedby = facet_fields

    with ix.searcher() as s:
        r = s.search(
            query.Every(),
            groupedby=groupedby,
            maptype=sorting.Count,
        )
        facets = self._load_facets(r)
        self.assertEquals(
            {
                'status': {None: 1, 'New': 1},
                'type': {'type1': 1, 'type2': 1},
            },
            facets)
def __init__(self, *args, **kwargs):
    self._dir = kwargs.pop('directory', '.whoosh')
    clear = bool(kwargs.pop('clear', False))
    self._name = kwargs.pop('name', config.store_name)
    self._scheduler = kwargs.pop('scheduler', None)
    if self._scheduler is None:
        self._scheduler = make_default_scheduler()
        self._scheduler.start()
    if clear:
        shutil.rmtree(self._dir)

    self.schema = Schema(content=NGRAMWORDS(stored=False))
    self.schema.add("object_id", ID(stored=True, unique=True))
    self.schema.add("entity_id", ID(stored=True, unique=True))
    for a in list(ATTRS.keys()):
        self.schema.add(a, KEYWORD())

    self._redis = kwargs.pop('redis', None)
    if self._redis is None:
        self._redis = StrictRedis(host=config.redis_host, port=config.redis_port)

    now = datetime.now()
    self._last_index_time = now
    self._last_modified = now

    self.objects = self.xml_dict('objects')
    self.parts = self.json_dict('parts')

    self.storage = FileStorage(os.path.join(self._dir, self._name))
    try:
        self.index = self.storage.open_index(schema=self.schema)
    except BaseException as ex:
        log.warn(ex)
        self.storage.create()
        self.index = self.storage.create_index(self.schema)
        self._reindex()
def __init__(self, prep_dsets, metadata_dir, process_pool, keywords_wildcard='*'):
    """
    Initializes the class and sets the list of supported datasets.
    Arguments:
        prep_dsets: Dictionary of supported datasets. The keys of this
                    dictionary are used to find the subfolder within
                    metadata_dir where the metadata of each dataset should
                    be stored.
        metadata_dir: Directory where to look for the metadata files.
        process_pool: Instance of CpProcessPool, used to support
                      multi-threading.
        keywords_wildcard: Wildcard character for keyword-based search. It
                           should be JUST ONE CHARACTER and cannot be '#'.
    """
    self.fname2meta = {}
    self.keyword2fname = {}
    self.metadata_dir = metadata_dir
    self.process_pool = process_pool
    self.keywords_wildcard = keywords_wildcard

    # load metadata for each dataset
    self.metaindex = None
    self.is_all_metadata_loaded = False
    found_a_csv = False
    for (dset, pretty) in prep_dsets.items():
        self.fname2meta[dset] = {}
        self.keyword2fname[dset] = {}
        try:
            # check there is at least one csv
            if not found_a_csv:
                for afile in os.listdir(os.path.join(self.metadata_dir, dset)):
                    if afile.endswith(".csv"):
                        found_a_csv = True
                        break

            # create index, if not present
            self.index_dir = os.path.join(self.metadata_dir, 'indexdir')
            create_index = False
            if found_a_csv and not os.path.exists(self.index_dir):
                os.mkdir(self.index_dir)
                # In the future, this might be needed if using multiple datasets
                schema = Schema(key=KEYWORD(stored=True), dataset=TEXT)
                self.metaindex = create_in(self.index_dir, schema)
                create_index = True

            # load the old one, if found
            if found_a_csv and os.path.exists(self.index_dir):
                self.metaindex = open_dir(self.index_dir)

            # start thread to load all metadata
            self.process_pool.apply_async(func=self.load_all_dset_metadata,
                                          args=(dset, create_index, ))
        except Exception as e:
            print("Error while pre-loading metadata for " + dset + ": " + str(e) + '\n')
class MangaSchema(SchemaClass):
    title = TEXT(stored=True)
    author = TEXT
    artist = TEXT
    description = TEXT
    tags = KEYWORD(lowercase=True, commas=True, scorable=True)
    completed = BOOLEAN
    url = ID(stored=True)
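# An assumed query sketch against MangaSchema: BOOLEAN fields parse "true"
# and "false" in query text. The index directory and documents below are
# illustrative.
import os
from whoosh.index import create_in
from whoosh.qparser import QueryParser

def demo_completed(index_dir):
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, MangaSchema())
    with ix.writer() as w:
        w.add_document(title=u"Done", completed=True, url=u"u1")
        w.add_document(title=u"Ongoing", completed=False, url=u"u2")
    with ix.searcher() as s:
        q = QueryParser("title", ix.schema).parse(u"completed:true")
        return [hit["title"] for hit in s.search(q)]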
def get_schema():
    return Schema(titulo=TEXT(stored=True),
                  plataformas=TEXT(stored=True),
                  desarrollador=ID(stored=True),
                  generos=TEXT(stored=True),
                  url_juego=ID(stored=True),
                  jugadores=KEYWORD(stored=True),
                  url_imagen=ID(stored=True))
def __init__(self):
    self.schema = Schema(scopes=KEYWORD(),
                         descr=TEXT(),
                         service_name=TEXT(),
                         service_descr=TEXT(),
                         keywords=KEYWORD())
    self.schema.add("object_id", ID(stored=True, unique=True))
    self.schema.add("entity_id", ID(stored=True, unique=True))
    for a in ATTRS.keys():
        self.schema.add(a, KEYWORD())
    self._collections = set()

    from whoosh.filedb.filestore import RamStorage, FileStorage
    self.storage = RamStorage()
    self.storage.create()
    self.index = self.storage.create_index(self.schema)
    self.objects = dict()
    self.infos = dict()
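# A hedged note on the in-memory pattern above: a RamStorage index disappears
# with the process, and because object_id is unique, update_document() can
# insert-or-replace entries. This helper and its argument names are
# assumptions, not part of the original class.
def add_or_update(self, object_id, entity_id, **attrs):
    # attrs keys are assumed to be fields already added to self.schema
    with self.index.writer() as w:
        w.update_document(object_id=object_id, entity_id=entity_id, **attrs)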