def new_field(self, field_name: str, field_data):
    """
    Add a new field. If the schema is not yet defined, the writer
    will add field_name to the schema.

    Args:
        field_name (str): Name of the new field
        field_data: Data to put into the field
    """
    if not self.__schema_defined:
        self.__writer.add_field(field_name, KEYWORD(stored=True, vector=Frequency()))
    self.__doc[field_name] = field_data

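# A minimal sketch of the dynamic-field pattern used by new_field() above,
# on a plain Whoosh index. The directory name, field name, and values are
# illustrative assumptions; IndexWriter.add_field() extends a live schema
# before documents using the new field are committed.
import os
from whoosh.fields import Schema, ID, KEYWORD
from whoosh.formats import Frequency
from whoosh.index import create_in

os.makedirs("sketch_index", exist_ok=True)
ix = create_in("sketch_index", Schema(id=ID(stored=True, unique=True)))
writer = ix.writer()
writer.add_field("tags", KEYWORD(stored=True, vector=Frequency()))
writer.add_document(id=u"1", tags=u"alpha beta")
writer.commit()
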
def get_schema():
    return Schema(
        id=NUMERIC(stored=True, unique=True, numtype=int),
        title=TEXT(stored=True),
        content=TEXT(),
        correspondent=TEXT(stored=True),
        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
        type=TEXT(stored=True),
        created=DATETIME(stored=True, sortable=True),
        modified=DATETIME(stored=True, sortable=True),
        added=DATETIME(stored=True, sortable=True),
    )

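# A minimal sketch of building and querying an index with the schema above.
# The directory name and document values are illustrative assumptions.
import os
from whoosh.index import create_in
from whoosh.qparser import QueryParser

os.makedirs("docindex", exist_ok=True)
ix = create_in("docindex", get_schema())
writer = ix.writer()
writer.add_document(id=1, title=u"Invoice March",
                    content=u"Total amount due in March",
                    tag=u"finance,invoice")
writer.commit()

with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse(u"due")
    for hit in searcher.search(query, limit=5):
        print(hit["title"])
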
def build_schema(self, fields):
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Grab the number of keys that are hard-coded into Haystack.
    # We'll use this to (possibly) fail slightly more gracefully later.
    initial_key_count = len(schema_fields)
    content_field_name = ''

    for field_name, field_class in fields.items():
        if field_class.is_multivalued:
            if field_class.indexed is False:
                schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
            else:
                schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost)
        elif field_class.field_type in ['date', 'datetime']:
            schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
        elif field_class.field_type == 'integer':
            schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost)
        elif field_class.field_type == 'float':
            schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost)
        elif field_class.field_type == 'boolean':
            # Field boost isn't supported on BOOLEAN as of 1.8.2.
            schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
        elif field_class.field_type == 'ngram':
            schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
        elif field_class.field_type == 'edge_ngram':
            schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost)
        else:
            # schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
            # Chinese word segmentation: use ChineseAnalyzer instead of the
            # default StemmingAnalyzer so CJK text is tokenized correctly.
            schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)

        if field_class.document is True:
            content_field_name = field_class.index_fieldname
            schema_fields[field_class.index_fieldname].spelling = True

    # Fail more gracefully than relying on the backend to die if no fields
    # are found.
    if len(schema_fields) <= initial_key_count:
        raise SearchBackendError(
            "No fields were found in any search_indexes. Please correct this before attempting to search.")

    return (content_field_name, Schema(**schema_fields))

def create_index(self):
    self.out("Creating directory %s" % self.INDEX)
    os.mkdir(self.INDEX)
    schema = Schema(
        id=ID(stored=True, unique=True),
        title=TEXT(stored=True),  # Title to show
        card=STORED,              # Object card
        content=TEXT,             # Searchable content
        tags=KEYWORD(stored=True, commas=True, scorable=True),
        url=STORED)
    self.out("Creating index directory")
    create_in(self.INDEX, schema)

def almacenar_datos():
    # define the schema for the data
    schem = Schema(nombre=TEXT(stored=True),
                   edad=NUMERIC(stored=True),
                   altura=NUMERIC(stored=True),
                   nacionalidad=KEYWORD(stored=True, commas=True),
                   pie=TEXT(stored=True),
                   posicion_principal=KEYWORD(stored=True, commas=True),
                   posicion_secundaria=KEYWORD(stored=True, commas=True),
                   valor=NUMERIC(stored=True),
                   equipo=TEXT(stored=True),
                   contrato=DATETIME(stored=True))

    # remove the index directory if it already exists
    if os.path.exists("Index"):
        shutil.rmtree("Index")
    os.mkdir("Index")

    # create the index
    ix = create_in("Index", schema=schem)
    # create a writer so we can add documents to the index
    writer = ix.writer()

    i = 0
    lista = almacenar_datos_bs()
    for jugador in lista:
        # add each player in the list to the index
        writer.add_document(nombre=str(jugador[0]),
                            edad=jugador[1],
                            altura=float(jugador[2]),
                            nacionalidad=str(jugador[3]),
                            pie=str(jugador[4]),
                            posicion_principal=str(jugador[5]),
                            posicion_secundaria=str(jugador[6]),
                            valor=float(jugador[7]),
                            equipo=str(jugador[8]),
                            contrato=jugador[9])
        i += 1
    writer.commit()
    print("Indexed " + str(i) + " players")

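# A hedged follow-up sketch: after almacenar_datos() has built the "Index"
# directory, players can be queried by field. The nationality value is an
# illustrative assumption.
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

almacenar_datos()
ix = open_dir("Index")
with ix.searcher() as searcher:
    query = QueryParser("nacionalidad", ix.schema).parse(u"Argentina")
    for hit in searcher.search(query, limit=10):
        print(hit["nombre"], hit["equipo"])
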
def _get_schema(self):
    stem_ana = StemmingAnalyzer()
    return Schema(
        list_name=ID(stored=True),
        message_id=ID(stored=True),
        sender=TEXT(field_boost=1.5),
        user_id=TEXT,
        subject=TEXT(field_boost=2.0, analyzer=stem_ana),
        content=TEXT(analyzer=stem_ana),
        date=DATETIME(),
        attachments=TEXT,
        tags=KEYWORD(commas=True, scorable=True),
    )

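# A minimal sketch of searching several of the fields above at once; `ix`
# stands for an index built from _get_schema() and the query string is an
# illustrative assumption. The field_boost values in the schema already
# weight subject (2.0) and sender (1.5) higher in the combined score.
from whoosh.qparser import MultifieldParser

with ix.searcher() as searcher:
    parser = MultifieldParser(["subject", "content", "sender"], schema=ix.schema)
    results = searcher.search(parser.parse(u"quarterly report"), limit=20)
    for hit in results:
        print(hit.score, hit["message_id"])
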
class Schema(SchemaClass):
    #: The id of the job.
    id = ID(stored=True, unique=True)
    #: The title of the job.
    title = TEXT(analyzer=stemming_analyzer)
    #: The name of the company.
    company = TEXT(analyzer=stemming_analyzer)
    #: Location as a comma-separated string of city and country.
    location = KEYWORD(lowercase=True, scorable=True, commas=True)
    #: The type of job.
    job_type = TEXT(analyzer=stemming_analyzer)
    #: The job tags as a comma-separated string of tag slugs.
    tags = KEYWORD(lowercase=True, scorable=True, commas=True)
    #: When was this job created?
    created = DATETIME(sortable=True)

def get_index(dirpath, verbose=False):
    try:
        return open_dir(dirpath)
    except Exception:
        pass
    if verbose:
        print 'Creating new index in', dirpath
    if not os.path.exists(dirpath):
        os.mkdir(dirpath)
    schema = Schema(name=ID(stored=True, unique=True),
                    doc=TEXT(stored=True),
                    modulepath=KEYWORD(commas=True))
    return create_in(dirpath, schema)

def index(corpusPath, name, tweetTime=None, stored=False, overwrite=True,
          procs=PROC_NUM, limitmb=MEM_SIZE):  # , featureExtractor):
    """Indexing of the status of tweets."""
    dirList = os.listdir(corpusPath)
    schema = Schema(id=ID(stored=True, unique=True),
                    user=ID,
                    http=NUMERIC,  # http state
                    date=DATETIME(stored=stored),  # tweet date
                    # TODO use a proper analyzer for the status text
                    status=TEXT(stored=stored),  # status text of the tweet
                    hashtags=KEYWORD(stored=stored)  # list of hashtags in the status
                    # replies=KEYWORD,  # list of user replies in the status, as users
                    # vector=STORED
                    # score=NUMERIC(stored=True),  # static score for ranking
                    # retweets=NUMERIC(type=type(1.), stored=True)  # number of retweets of this tweet
                    # next fields to fill on a second indexer pass:
                    # retweets=KEYWORD,  # list of retweets in the status, as tweet ids
                    # retweeteds=KEYWORD  # list of tweets which retweet this tweet, as tweet ids
                    )
    indexPath = getIndexPath(name, tweetTime)
    if not os.path.exists(indexPath):
        os.makedirs(indexPath)
    else:
        if not overwrite:
            return
        shutil.rmtree(indexPath)
        os.makedirs(indexPath)
    ix = whoosh.index.create_in(indexPath, schema)
    # Pass the procs/limitmb arguments through so callers can tune them.
    writer = ix.writer(procs=procs, limitmb=limitmb)
    for fName in dirList:
        # if tweetTime and dateFromFileName(fName) > tweetTime:
        #     continue
        # print fName
        for tweet in iterTweets(os.path.join(corpusPath, fName)):
            if tweetTime and int(tweet[0]) > tweetTime:
                continue
            if tweet[2] != '302':  # and not 'RT @' in tweet[4]:  FIXME retweet filtering
                # v = featureExtractor(tweet[4].encode('ascii', 'replace'))
                writer.add_document(id=tweet[0],
                                    user=tweet[1],
                                    http=int(tweet[2]),
                                    date=tweet[3],
                                    status=tweet[4],
                                    hashtags=u' '.join(tweet[5])
                                    # replies=u' '.join(tweet[6]),
                                    # vector=repr(v)
                                    )
    writer.commit()

def __init__(self):
    chfilter = CharsetFilter(accent_map)
    stoplist = stoplists["en"].union(stoplists["fr"])
    analyzer = RegexTokenizer() | LowercaseFilter() | \
        StopFilter(stoplist=stoplist) | chfilter

    # defines the schema
    # see http://pythonhosted.org/Whoosh/schema.html for reference
    keywordType = KEYWORD(lowercase=True, scorable=True)
    self.schema = Schema(content=TEXT(analyzer=analyzer),
                         docType=TEXT,
                         docId=ID(stored=True, unique=True),
                         tags=keywordType)

    # Adds dynamic fields so each document can index its fields in the
    # same Whoosh index
    self.schema.add('*_string', TEXT(analyzer=analyzer), glob=True)
    self.schema.add('*_date', DATETIME, glob=True)
    self.schema.add('*_number', NUMERIC, glob=True)
    self.schema.add('*_boolean', BOOLEAN, glob=True)

    # Creates the index folder and Whoosh index files if they don't exist,
    # and loads the index in any case
    if not os.path.exists("indexes"):
        os.mkdir("indexes")
        self.index = index.create_in("indexes", self.schema)
    else:
        self.index = index.open_dir("indexes")

    # Creates the doctypes folder if it doesn't exist
    if not os.path.exists("doctypes"):
        os.mkdir("doctypes")

    # Creates the doctypes default schema file if it doesn't exist
    if not os.path.exists('doctypes/doctypes_schema.json'):
        with open('doctypes/doctypes_schema.json', 'w') as defaultFile:
            defaultFile.write("{}")

    # Loads the doctypes schema if it's valid, otherwise recreates it.
    # The doctypes schema is a dictionary of doctypes with their fields,
    # created and updated when a document is indexed. That way, we can
    # tell Whoosh which fields to search by default, because there is
    # apparently no way to say "search in all fields".
    with open('doctypes/doctypes_schema.json', 'r+') as rawJSON:
        try:
            self.doctypesSchema = json.load(rawJSON)
        except ValueError:
            rawJSON.write("{}")
            self.doctypesSchema = {}

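# A minimal sketch of how the glob fields declared above are used: any
# keyword argument matching '*_string', '*_number', etc. is accepted at
# indexing time without being declared individually. `engine` stands for
# an instance of the class above; the field values are illustrative.
writer = engine.index.writer()
writer.add_document(docId=u"doc-1",
                    docType=u"contact",
                    content=u"Ada Lovelace, analyst",
                    name_string=u"Ada Lovelace",  # matches '*_string'
                    age_number=36)                # matches '*_number'
writer.commit()
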
def test_update_tags(self, create_dir, pk, tags):
    if create_dir:
        os.makedirs(self._path)
        # Create schema
        schema = Schema(id=ID(stored=True, unique=True),
                        tags=KEYWORD(stored=True),
                        named_tags=KEYWORD(stored=True))
        # Create index
        index = create_in(self._path, schema)
        index_writer = index.writer()
        index_writer.add_document(id=unicode(pk),
                                  tags=unicode('test1 test2'),
                                  named_tags=unicode('test1 test2'))
        index_writer.commit()

    offering = MagicMock()
    offering.pk = pk
    offering.save = MagicMock()

    tag_man = tag_manager.TagManager(index_path=self._path)
    tag_man.update_tags(offering, tags)

    self.assertEquals(offering.tags, tags)

    # Query the index
    index = open_dir(self._path)
    with index.searcher() as searcher:
        query = QueryParser('id', index.schema).parse(unicode(pk))
        val = searcher.search(query)

        self.assertEquals(len(val), 1)
        self.assertEquals(val[0]['id'], unicode(pk))

        ret_tags = val[0]['tags'].split(' ')
        self.assertEquals(len(tags), len(ret_tags))
        for t in tags:
            self.assertTrue(t in ret_tags)

class _DefaultSearchSchema(SchemaClass):
    """General search schema."""
    object_key = ID(stored=True, unique=True)
    id = NUMERIC(bits=64, signed=False, stored=True)
    object_type = ID(stored=True)
    creator = ID(stored=True)
    owner = ID(stored=True)

    #: security index. This lists roles and user/group ids allowed to *see*
    #: this content
    allowed_roles_and_users = KEYWORD(stored=True)

    #: tags indexing
    tag_ids = KEYWORD(stored=True)
    tag_text = TEXT(analyzer=accent_folder)

    # hierarchical index of ids path ('/' is the separator)
    parent_ids = FieldType(format=Existence(), analyzer=PathTokenizer(), stored=True)

    name = TEXT(stored=True, analyzer=accent_folder)
    slug = ID(stored=True)
    description = TEXT(stored=True, analyzer=accent_folder)
    text = TEXT(analyzer=accent_folder)

def get_schema():
    # Whoosh schema - for ease of use, match names with record keys used in
    # gutenberg_rdf_parser.
    # The spelling attribute causes columns to be used as a source of query
    # correction suggestions.
    # Analyzers can be used to provide fuzzy matches to searches. However,
    # the side effect seems to be that they pollute the match streams so
    # that spelling suggestions are meaningless.
    return wf.Schema(textId=ID(unique=True, stored=True),
                     title=TEXT(stored=True, spelling=True),
                     creator=TEXT(stored=True, spelling=True),
                     contributor=TEXT(stored=True, spelling=True),
                     subject=KEYWORD,
                     language=KEYWORD(stored=True),
                     friendlytitle=TEXT,
                     category=STORED)

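# A hedged sketch of the spelling-suggestion feature the comments above
# refer to: fields indexed with spelling=True can back a corrector. `ix`
# stands for an index built with this schema, and the misspelled word is
# an illustrative assumption.
with ix.searcher() as searcher:
    corrector = searcher.corrector("title")
    print(corrector.suggest("Frankenstien", limit=3))
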
def createIndexs(dirName):
    schema = Schema(id=NUMERIC(sortable=True),
                    views=KEYWORD(stored=True),
                    semtiment=TEXT(stored=True),
                    content=TEXT(stored=True, analyzer=analyzer))
    if not os.path.exists(dirName):
        os.mkdir(dirName)
    ix = create_in(dirName, schema)

    dic = {}
    for line in open('Test.csv'):
        id, content = line.split('\t')
        dic[id] = content

    writer = ix.writer()
    reader = csv.reader(open('result_bs.csv'))
    for id, view, sem in reader:
        writer.add_document(id=id, views=view, semtiment=sem, content=dic[id])
    writer.commit()

def __init__(self, model=None):
    if model:
        self.fields = model._meta.get_all_field_names()
        self.model = model
    self.fields = set(self.fields) - set(self.exclude)

    schema_options = {}
    for field in self.fields:
        if field == self.pk:
            schema_options[field] = ID(stored=True, unique=True)
        elif field in self.keywords:
            schema_options[field] = KEYWORD(stored=field in self.stored)
        else:
            schema_options[field] = TEXT(stored=field in self.stored)

    self.schema = Schema(**schema_options)

def build_schema(self, fields):
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Grab the number of keys that are hard-coded into Haystack.
    # We'll use this to (possibly) fail slightly more gracefully later.
    initial_key_count = len(schema_fields)
    content_field_name = ''

    for field_name, field_class in fields.items():
        if field_class.is_multivalued:
            if field_class.indexed is False:
                schema_fields[field_class.index_fieldname] = IDLIST(stored=True)
            else:
                schema_fields[field_class.index_fieldname] = KEYWORD(
                    stored=True, commas=True, scorable=True)
        elif field_class.field_type in ['date', 'datetime']:
            schema_fields[field_class.index_fieldname] = DATETIME(
                stored=field_class.stored)
        elif field_class.field_type == 'integer':
            schema_fields[field_class.index_fieldname] = NUMERIC(
                stored=field_class.stored, type=int)
        elif field_class.field_type == 'float':
            schema_fields[field_class.index_fieldname] = NUMERIC(
                stored=field_class.stored, type=float)
        elif field_class.field_type == 'boolean':
            schema_fields[field_class.index_fieldname] = BOOLEAN(
                stored=field_class.stored)
        else:
            schema_fields[field_class.index_fieldname] = TEXT(
                stored=True, analyzer=StemmingAnalyzer())

        if field_class.document is True:
            content_field_name = field_class.index_fieldname

    # Fail more gracefully than relying on the backend to die if no fields
    # are found.
    if len(schema_fields) <= initial_key_count:
        raise SearchBackendError(
            "No fields were found in any search_indexes. Please correct this before attempting to search."
        )

    return (content_field_name, Schema(**schema_fields))

class HiveJobListing(SchemaClass):
    '''Class to store the details associated with each Hive job'''
    job_url = ID(stored=True)
    title = TEXT(stored=True, analyzer=QUERY_ANALYZER)
    owner = KEYWORD(stored=True)
    completion_time = DATETIME(stored=True)
    query = TEXT(stored=True, analyzer=QUERY_ANALYZER)

    def __init__(self):
        self.job_url = None
        self.title = None
        self.owner = None
        self.completion_time = None
        self.query = None

    def __str__(self):
        return 'Url: %s, Title: %s, Owner: %s, Time: %s, Query: %s...' % (
            self.job_url, self.title, self.owner,
            self.completion_time, self.query[0:10])

def _mail_schema(self):
    return Schema(
        ident=ID(stored=True, unique=True),
        sender=ID(stored=False),
        to=KEYWORD(stored=False, commas=True),
        cc=KEYWORD(stored=False, commas=True),
        bcc=KEYWORD(stored=False, commas=True),
        bounced=KEYWORD(stored=False, commas=True),
        subject=TEXT(stored=False),
        date=NUMERIC(stored=False, sortable=True, bits=64, signed=False),
        body=TEXT(stored=False),
        tag=KEYWORD(stored=True, commas=True),
        flags=KEYWORD(stored=True, commas=True),
        raw=TEXT(stored=False))

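# A minimal sketch of why the date field above is a sortable NUMERIC:
# storing the date as an integer (e.g. a Unix timestamp) allows both range
# filtering and sorting without storing the value. `ix` and the timestamp
# are illustrative assumptions.
from whoosh.query import NumericRange

with ix.searcher() as searcher:
    since_2020 = NumericRange("date", 1577836800, None)  # 2020-01-01 UTC
    results = searcher.search(since_2020, sortedby="date", reverse=True)
    for hit in results:
        print(hit["ident"])
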
def create_indexer(doc_directory, index_directory):
    my_analyzer = RegexTokenizer() | LowercaseFilter()
    schema = Schema(id=ID(stored=True),
                    title=TEXT(stored=True, analyzer=my_analyzer),
                    summary=TEXT,
                    article=TEXT(analyzer=my_analyzer),
                    keywords=KEYWORD(stored=True, analyzer=my_analyzer),
                    date=DATETIME(stored=True),
                    path=TEXT(stored=True))
    if not os.path.exists(index_directory):
        os.mkdir(index_directory)
    ix = create_in(index_directory, schema)
    writer = ix.writer()

    nt = 0
    print("==============================")
    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    t1 = time.perf_counter()
    for dirname, subdirs, files in os.walk(doc_directory):
        if files != []:
            n = 0
            for filename in files:
                filename = os.path.join(dirname, filename)
                obj = load_json(filename)
                writer.add_document(id=obj['id'],
                                    title=obj['title'],
                                    summary=obj['summary'],
                                    article=obj['article'],
                                    keywords=obj['keywords'],
                                    date=obj['date'],
                                    path=filename)
                n += 1
            print("{}: {}".format(dirname, n))
            nt += n
    t2 = time.perf_counter()
    print("==============================")
    print("Docs: {}, Time: {:.2f}s".format(nt, (t2 - t1)))
    print("Writing index...")
    writer.commit()
    t3 = time.perf_counter()
    print("Total time: {:.2f}s".format(t3 - t1))
    print("==============================")

def __get_index_schema(self):
    """
    :return: ticket index schema
    """
    return Schema(status=ID(stored=True),
                  assignee_id=NUMERIC(stored=True),
                  via=ID(stored=True),
                  description=ID(stored=True),
                  tags=KEYWORD(stored=True, commas=True),
                  url=ID(stored=True),
                  external_id=ID(stored=True),
                  created_at=ID(stored=True),
                  submitter_id=NUMERIC(stored=True),
                  priority=ID(stored=True),
                  due_at=ID(stored=True),
                  organization_id=NUMERIC(stored=True),
                  has_incidents=BOOLEAN(stored=True),
                  id=ID(stored=True),
                  type=ID(stored=True),
                  subject=ID(stored=True))

def _setup(self):
    self._redis = getattr(self, '_redis', None)
    if not self._redis:
        # XXX test cases won't get correctly unpickled because of this
        self._redis = redis()
    self.schema = Schema(content=NGRAMWORDS(stored=False))
    self.schema.add("object_id", ID(stored=True, unique=True))
    self.schema.add("entity_id", ID(stored=True, unique=True))
    self.schema.add('sha1', ID(stored=True, unique=True))
    for a in list(ATTRS.keys()):
        self.schema.add(a, KEYWORD())
    self.objects = self.xml_dict('objects')
    self.parts = self.json_dict('parts')
    self.storage = FileStorage(os.path.join(self._dir, self._name))
    try:
        self.index = self.storage.open_index(schema=self.schema)
    except BaseException as ex:
        log.warn(ex)
        self.storage.create()
        self.index = self.storage.create_index(self.schema)
        self._reindex()

def get_schema():
    analyzer = StemmingAnalyzer(stoplist=STOP)
    schema = Schema(title=TEXT(stored=True, analyzer=analyzer, sortable=True),
                    url=ID(stored=True),
                    content_length=NUMERIC(stored=True, sortable=True),
                    thread_votecount=NUMERIC(stored=True, sortable=True),
                    vote_count=NUMERIC(stored=True, sortable=True),
                    content=TEXT(stored=True, analyzer=analyzer, sortable=True),
                    tags=KEYWORD(stored=True, commas=True),
                    is_toplevel=BOOLEAN(stored=True),
                    lastedit_date=NUMERIC(stored=True, sortable=True),
                    rank=NUMERIC(stored=True, sortable=True),
                    author=TEXT(stored=True),
                    author_score=NUMERIC(stored=True, sortable=True),
                    author_handle=TEXT(stored=True),
                    author_uid=ID(stored=True),
                    author_url=ID(stored=True),
                    uid=ID(stored=True),
                    type=NUMERIC(stored=True, sortable=True),
                    type_display=TEXT(stored=True))
    return schema

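# A hedged sketch of using the sortable columns above: search the content
# field but order hits by the precomputed rank instead of BM25 score.
# `ix` and the query string are illustrative assumptions.
from whoosh.qparser import QueryParser

with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse(u"alignment")
    results = searcher.search(query, sortedby="rank", reverse=True, limit=25)
    for hit in results:
        print(hit["title"], hit["rank"])
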
def index_graph_description(self, index_name='graphs'):
    from whoosh.fields import TEXT, ID, NGRAM, NUMERIC, KEYWORD
    from whoosh.analysis import StemmingAnalyzer, SimpleAnalyzer, IDAnalyzer
    from whoosh.analysis.filters import LowercaseFilter

    print 'Building %s index...' % index_name

    # build a single schema from the fields exposed by the different search
    # types
    print '\tSchema:'
    fields = {
        'gid': ID(stored=True),
        'description': KEYWORD(lowercase=True, scorable=True)
    }
    # fields = {'gid': ID(stored=True), 'description': TEXT(analyzer=SimpleAnalyzer(ur'[.\s]', True))}

    from whoosh.fields import Schema
    schema = Schema(**fields)

    # Create the index schema
    index = self.recreate_index(index_name, schema)

    # Add documents to the index
    print '\tWrite indexes:'
    writer = index.writer()
    c = 0
    from digipal.models import Graph
    for graph in Graph.objects.filter(
            graph_components__isnull=False).prefetch_related(
            'graph_components', 'graph_components__component',
            'graph_components__features').distinct():
        c += 1
        doc = {
            'gid': unicode(graph.id),
            'description': graph.get_serialised_description()
        }
        writer.add_document(**doc)

    print '\t\tIndexed %d graphs' % c

    writer.commit()

def __init__(self, path, index):
    """Initializes the search engine.

    Args:
        path: Path to document root to index
        index: Path to where the index will be placed.
    """
    self.path = path
    self.index = index

    analyzer = NgramWordAnalyzer(2, 4)

    try:
        ix = whoosh.index.open_dir(self.index)
        ix.close()
        create_index = False  # index seems to be working fine
    except whoosh.index.EmptyIndexError:
        create_index = True

    if create_index:
        schema = Schema(
            name=TEXT(stored=True, analyzer=StemmingAnalyzer()),
            link=TEXT(stored=True),
            category=KEYWORD(stored=True, scorable=True, commas=True, analyzer=analyzer),
            description=TEXT(stored=True),
        )
        if not os.path.isdir(self.index):
            os.mkdir(self.index)
        print("Creating index %s" % os.path.relpath(self.index))
        with contextlib.closing(whoosh.index.create_in(self.index, schema)) as ix:
            self._index(ix, self.path)

    print("Opening index %s" % self.index)
    self.ix = whoosh.index.open_dir(self.index)

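# A hedged sketch of what the NgramWordAnalyzer above buys you: the
# category field is indexed as 2- to 4-character grams, so a partial
# fragment can match a full keyword. `engine` stands for an instance of
# the class above; the fragment is an illustrative assumption.
from whoosh.qparser import QueryParser

with engine.ix.searcher() as searcher:
    # "netw" should match categories such as "networking" via its ngrams
    query = QueryParser("category", engine.ix.schema).parse(u"netw")
    for hit in searcher.search(query, limit=10):
        print(hit["name"], hit["category"])
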
def handle(self, *args, **kwargs):
    """
    Creates the index iterating over all the pages of the site
    """
    schema = Schema(pk=NUMERIC(unique=True, stored=True),
                    title=TEXT,
                    summary=TEXT,
                    tags=KEYWORD(commas=True, scorable=True),
                    pub_date=DATETIME(sortable=True))
    if not os.path.exists(settings.INDEX):
        os.mkdir(settings.INDEX)
    ix = create_in(settings.INDEX, schema)
    writer = ix.writer()
    objects = Page.objects.all()
    for object in objects:
        tags = map(lambda x: x.title, object.tags.all())
        writer.add_document(title=object.title,
                            summary=object.summary,
                            tags=",".join(tags),
                            pk=object.pk,
                            pub_date=object.pub_date)
    writer.commit()

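# Because pk is declared unique above, a hedged sketch of an incremental
# re-index is possible with update_document(), which replaces any existing
# document with the same pk instead of duplicating it. `page` stands for
# an illustrative Page instance.
from whoosh.index import open_dir

ix = open_dir(settings.INDEX)
writer = ix.writer()
writer.update_document(pk=page.pk,
                       title=page.title,
                       summary=page.summary,
                       tags=",".join(t.title for t in page.tags.all()),
                       pub_date=page.pub_date)
writer.commit()
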
def __init__(self):
    self.indexDir = "./indexfile"
    if not os.path.exists(self.indexDir):
        os.mkdir(self.indexDir)
    self.schema = Schema(url=TEXT(stored=True, analyzer=StemmingAnalyzer()),
                         title=TEXT(stored=True, analyzer=ChineseAnalyzer()),
                         content=TEXT(stored=True, analyzer=ChineseAnalyzer()),
                         anchors=KEYWORD(stored=True, commas=True),
                         pageRank=NUMERIC(int, 32, sortable=True, stored=True))
    self.exists = index.exists_in(self.indexDir, indexname="nkai")
    if self.exists:
        self.index = index.open_dir(self.indexDir, indexname="nkai")
    else:
        self.index = index.create_in(self.indexDir, schema=self.schema, indexname="nkai")

def get_index(api, recreate=False, must_exist=False):
    index_dir = api.ftsindex
    if index_dir.exists():
        if recreate:  # pragma: no cover
            rmtree(index_dir)
    elif must_exist:
        raise ValueError('No whoosh index found at {0}.'.format(index_dir))
    if not index_dir.exists():
        index_dir.mkdir()
        schema = Schema(id=ID(stored=True),
                        provider=KEYWORD(stored=True),
                        authoryear=TEXT(stored=True),
                        title=TEXT(analyzer=StemmingAnalyzer(), stored=True),
                        author=TEXT(stored=True),
                        year=TEXT(stored=True),
                        doctype=TEXT(stored=True),
                        lgcode=TEXT(stored=True),
                        body=TEXT(),
                        tags=KEYWORD)
        return index.create_in(index_dir.as_posix(), schema)
    return index.open_dir(index_dir.as_posix())

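# A hedged usage sketch for get_index(): open (or lazily build) the index
# and run a stemmed title search. The `api` object and the query string
# are illustrative assumptions.
from whoosh.qparser import QueryParser

ix = get_index(api)
with ix.searcher() as searcher:
    query = QueryParser("title", ix.schema).parse(u"grammar sketch")
    for hit in searcher.search(query, limit=10):
        print(hit["id"], hit["title"])
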
def create_schema():
    schema = Schema(post_id=ID(stored=True),
                    condition=STORED,
                    accession=ID(stored=True, analyzer=myanalyzer),
                    description=TEXT(stored=True, analyzer=myanalyzer),
                    gene=KEYWORD(stored=True, scorable=True, commas=True, analyzer=myanalyzer),
                    fdr=TEXT(stored=True),
                    species=TEXT(stored=True),
                    mw=NUMERIC,
                    peptides=NUMERIC,
                    psm=NUMERIC,
                    uniq_peptides=NUMERIC,
                    abun_t1=STORED,
                    abun_t2=STORED,
                    abun_t3=STORED,
                    abun_t4=STORED,
                    abun_t5=STORED,
                    q_value=NUMERIC,
                    pep=NUMERIC)
    return schema

def __get_index_schema(self):
    """
    :return: user index schema
    """
    return Schema(id=NUMERIC(stored=True),
                  url=ID(stored=True),
                  external_id=ID(stored=True),
                  name=ID(stored=True),
                  alias=ID(stored=True),
                  created_at=ID(stored=True),
                  active=BOOLEAN(stored=True),
                  verified=BOOLEAN(stored=True),
                  shared=BOOLEAN(stored=True),
                  locale=ID(stored=True),
                  timezone=ID(stored=True),
                  last_login_at=ID(stored=True),
                  email=ID(stored=True),
                  phone=ID(stored=True),
                  signature=ID(stored=True),
                  organization_id=NUMERIC(stored=True),
                  tags=KEYWORD(stored=True, commas=True),
                  suspended=BOOLEAN(stored=True),
                  role=ID(stored=True))

def cargar_correos():
    if not os.path.exists(dircorr):
        print "Error: the document directory does not exist: " + dircorr
    else:
        if not os.path.exists(dirindexC):
            os.mkdir(dirindexC)
        schema = Schema(remitente=TEXT(stored=True),
                        destinatarios=KEYWORD(stored=True),
                        fecha=DATETIME,
                        asunto=TEXT(stored=True),
                        contenido=TEXT,
                        file=TEXT(stored=True))
        ixc = create_in(dirindexC, schema)
        writer = ixc.writer()
        i = 0
        for docname in os.listdir(dircorr):
            if not os.path.isdir(dircorr + docname):
                add_doc(writer, dircorr, docname)
                i += 1
        writer.commit()
        return i