def index_venues_from_db():
    """Build the venue index from the ``venues`` database table.

    Selects the ``id`` and ``name`` columns of every venue via
    ``db.select``, adds one document per row to an index located at
    ``config.DATA + "index_venues"``, commits once after all rows are
    added, and prints the number of venues processed.

    In the index, ``id`` is stored but not indexed, while ``name`` is
    both stored and indexed.
    """
    venues = db.select(["id", "name"], table="venues")

    index = Index(
        config.DATA + "index_venues",
        fields=[
            DocField("id", stored=True, indexed=False),
            DocField("name", stored=True, indexed=True)
        ]
    )

    # Release the index handle even if adding or committing fails.
    try:
        for vid, vname in venues:
            # Ids are always handed to the index as strings.
            index.add(id=str(vid), name=vname)
        index.commit()
    finally:
        index.close()

    # Single-argument print() yields the same output on Python 2 and 3.
    print("%d venues added to the index." % len(venues))
def add_papers(self, index_folder, include_text=True):
    """Index every publication listed in ``self.pub_ids``.

    For each publication id, the title and abstract are fetched with
    ``self.get_texts`` and added to an index created at ``index_folder``.
    When ``include_text`` is true, the publication's full text is also
    read from the file at ``config.TXT_PATH % pub_id`` and indexed under
    the ``text`` field.

    Work is committed after every 1000 documents (with a progress log
    line) and once more at the end. The index is closed on exit, whether
    indexing succeeded or an error was raised.

    Args:
        index_folder: Location passed to ``Index`` for the new index.
        include_text: Whether to add and populate the ``text`` field.
    """
    # Single-argument print() yields the same output on Python 2 and 3.
    print("Adding %s documents to index in '%s'"
          % (len(self.pub_ids), index_folder))

    fields = [
        DocField("id", stored=True, indexed=True),
        DocField("title", stored=True, indexed=True),
        DocField("abstract", stored=False, indexed=True)
    ]
    if include_text:
        fields.append(DocField("text", stored=False, indexed=True))

    index = Index(index_folder, fields)

    # Release the index handle even if a text file is missing or an
    # add/commit call fails part-way through.
    try:
        # Count from 1 so the checkpoint message reports the number of
        # documents actually added so far.
        for count, pub_id in enumerate(self.pub_ids, 1):
            title, abstract = self.get_texts(pub_id)
            field_values = {'id': pub_id, 'title': title, 'abstract': abstract}

            # Only touch the text file when the text field is wanted.
            if include_text:
                with open(config.TXT_PATH % pub_id, "r") as txt_file:
                    field_values['text'] = txt_file.read()

            index.add(**field_values)

            # Commit and report progress every 1000 entries.
            if count % 1000 == 0:
                index.commit()
                log.info("%d documents added." % count)

        index.commit()
    finally:
        index.close()