Beispiel #1
0
def index_venues_from_db():
    """Build the venue index from the ``venues`` database table.

    Selects the ``id`` and ``name`` columns of every row in ``venues`` and
    adds one document per row to the index located at
    ``config.DATA + "index_venues"``.  The ``id`` field is stored but not
    indexed; the ``name`` field is both stored and indexed.  After all rows
    are added the index is committed and closed, and the number of venues
    is printed.
    """
    venues = db.select(["id", "name"], table="venues")

    index = Index(config.DATA + "index_venues",
                  fields=[
                      DocField("id", stored=True, indexed=False),
                      DocField("name", stored=True, indexed=True),
                  ])

    try:
        for vid, vname in venues:
            # Ids are converted to strings before being handed to the index.
            index.add(id=str(vid), name=vname)

        index.commit()
    finally:
        # Release the index even when adding or committing fails, so the
        # index is never left open.
        index.close()

    # Parenthesised single-argument form prints the same text whether
    # ``print`` is a statement or a function.
    print("%d venues added to the index." % len(venues))
Beispiel #2
0
    def add_papers(self, index_folder, include_text=True):
        """Index every publication listed in ``self.pub_ids``.

        Each document gets an ``id`` and a ``title`` field (stored and
        indexed) and an ``abstract`` field (indexed only).  The title and
        abstract come from ``self.get_texts(pub_id)``.

        Args:
            index_folder: Location passed to ``Index`` as the index target.
            include_text: When true, an additional indexed-only ``text``
                field is filled with the contents of the file at
                ``config.TXT_PATH % pub_id``.

        The index is committed after every 1000 documents and once more at
        the end; it is closed on exit, including when an error interrupts
        the run.
        """
        print("Adding %s documents to index in '%s'" % (len(self.pub_ids),
                                                         index_folder))

        fields = [
            DocField("id", stored=True, indexed=True),
            DocField("title", stored=True, indexed=True),
            DocField("abstract", stored=False, indexed=True),
        ]
        if include_text:
            fields.append(DocField("text", stored=False, indexed=True))

        index = Index(index_folder, fields)
        try:
            # Count from 1 so that ``count`` is the number of documents
            # added so far and the progress message reports the true total.
            for count, pub_id in enumerate(self.pub_ids, 1):
                title, abstract = self.get_texts(pub_id)
                field_values = {
                    'id': pub_id,
                    'title': title,
                    'abstract': abstract,
                }

                # Only touch the text file when the text field is wanted.
                if include_text:
                    with open(config.TXT_PATH % pub_id, "r") as txt_file:
                        field_values['text'] = txt_file.read()

                index.add(**field_values)

                # Commit and report progress every 1000 documents.
                if count % 1000 == 0:
                    index.commit()
                    log.info("%d documents added." % count)

            index.commit()
        finally:
            # Always release the index, even if reading or adding failed.
            index.close()