def index_gremien():
    """Push every Gremium record into the Solr full-text index.

    Each record is flattened, tagged with its type info, date-normalised
    and added individually; a single commit follows the loop.
    """
    conn = solr()
    for gremium in Gremium.query:
        log.info("indexing %s..." % gremium.name)
        doc = flatten(gremium.to_dict())
        doc.update(type_info(gremium))
        doc = convert_dates(doc)
        conn.add_many([doc])
    conn.commit()
def index_dokumente():
    """Index every Dokument into Solr, one add per record."""
    conn = solr()
    for dokument in Dokument.query:
        log.info("indexing %s..." % dokument.name)
        doc = flatten(dokument.to_dict())
        doc.update(type_info(dokument))
        doc = convert_dates(doc)
        conn.add_many([doc])
    conn.commit()
def index_persons():
    """Index every Person record into Solr."""
    conn = solr()
    for person in Person.query:
        log.info("indexing %s..." % person.name)
        doc = flatten(person.to_dict())
        doc.update(type_info(person))
        doc = convert_dates(doc)
        conn.add_many([doc])
    conn.commit()
def index_positionen():
    """Index every Position record into Solr, one add per record."""
    conn = solr()
    for position in Position.query:
        log.info("indexing %s - %s..." % (
            position.ablauf.titel, position.fundstelle))
        doc = flatten(position.to_dict())
        doc.update(type_info(position))
        doc = convert_dates(doc)
        conn.add_many([doc])
    conn.commit()
def index():
    """Re-index every entity type into Solr.

    NOTE: the index is not wiped first, so documents for deleted records
    will linger; re-enable the delete below for a full rebuild.
    The previous version opened a Solr connection here that was never
    used (its only consumer, delete_query, is disabled) -- removed.
    """
    # _solr = solr(); _solr.delete_query("*:*")  # disabled: full wipe
    index_persons()
    index_gremien()
    index_positionen()
    index_dokumente()
    index_ablaeufe()
    index_sitzungen()
    index_debatten()
    index_zitate()
def index_ablaeufe():
    """Index each Ablauf with its nested Positionen.

    The newest position date becomes the document's 'date' field.  If an
    Ablauf has no positions, or none of them carries a date, the field is
    simply left unset -- the old code crashed with ValueError on
    ``max([])`` and could not handle None dates.
    """
    _solr = solr()
    for ablauf in Ablauf.query:
        log.info("indexing %s..." % ablauf.titel)
        data = ablauf.to_dict()
        data['positionen'] = [p.to_dict() for p in ablauf.positionen]
        # Guard against empty/None dates before taking the maximum.
        dates = [p['date'] for p in data['positionen']
                 if p['date'] is not None]
        if dates:
            data['date'] = max(dates)
        data = flatten(data)
        data.update(type_info(ablauf))
        data = convert_dates(data)
        _solr.add_many([data])
    _solr.commit()
def index_dokumente():
    """Index all Dokument records into Solr in batches of 1000.

    yield_per() streams rows from the ORM instead of loading the whole
    table; a progress dot is written per flushed batch.
    """
    _solr = solr()
    datas = []
    for dokument in Dokument.query.yield_per(1000):
        log.info("indexing %s..." % dokument.name)
        data = flatten(dokument.to_dict())
        data.update(type_info(dokument))
        data = convert_dates(data)
        datas.append(data)
        if len(datas) % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
            _solr.add_many(datas)
            _solr.commit()
            datas = []
    # BUG FIX: the final partial batch used to be dropped -- flush any
    # remaining documents before the closing commit.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def search(entity, offset, query):
    """Full-text search for one entity type, newest first.

    If ``offset`` (a datetime) is given, only documents dated at or after
    it are returned; matching ids are then resolved to DB rows.  Returns
    an empty list when nothing matched.
    """
    if offset is None:
        date_floor = '*'
    else:
        # Solr date syntax: strip microseconds, append the Zulu marker.
        date_floor = offset.isoformat().rsplit(".")[0] + "Z"
    type_name = entity.__name__.lower()
    raw = solr().raw_query(
        q=query,
        fq=["+index_type:%s" % type_name,
            "+date:[%s TO *]" % date_floor],
        sort="date desc", rows=1000, wt="json", fl="id")
    docs = json.loads(raw).get('response', {}).get('docs', [])
    ids = [d.get('id') for d in docs]
    if ids:
        return db.session.query(entity).filter(entity.id.in_(ids)).all()
    return ids
def _run(self):
    """Execute the prepared Solr query and store the parsed JSON response
    on ``self.results``.

    NOTE(review): the facet parameters use underscore names
    ('facet_limit', 'facet_mincount', ...); presumably the raw_query
    wrapper translates them to Solr's dotted form ('facet.limit') --
    confirm against the solr client in use.
    """
    query = {
        # '*:*' matches every document when no user query was given.
        'q': self.q if self.has_query else '*:*',
        'fq': self.fq,
        'rows': self._limit,
        'start': self._offset,
        'facet': 'true',
        'facet_limit': 100,
        'facet_mincount': 1,
        'facet_sort': 'count',
        'facet_field': self._facets,
        'wt': 'json',
        # Only ids and relevance scores are fetched; full rows come from the DB.
        'fl': 'id score',
        'sort': self._sort
    }
    response = solr().raw_query(**query)
    self.results = json.loads(response)
def index_sitzungen():
    """Index Sitzung records into Solr in small batches of 5."""
    _solr = solr()
    datas = []
    for sitzung in Sitzung.query:
        log.info("indexing %s..." % sitzung.titel)
        data = sitzung.to_dict()
        # Embedding zitate is disabled (too heavy):
        # data['zitate'] = [z.to_dict() for z in sitzung.zitate]
        data = flatten(data)
        data.update(type_info(sitzung))
        data = convert_dates(data)
        data = convert_text(data)
        datas.append(data)
        if len(datas) % 5 == 0:
            _solr.add_many(datas)
            datas = []
    # Only flush a non-empty tail batch; the old code unconditionally
    # sent an empty add when the row count was a multiple of 5.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def index_positionen():
    """Index all Position records into Solr in batches of 1000.

    Streams rows with yield_per() and prints a progress dot per batch.
    """
    _solr = solr()
    datas = []
    for position in Position.query.yield_per(1000):
        log.info("indexing %s - %s..." % (
            position.ablauf.titel, position.fundstelle))
        data = flatten(position.to_dict())
        data.update(type_info(position))
        data = convert_dates(data)
        datas.append(data)
        if len(datas) % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
            _solr.add_many(datas)
            _solr.commit()
            datas = []
    # BUG FIX: the final partial batch used to be dropped -- flush any
    # remaining documents before the closing commit.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def index_debatten():
    """Index Debatte records into Solr in batches of 20."""
    _solr = solr()
    datas = []
    for debatte in Debatte.query:
        log.info("indexing %s..." % debatte.titel)
        data = debatte.to_dict()
        # Embedding zitate is disabled (too heavy):
        # data['zitate'] = [dz.zitat.to_dict() for dz in debatte.debatten_zitate]
        data = flatten(data)
        data.update(type_info(debatte))
        data = convert_dates(data)
        data = convert_text(data)
        datas.append(data)
        if len(datas) % 20 == 0:
            _solr.add_many(datas)
            datas = []
    # Only flush a non-empty tail batch; the old code unconditionally
    # sent an empty add when the row count was a multiple of 20.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def search(entity, offset, query):
    """Run a Solr query scoped to a single entity type.

    ``offset`` (a datetime or None) sets a lower bound on the 'date'
    field.  Returns database rows for the matching ids, or an empty list.
    """
    # Build the Solr date-range lower bound: '*' means unbounded.
    lower = '*'
    if offset is not None:
        # Drop fractional seconds and mark the timestamp as UTC ("Z").
        lower = offset.isoformat().rsplit(".")[0] + "Z"
    filters = [
        "+index_type:%s" % entity.__name__.lower(),
        "+date:[%s TO *]" % lower,
    ]
    response = solr().raw_query(q=query, fq=filters, sort="date desc",
                                rows=1000, wt="json", fl="id")
    payload = json.loads(response)
    results = [doc.get('id')
               for doc in payload.get('response', {}).get('docs', [])]
    if len(results):
        results = db.session.query(entity) \
            .filter(entity.id.in_(results)).all()
    return results
def index_zitate():
    """Index transcript quotes (Zitat) into Solr in batches of 1000.

    Eager-loads the person, sitzung and debatten_zitate relations so the
    loop does not issue one query per quote.
    """
    _solr = solr()
    log.info("indexing transcripts...")
    datas = []
    for zitat in Zitat.query.options(
            eagerload_all(Zitat.person, Zitat.sitzung,
                          Zitat.debatten_zitate)):
        data = zitat.to_dict()
        data = flatten(data)
        data.update(type_info(zitat))
        data = convert_dates(data)
        data = convert_text(data)
        datas.append(data)
        if len(datas) % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
            _solr.add_many(datas)
            _solr.commit()
            datas = []
    # Only flush a non-empty tail batch; previously an empty add was sent
    # whenever the quote count was an exact multiple of 1000.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def index_zitate():
    """Index transcript quotes with embedded sitzung/debatte data.

    Batches of 1000, streamed via yield_per(5000).
    """
    _solr = solr()
    log.info("indexing transcripts...")
    datas = []
    for zitat in Zitat.query.yield_per(5000):
        data = zitat.to_dict()
        data['sitzung'] = zitat.sitzung.to_dict()
        data['debatte'] = zitat.debatte.to_dict()
        data = flatten(data)
        data.update(type_info(zitat))
        data = convert_dates(data)
        data = convert_text(data)
        # Promote the sitzung date to the document's own 'date' field;
        # assumes flatten() produces dotted keys like 'sitzung.date' --
        # TODO confirm against flatten()'s implementation.
        data['date'] = data.get('sitzung.date')
        datas.append(data)
        if len(datas) % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
            _solr.add_many(datas)
            _solr.commit()
            datas = []
    # Only flush a non-empty tail batch; previously an empty add was sent
    # whenever the quote count was an exact multiple of 1000.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def index_ablaeufe():
    """Index Ablauf records (with nested Positionen) in batches of 500.

    The latest non-None position date becomes the document 'date' field.
    """
    _solr = solr()
    datas = []
    for ablauf in Ablauf.query.yield_per(1000):
        log.info("indexing %s..." % ablauf.titel)
        data = ablauf.to_dict()
        # Serialise each position exactly once (the old code built this
        # list twice, calling to_dict() twice per position).
        data['positionen'] = [p.to_dict() for p in ablauf.positionen]
        dates = [p['date'] for p in data['positionen']
                 if p['date'] is not None]
        if len(dates):
            data['date'] = max(dates)
        data = flatten(data)
        data.update(type_info(ablauf))
        data = convert_dates(data)
        datas.append(data)
        if len(datas) % 500 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
            _solr.add_many(datas)
            _solr.commit()
            datas = []
    # BUG FIX: the final partial batch used to be dropped -- flush any
    # remaining documents before the closing commit.
    if datas:
        _solr.add_many(datas)
    _solr.commit()
def dumpindex():
    """Destroy the FTS index by deleting every document and committing."""
    conn = solr()
    conn.delete_query("*:*")
    conn.commit()
def __init__(self, buffer_size=1000):
    """Set up a buffered Solr writer.

    :param buffer_size: number of documents to accumulate before a flush.
    """
    # Plain state first, then the lock, then the (side-effectful)
    # connection acquisition.
    self._buffer_size = buffer_size
    self._buffer = []
    self._lock = Lock()
    self._solr = solr()