def rebuild_indicators(indicator_config_id):
    """Drop and rebuild the SQL table backing the given data source config,
    then repopulate it from the relevant couch documents.

    Static configs are defined in code, so no build metadata is written for
    them; only dynamic (couch-backed) configs record initiated/finished times.
    """
    is_static = indicator_config_id.startswith(
        CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        # NOTE: only dynamic configs are saved — static configs live in code,
        # mirroring the `if not is_static` guard on the finished flag below.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()
    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                               database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(
                doc['_id'], e))

    if not is_static:
        config.meta.build.finished = True
        config.save()
def rebuild_indicators(indicator_config_id):
    """Rebuild the SQL table for a data source config and repopulate it
    from couch.

    Build metadata (initiated/finished) is tracked only for dynamic configs;
    static configs are code-defined and must not be saved.
    """
    is_static = indicator_config_id.startswith(
        CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        # Guarded by is_static for the same reason the finished flag is below:
        # static configs are not persistable documents.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()
    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                               database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(
                doc['_id'], e))

    if not is_static:
        config.meta.build.finished = True
        config.save()
def test_get_doc_id_type_nomatch(self):
    """get_doc_ids must not return docs whose domain doesn't match,
    even when the doc_type does."""
    # renamed from `id` to avoid shadowing the builtin
    doc_id = uuid.uuid4().hex
    doc = {
        '_id': doc_id,
        'domain': 'nomatch-domain',
        'doc_type': 'match-type',
    }
    self.db.save_doc(doc)
    try:
        ids = get_doc_ids('match-domain', 'match-type')
        self.assertEqual(0, len(ids))
    finally:
        # clean up even if the assertion fails, so the stray doc
        # can't pollute other tests sharing this db
        self.db.delete_doc(doc)
def handle(self, *args, **options): domain = args[0] old_db = Domain.get_db() new_db = RepeatRecord.get_db() assert old_db.dbname != new_db.dbname doc_ids = get_doc_ids(domain, 'RepeatRecord', old_db) count = len(doc_ids) chunksize = 250 for i, docs in enumerate(chunked(iter_docs(old_db, doc_ids, chunksize), chunksize)): for doc in docs: if '_rev' in doc: del doc['_rev'] new_db.bulk_save(docs, new_edits=False) print 'checked %s / %s' % (i * chunksize, count)
def rebuild_indicators(indicator_config_id):
    """Drop and recreate the SQL table for a data source config, then
    repopulate it from every matching couch document."""
    config = DataSourceConfiguration.get(indicator_config_id)
    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    doc_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                          database=couchdb)
    for document in iter_docs(couchdb, doc_ids, chunksize=500):
        # only documents passing the config's filter belong in the table
        if not config.filter.filter(document):
            continue
        try:
            adapter.save(document)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(
                document['_id'], e))
def rebuild_indicators(indicator_config_id):
    """Rebuild and repopulate the SQL table for a (static or dynamic)
    data source config, always disposing the engine afterwards."""
    is_static = indicator_config_id.startswith(
        CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
    adapter = IndicatorSqlAdapter(get_engine(), config)
    try:
        adapter.rebuild_table()
        couchdb = _get_db(config.referenced_doc_type)
        relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                                   database=couchdb)
        for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
            try:
                # save is a noop if the filter doesn't match
                adapter.save(doc)
            except DataError as e:
                logging.exception('problem saving document {} to table. {}'.format(
                    doc['_id'], e))
    finally:
        # release the engine's connection pool even if rebuild_table or an
        # unexpected exception aborts the loop (previously leaked on error)
        adapter.engine.dispose()
def handle(self, *args, **options): domain = args[0] db = Domain.get_db() doc_ids = get_doc_ids(domain, "RepeatRecord", db) count = len(doc_ids) print "found %s doc ids" % count latest = datetime.min latest_doc = None for i, doc in enumerate(iter_docs(db, doc_ids)): wrapped = RepeatRecord.wrap(doc) if i % 100 == 0: print "checked %s / %s" % (i, count) if wrapped.last_checked and wrapped.last_checked > latest: latest = wrapped.last_checked latest_doc = wrapped print "new latest: %s" % latest if latest_doc: print "latest repeater date is %s" % latest print "latest repeater is %s" % latest_doc._id else: print "no relevant repeaters found"
def handle(self, *args, **options): domain = args[0] db = Domain.get_db() doc_ids = get_doc_ids(domain, 'RepeatRecord', db) count = len(doc_ids) print 'found %s doc ids' % count latest = datetime.min latest_doc = None for i, doc in enumerate(iter_docs(db, doc_ids)): wrapped = RepeatRecord.wrap(doc) if i % 100 == 0: print 'checked %s / %s' % (i, count) if wrapped.last_checked and wrapped.last_checked > latest: latest = wrapped.last_checked latest_doc = wrapped print 'new latest: %s' % latest if latest_doc: print 'latest repeater date is %s' % latest print 'latest repeater is %s' % latest_doc._id else: print 'no relevant repeaters found'
def test_get_doc_ids_initial_empty(self):
    """With no documents saved, get_doc_ids returns an empty result."""
    ids = get_doc_ids('some-domain', 'some-doc-type')
    self.assertEqual(0, len(ids))