Beispiel #1
0
def rebuild_indicators(indicator_config_id):
    """Rebuild the SQL indicator table for one data source configuration.

    Ids that start with ``CustomDataSourceConfiguration._datasource_id_prefix``
    are treated as static: they are loaded with ``by_id`` and never saved.
    Every other id is loaded with ``DataSourceConfiguration.get``; its build
    start time is saved before the rebuild and its ``finished`` flag is saved
    once every document has been processed.
    """
    static_source = indicator_config_id.startswith(CustomDataSourceConfiguration._datasource_id_prefix)
    if not static_source:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Persist the start time before doing any work, so that a rebuild
        # which began long ago and never finished can be spotted later.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()
    else:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)

    sql_adapter = IndicatorSqlAdapter(config)
    sql_adapter.rebuild_table()

    doc_type = config.referenced_doc_type
    source_db = _get_db(doc_type)
    doc_ids = get_doc_ids(config.domain, doc_type, database=source_db)

    for source_doc in iter_docs(source_db, doc_ids, chunksize=500):
        try:
            # According to the original author's note, save() is a no-op
            # for documents the filter does not match.
            sql_adapter.save(source_doc)
        except DataError as err:
            # A bad document is logged and skipped; the rebuild carries on.
            message = 'problem saving document {} to table. {}'.format(source_doc['_id'], err)
            logging.exception(message)

    if static_source:
        return
    config.meta.build.finished = True
    config.save()
Beispiel #2
0
def rebuild_indicators(indicator_config_id):
    """Recreate and repopulate the indicator table behind a data source.

    A configuration whose id begins with
    ``CustomDataSourceConfiguration._datasource_id_prefix`` is static and is
    only read. Any other configuration has ``meta.build.initiated`` saved up
    front and ``meta.build.finished`` saved at the end.
    """
    is_static = indicator_config_id.startswith(
        CustomDataSourceConfiguration._datasource_id_prefix)
    if not is_static:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Stamp the start time first: if the rebuild dies part-way through,
        # an old start time with no finished flag makes that visible.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()
    else:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)

    table_adapter = IndicatorSqlAdapter(config)
    table_adapter.rebuild_table()

    referenced_type = config.referenced_doc_type
    db = _get_db(referenced_type)
    ids_to_index = get_doc_ids(
        config.domain,
        referenced_type,
        database=db,
    )

    for document in iter_docs(db, ids_to_index, chunksize=500):
        try:
            # The original author notes that save() does nothing when the
            # filter does not match the document.
            table_adapter.save(document)
        except DataError as error:
            # Log the failing document and continue with the next one.
            logging.exception(
                'problem saving document {} to table. {}'.format(
                    document['_id'], error))

    if is_static:
        return
    config.meta.build.finished = True
    config.save()
    def test_get_doc_id_type_nomatch(self):
        """A saved doc must not be returned when the queried domain differs.

        The fixture shares its ``doc_type`` with the query but lives in
        ``'nomatch-domain'``, so ``get_doc_ids('match-domain', 'match-type')``
        is expected to return nothing.

        NOTE(review): the method name says "type_nomatch" but the mismatching
        field in the fixture is the domain -- confirm which was intended.
        """
        doc_id = uuid.uuid4().hex
        doc = {
            '_id': doc_id,
            'domain': 'nomatch-domain',
            'doc_type': 'match-type',
        }
        self.db.save_doc(doc)
        try:
            ids = get_doc_ids('match-domain', 'match-type')
            self.assertEqual(0, len(ids))
        finally:
            # Remove the fixture even when the assertion fails, so a failing
            # run does not leave the doc behind in the test database.
            self.db.delete_doc(doc)
    def handle(self, *args, **options):
        """Copy a domain's RepeatRecord docs from the Domain db to the RepeatRecord db.

        ``args[0]`` is the domain name. Doc ids of type ``'RepeatRecord'`` are
        looked up in ``Domain.get_db()``, fetched in chunks of 250, stripped of
        their ``'_rev'`` key and bulk-saved into ``RepeatRecord.get_db()`` with
        ``new_edits=False``. A progress line is printed after each chunk.

        Raises:
            AssertionError: if both databases have the same ``dbname``.
            IndexError: if no domain argument was supplied.
        """
        domain = args[0]
        old_db = Domain.get_db()
        new_db = RepeatRecord.get_db()
        # Raised explicitly (same exception type as the former ``assert``) so
        # the guard still runs when Python is started with -O.
        if old_db.dbname == new_db.dbname:
            raise AssertionError(
                'source and destination databases are the same: %s' % old_db.dbname)
        doc_ids = get_doc_ids(domain, 'RepeatRecord', old_db)
        count = len(doc_ids)
        chunksize = 250
        copied = 0

        for docs in chunked(iter_docs(old_db, doc_ids, chunksize), chunksize):
            for doc in docs:
                # Drop any '_rev' key before the doc goes to the new database.
                doc.pop('_rev', None)
                copied += 1
            new_db.bulk_save(docs, new_edits=False)
            # Report the number of docs handled so far; the previous
            # ``i * chunksize`` lagged one chunk behind and never reached
            # the total.
            print('checked %s / %s' % (copied, count))
Beispiel #5
0
def rebuild_indicators(indicator_config_id):
    """Rebuild the indicator table for the given data source configuration.

    The table is rebuilt through an ``IndicatorSqlAdapter``. Every document of
    the configuration's domain and referenced doc type that passes
    ``config.filter`` is then saved to it.
    """
    config = DataSourceConfiguration.get(indicator_config_id)
    sql_adapter = IndicatorSqlAdapter(get_engine(), config)
    sql_adapter.rebuild_table()

    doc_type = config.referenced_doc_type
    source_db = _get_db(doc_type)
    doc_ids = get_doc_ids(config.domain, doc_type, database=source_db)

    for source_doc in iter_docs(source_db, doc_ids, chunksize=500):
        # Documents rejected by the configured filter are skipped.
        if not config.filter.filter(source_doc):
            continue
        try:
            sql_adapter.save(source_doc)
        except DataError as err:
            # Log the failing document and move on to the next one.
            message = 'problem saving document {} to table. {}'.format(source_doc['_id'], err)
            logging.exception(message)
Beispiel #6
0
def rebuild_indicators(indicator_config_id):
    """Rebuild the SQL indicator table for one data source configuration.

    Ids that start with ``CustomDataSourceConfiguration._datasource_id_prefix``
    are loaded as static configurations via ``by_id``; all others are loaded
    with ``DataSourceConfiguration.get``. The table is rebuilt and every
    document of the configuration's domain and referenced doc type is handed
    to the adapter. The adapter's engine is disposed when the function exits,
    whether the rebuild succeeded or raised.
    """
    is_static = indicator_config_id.startswith(CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)

    adapter = IndicatorSqlAdapter(get_engine(), config)
    try:
        adapter.rebuild_table()

        couchdb = _get_db(config.referenced_doc_type)
        relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                                   database=couchdb)

        for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
            try:
                # save is a noop if the filter doesn't match
                adapter.save(doc)
            except DataError as e:
                # A bad document is logged and skipped; the rebuild carries on.
                logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))
    finally:
        # Dispose of the engine on every exit path; previously any exception
        # other than DataError skipped this call and leaked the engine.
        adapter.engine.dispose()
    def handle(self, *args, **options):
        """Print the most recently checked RepeatRecord of a domain.

        ``args[0]`` is the domain name. Doc ids of type ``"RepeatRecord"`` are
        read from ``Domain.get_db()``, each doc is wrapped as a
        ``RepeatRecord``, and the one with the greatest truthy
        ``last_checked`` is reported. Progress is printed every 100 docs.
        """
        domain = args[0]
        db = Domain.get_db()
        doc_ids = get_doc_ids(domain, "RepeatRecord", db)
        total = len(doc_ids)
        print("found %s doc ids" % total)

        # Start from the smallest datetime so that any real value wins.
        newest_checked = datetime.min
        newest_record = None
        for index, raw_doc in enumerate(iter_docs(db, doc_ids)):
            record = RepeatRecord.wrap(raw_doc)
            if index % 100 == 0:
                print("checked %s / %s" % (index, total))
            # Records without a last_checked value are not candidates.
            if not record.last_checked:
                continue
            if record.last_checked > newest_checked:
                newest_checked = record.last_checked
                newest_record = record
                print("new latest: %s" % newest_checked)

        if not newest_record:
            print("no relevant repeaters found")
        else:
            print("latest repeater date is %s" % newest_checked)
            print("latest repeater is %s" % newest_record._id)
    def handle(self, *args, **options):
        """Find and print the RepeatRecord with the latest ``last_checked``.

        The domain is taken from ``args[0]``. All ``'RepeatRecord'`` doc ids
        for that domain are fetched from ``Domain.get_db()`` and scanned once;
        a progress line is printed every 100 docs and a line is printed each
        time a newer ``last_checked`` is seen.
        """
        domain = args[0]
        source_db = Domain.get_db()
        record_ids = get_doc_ids(domain, 'RepeatRecord', source_db)
        count = len(record_ids)
        print('found %s doc ids' % count)

        best_date = datetime.min
        best_record = None
        for position, doc in enumerate(iter_docs(source_db, record_ids)):
            repeat_record = RepeatRecord.wrap(doc)
            if not position % 100:
                print('checked %s / %s' % (position, count))
            is_newer = (repeat_record.last_checked
                        and repeat_record.last_checked > best_date)
            if is_newer:
                best_date = repeat_record.last_checked
                best_record = repeat_record
                print('new latest: %s' % best_date)

        if not best_record:
            print('no relevant repeaters found')
            return
        print('latest repeater date is %s' % best_date)
        print('latest repeater is %s' % best_record._id)
 def test_get_doc_ids_initial_empty(self):
     """With no fixture saved by this test, the lookup must return no ids."""
     found_ids = get_doc_ids('some-domain', 'some-doc-type')
     self.assertEqual(0, len(found_ids))