def monitor(self, deploy):
    """Tell whether the deploy's indexer reports a non-error status.

    Opens a Thrift indexer client against the deploy's worker (LAN DNS
    name and base port; the third argument is 10000, presumably a timeout
    in milliseconds) and queries its status.

    Returns True for every status other than ``IndexerStatus.error``.
    Exceptions raised while connecting or querying are not caught here.
    """
    host = deploy.worker.lan_dns
    port = int(deploy.base_port)
    client = rpc.getThriftIndexerClient(host, port, 10000)

    # Only the explicit error state counts as a monitoring failure.
    return client.getStatus() != IndexerStatus.error
Beispiel #2
0
    def monitor(self, deploy):
        """Check the recovery status reported by the deploy's indexer.

        The indexer is reached through a Thrift client built from the
        worker's LAN DNS name and the deploy's base port (third argument
        10000, presumably a timeout in milliseconds).

        Returns False only when the reported status equals
        ``IndexerStatus.error``; any other status yields True. Connection
        or RPC errors propagate to the caller.
        """
        worker = deploy.worker
        current_status = rpc.getThriftIndexerClient(
            worker.lan_dns, int(deploy.base_port), 10000).getStatus()

        # Anything short of an explicit error is considered healthy.
        return current_status != IndexerStatus.error
 def monitor(self, deploy):
     """Ping the deploy's indexer and report whether it answered.

     Returns True when the ping succeeds. On any exception the failure is
     logged with its traceback, ``self.err_msg`` is set from
     ``self.describe_error()`` and False is returned.
     """
     # Touch the relation outside the try block so that a broken index
     # foreign key raises instead of being reported as a failed ping.
     deploy.index
     try:
         rpc.getThriftIndexerClient(
             deploy.worker.lan_dns, int(deploy.base_port), 5000).ping()
     except Exception:
         self.logger.exception("Failed to ping deploy %s for index %s", deploy.id, deploy.index.code)
         self.err_msg = self.describe_error()
         return False
     else:
         return True
Beispiel #4
0
 def monitor(self, deploy):
     """Return True if the deploy's indexer responds to a ping.

     Any exception raised while building the client or pinging is logged
     (with traceback), recorded in ``self.err_msg`` via
     ``self.describe_error()``, and turned into a False result.
     """
     deploy.index  # evaluated up front: a broken index foreign key must raise
     try:
         host = deploy.worker.lan_dns
         port = int(deploy.base_port)
         indexer = rpc.getThriftIndexerClient(host, port, 5000)
         indexer.ping()
     except Exception:
         self.logger.exception("Failed to ping deploy %s for index %s",
                               deploy.id, deploy.index.code)
         self.err_msg = self.describe_error()
         return False
     return True
    def _handle_recovering(self, deploy):
        """Check whether a recovering deploy has finished and advance its state.

        Queries the deploy's indexer status over Thrift and reacts to it:

        * ``started``    -> requests a full recovery and returns
          ``DeployManager.INDEX_RECOVERING``.
        * ``recovering`` -> returns ``DeployManager.INDEX_RECOVERING``.
        * ``ready``      -> marks the deploy controllable, moves a waking-up
          index to live, sends the new-deploy mail report and returns
          ``DeployManager.INDEX_CONTROLLABLE``.
        * ``error``      -> logs that manual intervention is required.

        ``timeout_ms`` is not defined in this function; presumably it is a
        module-level setting.

        NOTE(review): this excerpt ends inside the ``error`` branch; the
        ``except`` clause of the ``try`` and whatever is returned for the
        error state are not visible here.
        """
        logger.debug('Contacting %s (%s) on %d of %s to check if it finished recovering', deploy.index.name, deploy.index.code, deploy.base_port, deploy.worker.wan_dns)
        try:
            indexer = rpc.getThriftIndexerClient(deploy.worker.lan_dns, int(deploy.base_port), timeout_ms)
            indexer_status = indexer.getStatus()
            if indexer_status == IndexerStatus.started:
                # The indexer is up but has not begun recovering: kick it off.
                logger.info('Requesting full recovery for deploy for %s (%s) on %d.', deploy.index.name, deploy.index.code, deploy.base_port)
                indexer.startFullRecovery()
                return DeployManager.INDEX_RECOVERING
            elif indexer_status == IndexerStatus.recovering:
                # Still working; check again on a later pass.
                logger.info("Index %s is in state %s. Waiting untill it's ready", deploy.index.code, indexer_status)
                return DeployManager.INDEX_RECOVERING
            elif indexer_status == IndexerStatus.ready:
                # Recovery finished: promote the deploy and, if needed, the index.
                deploy.update_status(Deploy.States.controllable)
                if deploy.index.status == Index.States.waking_up:
                    deploy.index.update_status(Index.States.live)
                mail.report_new_deploy(deploy)
                
                logger.info('Deploy for %s (%s) on %d of %s reports it has finished recovering. New state moved to %s.', deploy.index.name, deploy.index.code, deploy.base_port, deploy.worker.wan_dns, Deploy.States.controllable)

                # The following is a HACK to restore twitvid's promotes after its index was moved
                # because we don't record promotes and they are lost after each move.
                # Luckily we know what twitvid promotes, so we can reproduce it here. This will break if they
                # change their code and start promoting something else. GitHub issue #41 calls for a proper
                # implementation or to remove the feature altogether. Twitvid could now do this by using the
                # caret operator like this: "name:(q)^100 OR author:(q)^100 OR (q)".

                if deploy.index.code == 'd7fz1':
                    try:
                        searcher = rpc.getThriftSearcherClient(deploy.worker.lan_dns, int(deploy.base_port), timeout_ms)
                        # Page through the matching documents 1000 at a time
                        # until a page comes back empty.
                        start = 0
                        while True:
                            rs = searcher.search('verified:1 AND cont_type:user', start, 1000, 0, {}, {}, {}, {}, {'fetch_fields':'author,fullname,name'})
                            if len(rs.docs) == 0:
                                break
                            for d in rs.docs:
                                # Promote each document for its lower-cased, stripped
                                # author and for its name (falling back to fullname).
                                author = d.get('author')
                                if author:
                                    indexer.promoteResult(d['docid'], author.lower().strip())
                                name = d.get('name', d.get('fullname'))
                                if name:
                                    indexer.promoteResult(d['docid'], name.lower().strip())
                            start += len(rs.docs)
                        # `start` now equals the number of documents visited.
                        logger.info('WARNING: HACK! %s promotes were recovered for TwitVid.', start)
                    except Exception, e:
                        # Best effort: a failure here must not block the state change.
                        # NOTE(review): the message below has no %s placeholder for `e`,
                        # so the logging call cannot format its extra argument.
                        logger.error('HACK ERROR: applying TwitVid promotes', e)

                # Phew. End of HACK. Please let's not do this anymore.

                return DeployManager.INDEX_CONTROLLABLE
            elif indexer_status == IndexerStatus.error:
                logger.error('Deploy for %s (%s) on %d of %s reports it has failed recovering. MANUAL INTERVENTION REQUIRED.', deploy.index.name, deploy.index.code, deploy.base_port, deploy.worker.wan_dns)
 def _handle_initializing(self, deploy):
     """Probe an initializing deploy and advance its state once it answers.

     Pings the deploy's indexer over Thrift. ``timeout_ms`` is not defined
     in this function; presumably it is a module-level setting.

     Returns:
         ``DeployManager.INDEX_CONTROLLABLE`` when the ping succeeds and the
         index is new: the deploy becomes controllable and the index is
         saved as live.
         ``DeployManager.INDEX_RECOVERING`` when the ping succeeds for any
         other index status: the deploy is moved to recovering.
         ``DeployManager.INDEX_INITIALIZING`` when anything inside the try
         block raises; the error is logged and the deploy is left as is.
     """
     logger.debug('Trying to reach %s (%s) on %d of %s.', deploy.index.name, deploy.index.code, deploy.base_port, deploy.worker.wan_dns)
     try:
         indexer = rpc.getThriftIndexerClient(deploy.worker.lan_dns, int(deploy.base_port), timeout_ms)
         indexer.ping()
         # The ping went through, so the indexer process is reachable.
         if deploy.index.status == Index.States.new:
             # A brand new index has nothing to recover: go straight to live.
             deploy.update_status(Deploy.States.controllable)
             index = deploy.index
             index.status = Index.States.live
             index.save()
             logger.info('Deploy for %s (%s) on %d of %s contacted. New status moved to %s.', deploy.index.name, deploy.index.code, deploy.base_port, deploy.worker.wan_dns, Deploy.States.controllable)
             return DeployManager.INDEX_CONTROLLABLE
         else:
             deploy.update_status(Deploy.States.recovering)
             logger.info('Deploy for %s (%s) on %d of %s contacted. New status moved to %s.', deploy.index.name, deploy.index.code, deploy.base_port, deploy.worker.wan_dns, Deploy.States.recovering)
             return DeployManager.INDEX_RECOVERING
     except Exception as e:
         # Not ready yet: leave the deploy as initializing and retry later.
         # NOTE(review): this also catches failures of the status updates
         # above and reports them as "unreachable".
         logger.info('Index %s unreachable: %s', deploy.index.code, e)
         return DeployManager.INDEX_INITIALIZING
def operations(request):
    """Serve the operations console page and the actions behind it.

    The ``level`` query parameter (default ``'top'``) selects the action;
    most actions also read ``id`` from the query string:

    * ``top``             - render ``operations/index.html``.
    * ``refresh``         - JSON dump of all configurations, accounts,
      deploys, indexes, packages and workers.
    * ``index``           - JSON for one index and its deploys.
    * ``stats``           - JSON stats fetched from the deploy's indexer.
    * ``log``             - JSON result of tailing ``file`` on the deploy's
      worker.
    * ``redeploy``        - ask the deploy manager to redeploy an index.
    * ``decommission``    - mark a worker as decommissioning.
    * ``delete_worker``   - delete a decommissioning worker with no deploys,
      otherwise answer 409.
    * ``delete_account``  - delete an account and its user when it has no
      indexes and no payment information, otherwise answer 409.
    * ``account_set_pkg`` / ``account_set_cfg`` / ``index_set_cfg`` - update
      the package/configuration and return the refreshed object, or answer
      409 when no row was updated.

    Any other ``level`` yields ``HttpResponseNotFound``. Exceptions raised
    by the ``objects.get`` lookups and RPC calls are not caught here.

    NOTE(review): every state-changing action is driven by GET parameters;
    consider requiring POST for them.
    """
    level = request.GET.get('level', 'top')
    if level == 'top':
        return render_to_response('operations/index.html', Context({}, request))
    elif level == 'refresh':
        # List comprehensions (rather than map) so the payload holds real
        # lists on Python 3 as well.
        data = {
            'Config': [configuration_dict(c) for c in IndexConfiguration.objects.all()],
            'Account': [account_dict(a) for a in Account.objects.select_related('user').all()],
            'Deploy': [deploy_dict(d) for d in Deploy.objects.all()],
            'Index': [index_dict(i) for i in Index.objects.all()],
            'Package': [package_dict(p) for p in Package.objects.all()],
            'Worker': [worker_dict(w) for w in Worker.objects.all()],
        }
        return JsonResponse(data)
    elif level == 'index':
        index_id = request.GET.get('id')
        index = Index.objects.get(pk=index_id)
        data = {
            'Index': index_dict(index),
            'Deploy': [deploy_dict(d) for d in index.deploys.all()],
        }
        return JsonResponse(data)
    elif level == 'stats':
        deploy_id = request.GET.get('id')
        deploy = Deploy.objects.get(pk=deploy_id)
        client = rpc.getThriftIndexerClient(deploy.worker.lan_dns, int(deploy.base_port), 3000)
        return JsonResponse(client.get_stats())
    elif level == 'log':
        deploy_id = request.GET.get('id')
        # NOTE(review): the file name comes straight from the query string
        # and is forwarded to the worker controller; confirm that `tail`
        # restricts which files can be read.
        log_file = request.GET.get('file')
        deploy = Deploy.objects.get(pk=deploy_id)
        client = rpc.get_worker_controller(deploy.worker, 4000)
        lines = client.tail(log_file, 300, deploy.index.code, deploy.base_port)
        return JsonResponse(lines)
    elif level == 'redeploy':
        index_id = request.GET.get('id')
        rpc.get_deploy_manager().redeploy_index(Index.objects.get(pk=index_id).code)
        return HttpResponse()
    elif level == 'decommission':
        worker_id = request.GET.get('id')
        Worker.objects.filter(id=worker_id).update(status=Worker.States.decommissioning)
        return JsonResponse(worker_dict(Worker.objects.get(id=worker_id)))
    elif level == 'delete_worker':
        worker_id = request.GET.get('id')
        worker = Worker.objects.get(id=worker_id)
        # Only an empty, decommissioning worker may be removed.
        if worker.status != Worker.States.decommissioning:
            return HttpResponse('worker not decommissioning', status=409)
        if worker.deploys.count():
            return HttpResponse('worker not empty', status=409)
        worker.delete()
        return HttpResponse()
    elif level == 'delete_account':
        account_id = request.GET.get('id')
        account = Account.objects.get(id=account_id)
        # Resolve the user before the account row is deleted.
        user = account.user.user
        if account.indexes.count():
            return HttpResponse('account has index', status=409)
        if account.payment_informations.count():
            return HttpResponse('account has payment information', status=409)
        account.delete()
        user.delete()
        return HttpResponse()
    elif level == 'account_set_pkg':
        account_id = request.GET.get('id')
        package_id = request.GET.get('pkg')
        package = Package.objects.get(id=package_id)
        updated = Account.objects.filter(id=account_id).update(package=package)
        if updated:
            return JsonResponse(account_dict(Account.objects.get(id=account_id)))
        else:
            return HttpResponse('account not found', status=409)
    elif level == 'account_set_cfg':
        account_id = request.GET.get('id')
        config_id = request.GET.get('cfg')
        config = IndexConfiguration.objects.get(id=config_id)
        updated = Account.objects.filter(id=account_id).update(configuration=config)
        if updated:
            return JsonResponse(account_dict(Account.objects.get(id=account_id)))
        else:
            return HttpResponse('account not found', status=409)
    elif level == 'index_set_cfg':
        index_id = request.GET.get('id')
        config_id = request.GET.get('cfg')
        config = IndexConfiguration.objects.get(id=config_id)
        updated = Index.objects.filter(id=index_id).update(configuration=config)
        if updated:
            return JsonResponse(index_dict(Index.objects.get(id=index_id)))
        else:
            return HttpResponse('index not found', status=409)
    return HttpResponseNotFound()
Beispiel #8
0
def operations(request):
    """Dispatch an operations-console request on its ``level`` parameter.

    ``level`` defaults to ``'top'``, which renders the console page. The
    remaining levels either return JSON (``refresh``, ``index``, ``stats``,
    ``log``, ``decommission``, ``account_set_pkg``, ``account_set_cfg``,
    ``index_set_cfg``) or an empty/diagnostic ``HttpResponse``
    (``redeploy``, ``delete_worker``, ``delete_account``). Refused deletions
    and updates that touch no row answer with status 409; an unknown level
    answers with ``HttpResponseNotFound``.

    Exceptions raised by the ``objects.get`` lookups and by RPC calls are
    not caught here.

    NOTE(review): destructive actions are reachable through plain GET
    requests; consider requiring POST for them.
    """
    params = request.GET
    level = params.get('level', 'top')
    # Most levels address an object through ``id``; reading it once up
    # front is harmless for those that do not.
    target_id = params.get('id')

    if level == 'top':
        return render_to_response('operations/index.html',
                                  Context({}, request))

    if level == 'refresh':
        # Build real lists (not map objects) so that the payload can be
        # serialized on Python 3 as well.
        return JsonResponse({
            'Config': [configuration_dict(cfg)
                       for cfg in IndexConfiguration.objects.all()],
            'Account': [account_dict(acc)
                        for acc in Account.objects.select_related('user').all()],
            'Deploy': [deploy_dict(dep) for dep in Deploy.objects.all()],
            'Index': [index_dict(idx) for idx in Index.objects.all()],
            'Package': [package_dict(pkg) for pkg in Package.objects.all()],
            'Worker': [worker_dict(wrk) for wrk in Worker.objects.all()],
        })

    if level == 'index':
        index = Index.objects.get(pk=target_id)
        return JsonResponse({
            'Index': index_dict(index),
            'Deploy': [deploy_dict(dep) for dep in index.deploys.all()],
        })

    if level == 'stats':
        deploy = Deploy.objects.get(pk=target_id)
        indexer = rpc.getThriftIndexerClient(deploy.worker.lan_dns,
                                             int(deploy.base_port), 3000)
        return JsonResponse(indexer.get_stats())

    if level == 'log':
        # NOTE(review): the file name is taken verbatim from the query
        # string; confirm that the worker controller's `tail` restricts
        # which files can be read.
        file_name = params.get('file')
        deploy = Deploy.objects.get(pk=target_id)
        controller = rpc.get_worker_controller(deploy.worker, 4000)
        return JsonResponse(
            controller.tail(file_name, 300, deploy.index.code,
                            deploy.base_port))

    if level == 'redeploy':
        index_code = Index.objects.get(pk=target_id).code
        rpc.get_deploy_manager().redeploy_index(index_code)
        return HttpResponse()

    if level == 'decommission':
        Worker.objects.filter(id=target_id).update(
            status=Worker.States.decommissioning)
        return JsonResponse(worker_dict(Worker.objects.get(id=target_id)))

    if level == 'delete_worker':
        worker = Worker.objects.get(id=target_id)
        # A worker can only go away once it is decommissioning and empty.
        if worker.status != Worker.States.decommissioning:
            return HttpResponse('worker not decommissioning', status=409)
        if worker.deploys.count():
            return HttpResponse('worker not empty', status=409)
        worker.delete()
        return HttpResponse()

    if level == 'delete_account':
        account = Account.objects.get(id=target_id)
        # Fetch the user first: it must outlive the account row so that it
        # can be deleted afterwards.
        owner = account.user.user
        if account.indexes.count():
            return HttpResponse('account has index', status=409)
        if account.payment_informations.count():
            return HttpResponse('account has payment information', status=409)
        account.delete()
        owner.delete()
        return HttpResponse()

    if level == 'account_set_pkg':
        package = Package.objects.get(id=params.get('pkg'))
        if Account.objects.filter(id=target_id).update(package=package):
            return JsonResponse(account_dict(Account.objects.get(id=target_id)))
        return HttpResponse('account not found', status=409)

    if level == 'account_set_cfg':
        config = IndexConfiguration.objects.get(id=params.get('cfg'))
        if Account.objects.filter(id=target_id).update(configuration=config):
            return JsonResponse(account_dict(Account.objects.get(id=target_id)))
        return HttpResponse('account not found', status=409)

    if level == 'index_set_cfg':
        config = IndexConfiguration.objects.get(id=params.get('cfg'))
        if Index.objects.filter(id=target_id).update(configuration=config):
            return JsonResponse(index_dict(Index.objects.get(id=target_id)))
        return HttpResponse('index not found', status=409)

    return HttpResponseNotFound()