Exemple #1
0

class Wwan0Final:
    """Maintenance"""

    def run(self):
        """Call trigger_maintenance() with the modem failure reason and return its result."""
        reason = "internal modem is not running"
        return trigger_maintenance(reason)


class RestartNL:
    """restart network-listener"""

    def run(self):
        """Restart the network-listener systemd unit through shell()."""
        command = "systemctl restart network-listener"
        shell(command)


class Wwan0(module.BasicModule):
    """Confirm internal modem is up and running"""

    repairs = [RestartNL()]
    final = Wwan0Final()

    def run(self):
        """Return False when the modem entry for wwan0 reports a null mode.

        The local modem endpoint is queried with curl and filtered with jq,
        which prints the JSON literal ``null`` for a null ``.mode``.
        """
        mode = shell(
            "curl -s http://localhost:88/modems|jq '.[]|select(.ifname == \"wwan0\")|.mode'"
        )
        # Compare on the stripped text: a trailing newline in the captured
        # output would otherwise make the comparison never match, so the
        # check could never fail.
        return (mode or "").strip() != "null"


register.put(Wwan0())
Exemple #2
0
    """reinstall marvin service"""

    def run(self):
        # Reinstall the python-marvin package without prompting; shell() is
        # given timeout=60 (presumably seconds -- TODO confirm against shell()).
        shell("apt-get install -y --force-yes --reinstall python-marvin", timeout=60)

class MarvinService (module.BasicModule):
    """marvin service"""

    repairs = [RestartMarvin()]
    final = MarvinFinal()

    def run(self):
        """Check that marvind runs, unless its config file or keys are absent.

        Returns True when either /etc/marvind.conf or /etc/keys/marvind is
        missing (nothing to check), False when the process or the running
        systemd unit cannot be found, True otherwise.
        """
        # a node without configuration or without keys is ignored
        for listing in ("ls /etc/marvind.conf", "ls /etc/keys/marvind"):
            if "No such file" in shell(listing):
                return True
        # the daemon has to appear in the process table ...
        if "bin/marvind" not in shell("ps ax|grep marvind"):
            return False
        # ... and systemd has to report the unit as running
        return "running" in shell("systemctl status marvind")

register.put(MarvinService())
class RestartNL:
    """enable & restart network-listener service"""

    def run(self):
        """Enable the network-listener unit, then restart it."""
        for command in (
            "systemctl enable network-listener",
            "systemctl restart network-listener",
        ):
            shell(command)

class ReinstallNL:
    """reinstall network-listener service"""

    def run(self):
        """Reinstall the network-listener package via apt-get with timeout=60."""
        command = "apt-get install -y --force-yes --reinstall network-listener"
        shell(command, timeout=60)

class NLService (module.BasicModule):
    """network-listener service"""

    repairs = [RestartNL(), ReinstallNL()]
    final = NLFinal()

    def run(self):
        """Return True when network-listener is both a live process and a running unit."""
        # the process must be listed by ps
        process_list = shell("ps ax|grep listener")
        if "network-listener" not in process_list:
            return False
        # systemd must report the unit as running
        unit_status = shell("systemctl status network-listener")
        return "running" in unit_status

register.put(NLService())
Exemple #4
0
    """restart dlb to adjust ip rules, if they do not match"""
    def run(self):
        """Restart dlb unless the ip rules at preference 9999 and 90001 agree.

        The last whitespace-separated word of each rule listing is compared.
        A listing with no output (rule absent) counts as a mismatch instead
        of raising IndexError.
        """
        def last_word(command):
            # None when the command printed nothing at all
            words = shell(command).split()
            return words[-1] if words else None

        ru9999 = last_word("ip ru show pref 9999")
        ru90001 = last_word("ip ru show pref 90001|head -n 1")
        if ru9999 is None or ru9999 != ru90001:
            shell("systemctl restart dlb")


class Autotunnel(module.BasicModule):
    """Backend reachable via autotunnel"""

    repairs = [RestartDlb()]
    final = AutotunnelFinal()

    def run(self):
        """Try an ssh round trip to the backend and record the time of a success.

        Returns False when the remote ``echo success`` output is not seen;
        otherwise writes the current epoch time to
        /var/lib/biteback/autotunnel.last and returns True.
        """
        probe = 'ssh -o StrictHostKeychecking=no -i $BACKEND_SSH_KEY -o ConnectTimeout=5 -o BatchMode=yes -o UserKnownHostsFile=/dev/null $BACKEND_SSH_USER@$BACKEND_SSH_SERVER echo success'
        # the $BACKEND_* variables are expected from the sourced defaults file
        reply = shell(probe, source='/etc/default/autotunnel')
        if "success" not in reply:
            return False

        # remember when the backend was last reachable
        for command in (
            'mkdir -p /var/lib/biteback/',
            'date +%s > /var/lib/biteback/autotunnel.last',
        ):
            shell(command)
        return True


register.put(Autotunnel())
Exemple #5
0
        shell("systemctl enable network-listener")
        shell("systemctl restart network-listener")


class ReinstallNL:
    """reinstall network-listener service"""

    def run(self):
        """Run apt-get to reinstall network-listener, passing timeout=60 to shell()."""
        apt_command = "apt-get install -y --force-yes --reinstall network-listener"
        shell(apt_command, timeout=60)


class NLService(module.BasicModule):
    """network-listener service"""

    repairs = [RestartNL(), ReinstallNL()]
    final = NLFinal()

    def run(self):
        """Pass only if ps lists network-listener and systemd reports it running."""
        if "network-listener" not in shell("ps ax|grep listener"):
            # no such process
            return False
        # process exists; it must also be a running systemd unit
        return "running" in shell("systemctl status network-listener")


register.put(NLService())
Exemple #6
0
from biteback import module, register
from biteback.util import shell, trigger_maintenance

class AtqFinal:
    """Maintenance"""

    def run(self):
        """Call trigger_maintenance() without arguments and return its result."""
        outcome = trigger_maintenance()
        return outcome

class CleanAtqSpool:
    """clean the atq spool and reboot marvind"""

    def run(self):
        """Delete '='-prefixed at job files, move the marvind log aside, restart marvind."""
        steps = (
            "rm /var/spool/cron/atjobs/=*",
            "mv /var/log/marvind.log /var/log/marvind.log.atq",
            "systemctl restart marvind",
        )
        for step in steps:
            shell(step)

class Atq (module.BasicModule):
    """no jobs are stuck in atq"""

    repairs = [CleanAtqSpool()]
    final = AtqFinal()

    def run(self):
        """Return False when more than one atq line contains '='."""
        marked_jobs = int(shell("atq | grep = | wc -l"))
        # at most one such entry is tolerated
        return marked_jobs <= 1

register.put(Atq())
Exemple #7
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_maintenance

class NginxFinal:
    """Maintenance"""

    def run(self):
        """Call trigger_maintenance() with the nginx failure reason and return its result."""
        reason = "Nginx is not running."
        return trigger_maintenance(reason)

class RestartNginx:
    """enable & restart nginx service"""

    def run(self):
        """Enable the nginx unit, then restart it."""
        for command in (
            "systemctl enable nginx",
            "systemctl restart nginx",
        ):
            shell(command)

class NginxService (module.BasicModule):
    """nginx service"""

    repairs = [RestartNginx()]
    final = NginxFinal()

    def run(self):
        """Return True when `systemctl status nginx` output mentions "running"."""
        unit_status = shell("systemctl status nginx")
        return "running" in unit_status

register.put(NginxService())
class RestartTunnel:
    """enable & restart autotunnel service"""

    def run(self):
        """Enable the autotunnel unit, then restart it."""
        for command in (
            "systemctl enable autotunnel",
            "systemctl restart autotunnel",
        ):
            shell(command)

class ReinstallTunnel:
    """reinstall autotunnel service"""

    def run(self):
        """Reinstall the autotunnel package via apt-get.

        timeout=60 matches the other package-reinstall repairs in this code
        base, which all pass an explicit timeout for apt-get; this call was
        the only one relying on shell()'s default.
        """
        shell("apt-get install -y --force-yes --reinstall autotunnel", timeout=60)

class AutotunnelService (module.BasicModule):
    """Autotunnel systemd service"""

    repairs = [RestartTunnel(), ReinstallTunnel()]
    final = AutotunnelFinal()

    def run(self):
        """Pass only if a "watcher" process is listed and the unit reports running."""
        # the watcher has to show up among autotunnel processes
        processes = shell("ps ax|grep autotunnel")
        if "watcher" not in processes:
            return False
        unit_status = shell("systemctl status autotunnel")
        return "running" in unit_status

register.put(AutotunnelService())
Exemple #9
0
    def run(self):
        # Enable the dlb unit first, then restart it.
        shell("systemctl enable dlb")
        shell("systemctl restart dlb")


class ReinstallDLB:
    """reinstall dlb service"""

    def run(self):
        """Reinstall the dlb package via apt-get, passing timeout=60 to shell()."""
        apt_command = "apt-get install -y --force-yes --reinstall dlb"
        shell(apt_command, timeout=60)


class DLBService(module.BasicModule):
    """dlb service"""

    repairs = [RestartDLB(), ReinstallDLB()]
    final = DLBFinal()

    def run(self):
        """Pass only if an sbin/dlb process is listed and the dlb unit reports running."""
        # is the dlb binary running?
        if "sbin/dlb" not in shell("ps ax|grep dlb"):
            return False
        # is it running as a systemd service?
        return "running" in shell("systemctl status dlb")


register.put(DLBService())
Exemple #10
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_maintenance

class ProcessesFinal:
    """Reboot"""
    # the most reliable way to mitigate fork bombs
    # smarter fixes should be set in limits.conf

    def run(self):
        """Call trigger_reboot() and return its result."""
        # The import line above this class only brings in shell and
        # trigger_maintenance, so trigger_reboot is imported here to avoid a
        # NameError when this final action runs.
        from biteback.util import trigger_reboot

        return trigger_reboot()

class Processes(module.BasicModule):
    """Open processes"""
    # should be limited by container setup as well

    repairs = []
    final = ProcessesFinal()

    def run(self):
        """Return False when `ps -AL` lists more than 5000 entries."""
        entry_count = int(shell("ps -AL --no-headers|wc -l"))
        return entry_count <= 5000

register.put(Processes())
Exemple #11
0
    def run(self):
        # Stop every running container, stop the docker unit, delete
        # /var/lib/docker and finally remove the docker-engine package.
        shell("docker stop -t 0 $(docker ps -q)", timeout=60)
        shell("systemctl stop docker", timeout=60)
        shell("rm -r /var/lib/docker", timeout=60)
        # -y: apt-get asks for confirmation before removing a package, which
        # cannot be answered when run unattended; every other apt-get call in
        # this code base already passes it.
        shell("apt-get remove -y docker-engine", timeout=120)


class HddUsage(module.BasicModule):
    """Disk space available"""

    repairs = [ClearLogs()]
    final = HddFinal()

    def run(self):
        """Check free space on /, /tmp and /var/log and the entry count of /tmp.

        Returns False at the first df "avail" figure below its minimum, or
        when /tmp holds more than 1000 entries; True otherwise.
        """
        # (command printing the available space, minimum accepted value)
        minimum_available = (
            ("df / --output=avail|tail -n1", 500000),
            ("df /tmp --output=avail|tail -n1", 10000),
            ("df /var/log --output=avail|tail -n1", 10000),
        )
        for command, minimum in minimum_available:
            if int(shell(command)) < minimum:
                return False
        tmp_entries = int(shell("ls /tmp | wc -l"))
        return tmp_entries <= 1000


register.put(HddUsage())
Exemple #12
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_reboot

class PullContainers:
    """pull monroe base image and experiments"""

    def run(self):
        """Pull the monroe/base image, passing timeout=300 to shell()."""
        # only the base image is pulled; experiment images such as
        # monroe/ping are not pulled here
        image_pull = "docker pull monroe/base"
        shell(image_pull, timeout=300)

class DockerExperiments (module.BasicModule):
    """docker (if installed) contains all monroe base containers"""

    repairs = [PullContainers()]
    final = None

    def run(self):
        """Check that `docker images` lists monroe/base.

        The check is skipped (returns True) when `docker --version` output
        does not contain "1.10".
        """
        version_banner = shell("docker --version")
        if "1.10" not in version_banner:
            # treated as "docker not installed": nothing to verify
            return True

        return "monroe/base" in shell("docker images")

register.put(DockerExperiments())
Exemple #13
0
        #if uptime > 86400:  # one day
        #    return trigger_reboot()

class RestartDlb:
    """restart dlb to adjust ip rules, if they do not match"""

    def run(self):
        """Restart dlb unless the ip rules at preference 9999 and 90001 agree.

        The last whitespace-separated word of each rule listing is compared.
        A listing with no output (rule absent) counts as a mismatch instead
        of raising IndexError.
        """
        def last_word(command):
            # None when the command printed nothing at all
            words = shell(command).split()
            return words[-1] if words else None

        ru9999 = last_word("ip ru show pref 9999")
        ru90001 = last_word("ip ru show pref 90001|head -n 1")
        if ru9999 is None or ru9999 != ru90001:
            shell("systemctl restart dlb")

class Autotunnel (module.BasicModule):
    """Backend reachable via autotunnel"""

    repairs = [RestartDlb()]
    final = AutotunnelFinal()

    def run(self):
        """Probe the backend over ssh; on success store the current epoch time.

        Returns False when "success" is missing from the ssh output, True
        after /var/lib/biteback/autotunnel.last has been written.
        """
        ssh_probe = 'ssh -o StrictHostKeychecking=no -i $BACKEND_SSH_KEY -o ConnectTimeout=5 -o BatchMode=yes -o UserKnownHostsFile=/dev/null $BACKEND_SSH_USER@$BACKEND_SSH_SERVER echo success'
        answer = shell(ssh_probe, source='/etc/default/autotunnel')
        if "success" not in answer:
            return False

        # keep a timestamp of the last successful connection
        for command in (
            'mkdir -p /var/lib/biteback/',
            'date +%s > /var/lib/biteback/autotunnel.last',
        ):
            shell(command)
        return True

register.put(Autotunnel())
Exemple #14
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_maintenance

class Wwan0Final:
    """Maintenance"""

    def run(self):
        """Return the result of trigger_maintenance() called with the modem failure reason."""
        message = "internal modem is not running"
        return trigger_maintenance(message)

class RestartNL:
    """restart network-listener"""

    def run(self):
        """Issue a systemctl restart for network-listener."""
        restart_command = "systemctl restart network-listener"
        shell(restart_command)

class Wwan0 (module.BasicModule):
    """Confirm internal modem is up and running"""

    repairs = [RestartNL()]
    final   = Wwan0Final()

    def run(self):
        """Return False when the modem entry for wwan0 reports a null mode.

        The local modem endpoint is queried with curl and filtered with jq,
        which prints the JSON literal ``null`` for a null ``.mode``.
        """
        mode = shell("curl -s http://localhost:88/modems|jq '.[]|select(.ifname == \"wwan0\")|.mode'")
        # Strip before comparing: with a trailing newline in the captured
        # output the equality test would never match and the check would
        # always pass.
        return (mode or "").strip() != "null"

register.put(Wwan0())
Exemple #15
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_reboot

class CronFinal:
    """Reboot"""

    def run(self):
        """Call trigger_reboot() and return its result."""
        outcome = trigger_reboot()
        return outcome

class RestartCron:
    """enable & restart cron service"""

    def run(self):
        """Enable the cron unit, then restart it."""
        for command in (
            "systemctl enable cron",
            "systemctl restart cron",
        ):
            shell(command)

class CronService (module.BasicModule):
    """cron service"""

    repairs = [RestartCron()]
    final = CronFinal()

    def run(self):
        """Return True when an sbin/cron process appears in the ps listing."""
        process_list = shell("ps ax|grep cron")
        return "sbin/cron" in process_list

register.put(CronService())
Exemple #16
0
            return True

        ps = shell("ps ax|grep exporter")
        if not "metadata-exporter" in ps:
            return False
        # is it a service?
        status = shell("systemctl status metadata-exporter")
        if not "running" in status:
            return False

        print("Subscribing to ZMQ socket on tcp://172.17.0.1:5556")
        context = zmq.Context()

        sub = context.socket(zmq.SUB)
        sub.connect("tcp://172.17.0.1:5556")
        sub.setsockopt(zmq.SUBSCRIBE, '')

        poller = zmq.Poller()
        poller.register(sub, zmq.POLLIN)

        socks = dict(poller.poll(60000))
        if socks:
            if socks.get(sub) == zmq.POLLIN:
                return True
        else:
            print("Timeout.")
        return False


register.put(MEService())
Exemple #17
0
    """Temporary Maintenance"""
    def run(self):
        # Return whatever trigger_maintenance() returns for this reason text.
        return trigger_maintenance(
            "kernel modules k10temp or sp5100_tco not loaded")


class ReloadKernelModules:
    """reload kernel modules"""

    def run(self):
        """Rebuild module dependencies, load both modules, then start watchdog."""
        shell("depmod")
        # -a is required to load several modules in one call; without it
        # modprobe treats everything after the first name as parameters for
        # that module, so sp5100_tco was never loaded.
        shell("modprobe -a k10temp sp5100_tco")
        shell("systemctl start watchdog")


class KernelModules(module.BasicModule):
    """Check for watchdog and temperature kernel modules"""

    repairs = [ReloadKernelModules()]
    final = KModFinal()

    def run(self):
        """Return True when lsmod output mentions both k10temp and sp5100_tco."""
        loaded = shell("lsmod")
        required = ("k10temp", "sp5100_tco")
        return all(name in loaded for name in required)


register.put(KernelModules())
Exemple #18
0
class MuninFinal:
    """Reboot"""

    def run(self):
        """Call trigger_reboot() and return its result."""
        outcome = trigger_reboot()
        return outcome


class ReinstallMunin:
    """reinstall munin service"""

    def run(self):
        """Reinstall munin-plugins-monroe and munin-node-c via apt-get (timeout=60)."""
        apt_command = "apt-get install -y --force-yes --reinstall munin-plugins-monroe munin-node-c"
        shell(apt_command, timeout=60)


class MuninService(module.BasicModule):
    """munin service"""

    repairs = [ReinstallMunin()]
    final = MuninFinal()

    def run(self):
        """Send list/quit to localhost:4949 via nc; pass when the reply mentions "cpu"."""
        # does the munin node answer?
        reply = shell('echo -e list\\\\nquit\\\\n|nc localhost 4949')
        return "cpu" in reply


register.put(MuninService())
Exemple #19
0
from biteback.util import shell, trigger_reboot


class RSyslogFinal:
    """Ignore"""

    def run(self):
        """Do nothing; the method returns None."""
        return None


class RestartRSyslog:
    """enable & restart rsyslog service"""

    def run(self):
        """Enable the rsyslog unit, then restart it."""
        for command in (
            "systemctl enable rsyslog",
            "systemctl restart rsyslog",
        ):
            shell(command)


class RSyslogService(module.BasicModule):
    """rsyslog service"""

    repairs = [RestartRSyslog()]
    final = RSyslogFinal()

    def run(self):
        """Return True when an sbin/rsyslog process appears in the ps listing."""
        process_list = shell("ps ax|grep rsyslog")
        return "sbin/rsyslog" in process_list


register.put(RSyslogService())
Exemple #20
0
from biteback import module, register
from biteback.util import shell, trigger_maintenance


class ProcessesFinal:
    """Reboot"""

    # the most reliable way to mitigate fork bombs
    # smarter fixes should be set in limits.conf

    def run(self):
        """Call trigger_reboot() and return its result."""
        # Only shell and trigger_maintenance are imported above this class,
        # so trigger_reboot is imported locally; without it this final action
        # fails with a NameError.
        from biteback.util import trigger_reboot

        return trigger_reboot()


class Processes(module.BasicModule):
    """Open processes"""
    # should be limited by container setup as well

    repairs = []
    final = ProcessesFinal()

    def run(self):
        """Pass while `ps -AL` lists at most 5000 entries."""
        listed = int(shell("ps -AL --no-headers|wc -l"))
        limit = 5000
        return not listed > limit


register.put(Processes())
Exemple #21
0
    def run(self):
        # Return whatever trigger_maintenance() returns for this reason text.
        return trigger_maintenance("Thinpool device out of sync. Unable to remove (check dmsetup ls --tree).")

class RemoveThinpool:
    """remove thinpool device and stop docker daemon"""

    def run(self):
        """Stop docker, drop docker-* device-mapper entries, remove the thinpool LV, start docker."""
        steps = (
            # should be stopped, just in case
            ("systemctl stop docker", {"timeout": 60}),
            # remove any stale leases on the thinpool
            ("dmsetup ls|grep docker-|cut -f1 -d'('|sort|xargs dmsetup remove", {"timeout": 60}),
            # remove the thinpool device
            ("lvremove -f /dev/mapper/vg--monroe-tp--docker", {"timeout": 60}),
            # will fail, but remove the systemctl status message this test triggers on
            ("systemctl start docker", {}),
        )
        for command, options in steps:
            shell(command, **options)

class Thinpool (module.BasicModule):
    """docker can access thinpool device"""

    repairs = [RemoveThinpool()]
    final = ThinpoolFinal()

    def run(self):
        """Return False when the docker unit status contains a known thin-pool error."""
        status = shell("systemctl status docker -l")
        error_markers = (
            "Unable to take ownership of thin-pool",
            "Possibly using a different",  # ...thin pool than last invocation
            "Base Device UUID and Filesystem verification failed",
        )
        return not any(marker in status for marker in error_markers)

register.put(Thinpool())
Exemple #22
0
            name = iface.get('name')
            index = iface.get('index')
            iid = iface.get('iccid', iface.get('mac'))
            conn = iface.get('conn')
            if "eth" in name:
                post.append({
                    'mac': iid,
                    'index': index,
                    'conn': self.PRIO_1000MB
                })
            elif "wlan" in name:
                post.append({
                    'mac': iid,
                    'index': index,
                    'conn': self.PRIO_500MB
                })
            elif (conn != self.PRIO_50MB) and (conn != self.PRIO_04MB):
                # these two values are set by the scheduling client
                post.append({
                    'iccid': iid,
                    'index': index,
                    'conn': self.PRIO_04MB
                })
        payload = json.dumps({'interfaces': post})
        requests.post('http://localhost:88/dlb', payload)

        return True


register.put(Priorities())
Exemple #23
0
from biteback.util import shell, trigger_reboot


class CronFinal:
    """Reboot"""

    def run(self):
        """Return the result of trigger_reboot()."""
        result = trigger_reboot()
        return result


class RestartCron:
    """enable & restart cron service"""

    def run(self):
        """Run systemctl enable and then systemctl restart for cron."""
        commands = ("systemctl enable cron", "systemctl restart cron")
        for command in commands:
            shell(command)


class CronService(module.BasicModule):
    """cron service"""

    repairs = [RestartCron()]
    final = CronFinal()

    def run(self):
        """Pass when the ps listing contains sbin/cron."""
        return "sbin/cron" in shell("ps ax|grep cron")


register.put(CronService())
Exemple #24
0
from biteback.util import shell, trigger_reboot


class PullContainers:
    """pull monroe base image and experiments"""

    def run(self):
        """Run `docker pull monroe/base` with timeout=300."""
        # experiment images (e.g. monroe/ping) are not pulled by this repair
        pull_command = "docker pull monroe/base"
        shell(pull_command, timeout=300)


class DockerExperiments(module.BasicModule):
    """docker (if installed) contains all monroe base containers"""

    repairs = [PullContainers()]
    final = None

    def run(self):
        """Verify the monroe/base image exists, but only when docker reports "1.10".

        Returns True without checking images when `docker --version` output
        lacks "1.10"; otherwise returns whether `docker images` lists
        monroe/base.
        """
        if "1.10" not in shell("docker --version"):
            # treated as "docker not installed"
            return True

        image_list = shell("docker images")
        return "monroe/base" in image_list


register.put(DockerExperiments())
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_maintenance

class TempFinal:
    """Temporary Maintenance"""

    def run(self):
        """Call trigger_maintenance() with the temperature reason and return its result."""
        reason = "cpu temperature exceeded"
        return trigger_maintenance(reason)

class Temperature(module.BasicModule):
    """CPU Temperature"""

    repairs = []
    final   = TempFinal()

    def run(self):
        """Return False only when the temp plugin reports more than 100.0.

        The reading is the second space-separated field of the plugin
        output. Output without such a field, or a field that is not a
        number, is treated as "no sensor" and passes.
        """
        fields = shell("/etc/munin/plugins/temp").split(" ")
        # Empty or single-word output has no second field; the previous
        # unconditional [1] raised IndexError in exactly the sensor-less case
        # this check is meant to tolerate.
        reading = fields[1] if len(fields) > 1 else ""
        if reading[:1].isdigit():
            try:
                if float(reading) > 100.0:
                    return False
            except ValueError:
                # starts with a digit but is not a valid number: ignore
                pass

        # Temp is either below max threshold, or in a virtual environment (e.g. qemu)
        # without sensors (temp is empty/non-digit). All good.
        return True

register.put(Temperature())
Exemple #26
0
class RestartDLB:
    """enable & restart dlb service"""

    def run(self):
        """Enable the dlb unit, then restart it."""
        for command in (
            "systemctl enable dlb",
            "systemctl restart dlb",
        ):
            shell(command)

class ReinstallDLB:
    """reinstall dlb service"""

    def run(self):
        """Run apt-get to reinstall dlb, passing timeout=60 to shell()."""
        command = "apt-get install -y --force-yes --reinstall dlb"
        shell(command, timeout=60)

class DLBService (module.BasicModule):
    """dlb service"""

    repairs = [RestartDLB(), ReinstallDLB()]
    final = DLBFinal()

    def run(self):
        """Return True when sbin/dlb is a live process and the dlb unit reports running."""
        # is dlb running as a process?
        process_list = shell("ps ax|grep dlb")
        if "sbin/dlb" not in process_list:
            return False
        # is it running as a service?
        unit_status = shell("systemctl status dlb")
        return "running" in unit_status

register.put(DLBService())
            return True

        ps =  shell("ps ax|grep exporter")
        if not "metadata-exporter" in ps:
            return False
        # is it a service?
        status =  shell("systemctl status metadata-exporter")
        if not "running" in status:
            return False


        print("Subscribing to ZMQ socket on tcp://172.17.0.1:5556")
        context = zmq.Context()

        sub = context.socket(zmq.SUB)
        sub.connect("tcp://172.17.0.1:5556")
        sub.setsockopt(zmq.SUBSCRIBE, '')

        poller = zmq.Poller()
        poller.register(sub, zmq.POLLIN)

        socks = dict(poller.poll(60000))
        if socks:
            if socks.get(sub) == zmq.POLLIN:
                return True
        else:
            print("Timeout.")
        return False

register.put(MEService())
Exemple #28
0
    def run(self):
        # Return whatever trigger_reinstall() returns.
        return trigger_reinstall()

class ConfigureAll:
    """rerun dpkg configuration"""

    def run(self):
        """Run `dpkg --configure -a` through shell()."""
        command = "dpkg --configure -a"
        shell(command)

class ReinstallHalfInstalled:
    """reinstall half-installed packages"""

    def run(self):
        """Reinstall every package that dpkg lists in state 'ih'.

        Does nothing when no package is in that state.
        """
        # dpkg -l 'ih' means: Desired=Installed, Status=Half-inst
        listing = shell("dpkg -l|grep -E ^ih|awk '{print $2}'")
        # split() without arguments drops blank entries; splitting an empty
        # listing on "\n" produced [""] and ran apt-get with no package name.
        for pkg in listing.split():
            shell("apt-get install -y --allow-unauthenticated --reinstall %s" % (pkg,), timeout=60)

class DpkgCompleted (module.BasicModule):
    """dpkg is in a valid state"""

    repairs = [ConfigureAll(), ReinstallHalfInstalled()]
    final = DpkgFinal()

    def run(self):
        """Return False when dpkg lists any 'i*' package that is not 'ii'."""
        # lines starting with "i" but not "ii" are not completely installed
        incomplete = shell("dpkg -l|grep -E ^i|grep -vE ^ii")
        return not incomplete

register.put(DpkgCompleted())
Exemple #29
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_reboot

class SSHDFinal:
    """Reboot"""

    def run(self):
        """Call trigger_reboot() and return its result."""
        outcome = trigger_reboot()
        return outcome

class RestartSSHD:
    """enable & restart sshd service"""

    def run(self):
        """Enable the sshd unit, then restart it."""
        for command in (
            "systemctl enable sshd",
            "systemctl restart sshd",
        ):
            shell(command)

class SSHDService (module.BasicModule):
    """sshd service"""

    repairs = [RestartSSHD()]
    final = SSHDFinal()

    def run(self):
        """Return True when an sbin/sshd process appears in the ps listing."""
        process_list = shell("ps ax|grep sshd")
        return "sbin/sshd" in process_list

register.put(SSHDService())
class KModFinal:
    """Temporary Maintenance"""

    def run(self):
        """Call trigger_maintenance() with the kernel-module reason and return its result."""
        reason = "kernel modules k10temp or sp5100_tco not loaded"
        return trigger_maintenance(reason)

class ReloadKernelModules:
    """reload kernel modules"""

    def run(self):
        """Rebuild module dependencies, load both modules, then start watchdog."""
        shell("depmod")
        # modprobe loads only its first argument and passes the rest to it as
        # module parameters; -a makes it load every listed module.
        shell("modprobe -a k10temp sp5100_tco")
        shell("systemctl start watchdog")

class KernelModules(module.BasicModule):
    """Check for watchdog and temperature kernel modules"""

    repairs = [ReloadKernelModules()]
    final = KModFinal()

    def run(self):
        """Pass only when both k10temp and sp5100_tco occur in the lsmod output."""
        lsmod_output = shell("lsmod")
        for module_name in ("k10temp", "sp5100_tco"):
            if module_name not in lsmod_output:
                return False
        return True

register.put(KernelModules())
Exemple #31
0
    final   = PrioritiesFinal()

    # from dlb/Address.h
    PRIO_04MB = 3
    PRIO_50MB = 11
    PRIO_100MB = 12
    PRIO_500MB  = 14
    PRIO_1000MB = 15

    def run(self):
        """Fetch the interface list from the local dlb endpoint and post priorities.

        Interfaces whose name contains "eth" get PRIO_1000MB and those
        containing "wlan" get PRIO_500MB, both keyed by 'mac'. Any other
        interface is reset to PRIO_04MB (keyed by 'iccid') unless its current
        'conn' is already PRIO_50MB or PRIO_04MB. Always returns True.
        """
        dlbdata = requests.get('http://localhost:88/dlb')
        post = []
        # a reply without an 'interfaces' entry yields an empty update rather
        # than a TypeError from iterating None
        for iface in dlbdata.json().get('interfaces') or []:
            # an interface without a name matches neither "eth" nor "wlan"
            name = iface.get('name') or ''
            index = iface.get('index')
            # prefer the iccid as identifier, fall back to the mac
            iid = iface.get('iccid', iface.get('mac'))
            conn = iface.get('conn')
            if "eth" in name:
                post.append({'mac': iid, 'index': index, 'conn': self.PRIO_1000MB})
            elif "wlan" in name:
                post.append({'mac': iid, 'index': index, 'conn': self.PRIO_500MB})
            elif (conn != self.PRIO_50MB) and (conn != self.PRIO_04MB):
                # these two values are set by the scheduling client
                post.append({'iccid': iid, 'index': index, 'conn': self.PRIO_04MB})
        payload = json.dumps({'interfaces': post})
        requests.post('http://localhost:88/dlb', payload)

        return True

register.put(Priorities())
Exemple #32
0
        shell("dpkg --configure -a")


class ReinstallHalfInstalled:
    """reinstall half-installed packages"""

    def run(self):
        """Reinstall every package that dpkg lists in state 'ih'.

        Does nothing when no package is in that state.
        """
        # dpkg -l 'ih' means: Desired=Installed, Status=Half-inst
        listing = shell("dpkg -l|grep -E ^ih|awk '{print $2}'")
        # An empty listing split on "\n" gives [""], which used to run
        # apt-get with an empty package name; split() skips blank entries.
        for pkg in listing.split():
            shell("apt-get install -y --allow-unauthenticated --reinstall %s" %
                  (pkg, ),
                  timeout=60)


class DpkgCompleted(module.BasicModule):
    """dpkg is in a valid state"""

    repairs = [ConfigureAll(), ReinstallHalfInstalled()]
    final = DpkgFinal()

    def run(self):
        """Pass when no dpkg line starts with "i" without starting with "ii"."""
        # any output here names a package that is not completely installed
        leftovers = shell("dpkg -l|grep -E ^i|grep -vE ^ii")
        if leftovers:
            return False
        return True


register.put(DpkgCompleted())
Exemple #33
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_reinstall

class AnsibleFinal:
    """Reinstall"""

    def run(self):
        """Call trigger_reinstall() and return its result."""
        outcome = trigger_reinstall()
        return outcome

class RescheduleAnsible:
    """restore ansible cron entry"""

    def run(self):
        """Rewrite /etc/cron.d/ansible-wrapper with a job running every 20 minutes."""
        # The job discards its output with the POSIX form ">/dev/null 2>&1".
        # The former "&>" is a bash extension; a POSIX sh reads it as "&"
        # (run in background) followed by an empty redirection, so nothing
        # was silenced.
        shell("echo '*/20 * * * * root /usr/bin/ansible-wrapper >/dev/null 2>&1' > /etc/cron.d/ansible-wrapper")

class Ansible (module.BasicModule):
    """ansible wrapper installed in crontab"""

    repairs = [RescheduleAnsible()]
    final = AnsibleFinal()

    def run(self):
        """Return True when /etc/cron.d/ansible-wrapper mentions /usr/bin/ansible-wrapper."""
        cron_entry = shell("cat /etc/cron.d/ansible-wrapper")
        return "/usr/bin/ansible-wrapper" in cron_entry

register.put(Ansible())
Exemple #34
0
    def run(self):
        # Reinstall the docker-engine package without prompting; shell() is
        # given timeout=160 (presumably seconds -- TODO confirm against shell()).
        shell("apt-get install -y --force-yes --reinstall docker-engine",
              timeout=160)


class DockerService(module.BasicModule):
    """docker service"""

    # TODO: Repair #1: restore docker.service file which uses /etc/defaults/docker
    repairs = [RestartDocker()]
    final = DockerFinal()

    def run(self):
        """Check the docker daemon, unless docker-engine is not installed.

        Returns True when dpkg does not list docker-engine with "ii";
        False when no process with "--bip" is listed or the unit is not
        "active (running)"; True otherwise.
        """
        # a missing docker-engine package is assumed to be intentional
        package_line = shell("dpkg -l|grep docker-engine")
        if "ii" not in package_line:
            return True

        # the daemon must have been started with a --bip argument
        if "--bip" not in shell("ps ax|grep docker"):
            return False

        unit_status = shell("systemctl status docker")
        return "active (running)" in unit_status


register.put(DockerService())
Exemple #35
0

class AnsibleFinal:
    """Reinstall"""

    def run(self):
        """Return the result of trigger_reinstall()."""
        result = trigger_reinstall()
        return result


class RescheduleAnsible:
    """restore ansible cron entry"""

    def run(self):
        """Rewrite /etc/cron.d/ansible-wrapper with a job running every 20 minutes."""
        # ">/dev/null 2>&1" replaces the bash-only "&>": a POSIX sh parses
        # "&>" as "&" (background the command) plus an empty redirection,
        # leaving the wrapper's output unredirected.
        shell(
            "echo '*/20 * * * * root /usr/bin/ansible-wrapper >/dev/null 2>&1' > /etc/cron.d/ansible-wrapper"
        )


class Ansible(module.BasicModule):
    """ansible wrapper installed in crontab"""

    repairs = [RescheduleAnsible()]
    final = AnsibleFinal()

    def run(self):
        """Pass when the cron.d file content references /usr/bin/ansible-wrapper."""
        return "/usr/bin/ansible-wrapper" in shell("cat /etc/cron.d/ansible-wrapper")


register.put(Ansible())
Exemple #36
0
from biteback.util import shell, trigger_reboot


class SSHDFinal:
    """Reboot"""

    def run(self):
        """Return the result of trigger_reboot()."""
        result = trigger_reboot()
        return result


class RestartSSHD:
    """enable & restart sshd service"""

    def run(self):
        """Run systemctl enable and then systemctl restart for sshd."""
        commands = ("systemctl enable sshd", "systemctl restart sshd")
        for command in commands:
            shell(command)


class SSHDService(module.BasicModule):
    """sshd service"""

    repairs = [RestartSSHD()]
    final = SSHDFinal()

    def run(self):
        """Pass when the ps listing contains sbin/sshd."""
        return "sbin/sshd" in shell("ps ax|grep sshd")


register.put(SSHDService())
Exemple #37
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_reboot

class RSyslogFinal:
    """Ignore"""

    def run(self):
        """Take no action and return None."""
        return None

class RestartRSyslog:
    """enable & restart rsyslog service"""

    def run(self):
        """Run systemctl enable and then systemctl restart for rsyslog."""
        commands = ("systemctl enable rsyslog", "systemctl restart rsyslog")
        for command in commands:
            shell(command)

class RSyslogService (module.BasicModule):
    """rsyslog service"""

    repairs = [RestartRSyslog()]
    final = RSyslogFinal()

    def run(self):
        """Pass when the ps listing contains sbin/rsyslog."""
        return "sbin/rsyslog" in shell("ps ax|grep rsyslog")

register.put(RSyslogService())
Exemple #38
0
    """enable & restart autotunnel service"""
    def run(self):
        # Enable the autotunnel unit first, then restart it.
        shell("systemctl enable autotunnel")
        shell("systemctl restart autotunnel")


class ReinstallTunnel:
    """reinstall autotunnel service"""

    def run(self):
        """Reinstall the autotunnel package via apt-get.

        timeout=60 brings this call in line with the other package-reinstall
        repairs in this code base, which all give apt-get an explicit timeout
        instead of relying on shell()'s default.
        """
        shell("apt-get install -y --force-yes --reinstall autotunnel",
              timeout=60)


class AutotunnelService(module.BasicModule):
    """Autotunnel systemd service"""

    repairs = [RestartTunnel(), ReinstallTunnel()]
    final = AutotunnelFinal()

    def run(self):
        """Return True when a watcher process exists and the autotunnel unit is running."""
        if "watcher" not in shell("ps ax|grep autotunnel"):
            # the watcher process is missing
            return False
        return "running" in shell("systemctl status autotunnel")


register.put(AutotunnelService())
Exemple #39
0
#!/usr/bin/env python

from biteback import module, register
from biteback.util import shell, trigger_maintenance


class TempFinal:
    """Temporary Maintenance"""

    def run(self):
        """Return the result of trigger_maintenance() called with the temperature reason."""
        message = "cpu temperature exceeded"
        return trigger_maintenance(message)


class Temperature(module.BasicModule):
    """CPU Temperature"""

    repairs = []
    final = TempFinal()

    def run(self):
        """Return False only when the temp plugin reports more than 100.0.

        The reading is the second space-separated field of the plugin
        output. When that field is missing or is not a number (for example
        on a machine without sensors) the check passes instead of raising
        IndexError or ValueError.
        """
        fields = shell("/etc/munin/plugins/temp").split(" ")
        reading = fields[1] if len(fields) > 1 else ""
        # only convert values that at least start with a digit
        if reading[:1].isdigit():
            try:
                if float(reading) > 100.0:
                    return False
            except ValueError:
                # digit-leading but not a valid number: treat as no reading
                pass
        return True


register.put(Temperature())
Exemple #40
0
        shell("systemctl stop docker",
              timeout=60)  # should be stopped, just in case
        shell(
            "dmsetup ls|grep docker-|cut -f1 -d'('|sort|xargs dmsetup remove",
            timeout=60)  # remove any stale leases on the thinpool
        shell("lvremove -f /dev/mapper/vg--monroe-tp--docker",
              timeout=60)  # remove the thinpool device
        shell(
            "systemctl start docker"
        )  # will fail, but remove the systemctl status message this test triggers on


class Thinpool(module.BasicModule):
    """docker can access thinpool device"""

    repairs = [RemoveThinpool()]
    final = ThinpoolFinal()

    def run(self):
        """Fail when the docker unit status shows one of three thin-pool error messages."""
        unit_status = shell("systemctl status docker -l")
        known_errors = (
            "Unable to take ownership of thin-pool",
            "Possibly using a different",  # ...thin pool than last invocation
            "Base Device UUID and Filesystem verification failed",
        )
        for message in known_errors:
            if message in unit_status:
                return False
        return True


register.put(Thinpool())
Exemple #41
0
    """Maintenance"""
    def run(self):
        # Return whatever trigger_maintenance() returns for this reason text.
        return trigger_maintenance("cannot retrieve metadata in monroe netns")


class ResetContainers:
    """stop all containers and restart base experiments"""

    def run(self):
        """Stop all running containers, then run monroe-experiments (timeout=120)."""
        stop_all = "docker stop -t 0 $(docker ps -q)"
        shell(stop_all)
        relaunch = "monroe-experiments"
        shell(relaunch, timeout=120)


class Metadata(module.BasicModule):
    """metadata-exporter service"""

    repairs = [ResetContainers()]
    final = MetaFinal()

    def run(self):
        """Read the first bytes of metadata inside the monroe network namespace.

        Returns True when the output contains "MONROE", and also when it
        contains "Cannot" (taken to mean the namespace does not exist, which
        is ignored). Returns False otherwise.
        """
        metadata = shell("ip netns exec monroe metadata | head -c 6",
                         timeout=60)
        if "Cannot" in metadata:
            # print() call form: valid on Python 2 and 3, and consistent with
            # the other modules; the print statement is a SyntaxError on 3.
            print("Netns monroe does not exist. Ignoring")
            return True
        return "MONROE" in metadata


register.put(Metadata())