Ejemplo n.º 1
0
def signal_handler(sig, frame):
  """Signal handler: stop the periodic trigger and exit the process.

  Builds kill faults for the three daemons, then stops and joins the
  module-level ``random_periodic`` object before exiting with status 0.

  Args:
    sig: Signal number delivered to the handler (unused).
    frame: Stack frame that was active when the signal arrived (unused).
  """
  # NOTE(review): the return values of kill_daemons() are discarded here. If
  # kill_daemons() returns a fault callable instead of acting immediately,
  # these three lines have no effect -- confirm against gremlins.faults.
  faults.kill_daemons(["QuorumPeerMain"], signal.SIGKILL, 60)
  faults.kill_daemons(["LoadBalancerEchoServer"], signal.SIGKILL, 60)
  faults.kill_daemons(["LoadBalancerEchoClient"], signal.SIGKILL, 60)
  # stop/join must be called; a bare attribute reference is a no-op, so the
  # trigger previously kept running until sys.exit() tore the process down.
  random_periodic.stop()
  random_periodic.join()
  sys.exit(0)
Ejemplo n.º 2
0
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import signal

from gremlins import faults, metafaults, triggers

# Fault declarations used by the profile below.
#
# Kill faults: SIGKILL the named daemon. The "long" and "short" variants
# differ only in the last argument (100 vs 3).
# NOTE(review): the last argument is presumably a delay in seconds before the
# daemon is brought back -- confirm against gremlins.faults.kill_daemons.
rs_kill_long = faults.kill_daemons(["HRegionServer"], signal.SIGKILL, 100)
rs_kill_short = faults.kill_daemons(["HRegionServer"], signal.SIGKILL, 3)

dn_kill_long = faults.kill_daemons(["DataNode"], signal.SIGKILL, 100)
dn_kill_short = faults.kill_daemons(["DataNode"], signal.SIGKILL, 3)

# Pause faults for the same two daemons.
# NOTE(review): 62 and 20 are presumably pause durations in seconds -- confirm.
rs_pause = faults.pause_daemons(["HRegionServer"], 62)
dn_pause = faults.pause_daemons(["DataNode"], 20)

# This fault isn't that useful yet, since it only drops inbound packets
# while outbound packets (e.g., the ZK pings) keep going.
rs_drop_inbound_packets = faults.drop_packets_to_daemons(["HRegionServer"], 64)

profile = [
    triggers.Periodic(
        45,
Ejemplo n.º 3
0
}

# Start ZooKeeper first; the lb-tool calls below store into
# zk://localhost:2181.
procutils.start_daemon('QuorumPeerMain')

# Put cluster and service properties.
# NOTE(review): this is an absolute path into one developer's checkout; it is
# kept byte-for-byte, but hoisted so it only needs changing in one place.
_LB_TOOL = "/Users/criccomi/svn/pegasus/trunk/d2/scripts/lb-tool.sh"

procutils.run([
  _LB_TOOL,
  "--put_cluster", "cluster-1",
  "--schemes", "prpc,http",
  "--banned", "http://www.google.com",
  "--store", "zk://localhost:2181/echo/lb/clusters",
])
procutils.run([
  _LB_TOOL,
  "--put_service", "service-1",
  "--cluster", "cluster-1",
  "--path", "/service-1",
  "--balancer", "degrader",
  "--store", "zk://localhost:2181/echo/lb/services",
])
procutils.run([
  _LB_TOOL,
  "--put_service", "service-2",
  "--cluster", "cluster-1",
  "--path", "/service-2",
  "--balancer", "degrader",
  "--store", "zk://localhost:2181/echo/lb/services",
])

# Start the echo server and client.
procutils.start_daemon('LoadBalancerEchoServer')
procutils.start_daemon('LoadBalancerEchoClient')

# Declare faults. The "short" and "long" kill variants differ only in the
# last argument (5 vs 60).
# NOTE(review): that argument is presumably a delay in seconds before the
# daemon is brought back -- confirm against gremlins.faults.kill_daemons.
kill_short_zk = faults.kill_daemons(["QuorumPeerMain"], signal.SIGKILL, 5)
kill_short_server = faults.kill_daemons(["LoadBalancerEchoServer"], signal.SIGKILL, 5)
kill_short_client = faults.kill_daemons(["LoadBalancerEchoClient"], signal.SIGKILL, 5)

kill_long_zk = faults.kill_daemons(["QuorumPeerMain"], signal.SIGKILL, 60)
kill_long_server = faults.kill_daemons(["LoadBalancerEchoServer"], signal.SIGKILL, 60)
kill_long_client = faults.kill_daemons(["LoadBalancerEchoClient"], signal.SIGKILL, 60)

# Pause faults for each of the three daemons.
pause_zk = faults.pause_daemons(["QuorumPeerMain"], 60)
pause_server = faults.pause_daemons(["LoadBalancerEchoServer"], 60)
pause_client = faults.pause_daemons(["LoadBalancerEchoClient"], 60)

def random_fault():
  metafaults.pick_fault([
    (1, kill_short_zk),
    (1, kill_short_server),
Ejemplo n.º 4
0
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import signal

from gremlins import faults, metafaults, triggers

# Fault declarations used by the profile below.
#
# Kill faults: SIGKILL the named daemon. The "long" and "short" variants
# differ only in the last argument (100 vs 3).
# NOTE(review): the last argument is presumably a delay in seconds before the
# daemon is brought back -- confirm against gremlins.faults.kill_daemons.
rs_kill_long = faults.kill_daemons(["HRegionServer"], signal.SIGKILL, 100)
rs_kill_short = faults.kill_daemons(["HRegionServer"], signal.SIGKILL, 3)

dn_kill_long = faults.kill_daemons(["DataNode"], signal.SIGKILL, 100)
dn_kill_short = faults.kill_daemons(["DataNode"], signal.SIGKILL, 3)

# Pause faults for the same two daemons.
# NOTE(review): 62 and 20 are presumably pause durations in seconds -- confirm.
rs_pause = faults.pause_daemons(["HRegionServer"], 62)
dn_pause = faults.pause_daemons(["DataNode"], 20)

# This fault isn't that useful yet, since it only drops inbound packets
# while outbound packets (e.g., the ZK pings) keep going.
rs_drop_inbound_packets = faults.drop_packets_to_daemons(["HRegionServer"], 64)

profile = [
  triggers.Periodic(
    45,
Ejemplo n.º 5
0
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import signal

from gremlins import faults, metafaults, triggers

# Fault declarations used by the profile below.
#
# NOTE(review): the rs_* faults target the opaque name "aa1dc150d996" rather
# than a daemon class name; it looks like an environment-specific identifier
# (possibly a container ID) -- confirm it is still valid before reuse.
#
# Kill faults: SIGKILL the named daemon. The "long" and "short" variants
# differ only in the last argument (100 vs 3).
# NOTE(review): the last argument is presumably a delay in seconds before the
# daemon is brought back -- confirm against gremlins.faults.kill_daemons.
rs_kill_long = faults.kill_daemons(["aa1dc150d996"], signal.SIGKILL, 100)
rs_kill_short = faults.kill_daemons(["aa1dc150d996"], signal.SIGKILL, 3)

dn_kill_long = faults.kill_daemons(["DataNode"], signal.SIGKILL, 100)
dn_kill_short = faults.kill_daemons(["DataNode"], signal.SIGKILL, 3)

# Pause faults for the same two targets.
# NOTE(review): 120 and 20 are presumably pause durations in seconds -- confirm.
rs_pause = faults.pause_daemons(["aa1dc150d996"], 120)
dn_pause = faults.pause_daemons(["DataNode"], 20)

# This fault isn't that useful yet, since it only drops inbound packets
# while outbound packets (e.g., the ZK pings) keep going.
rs_drop_inbound_packets = faults.drop_packets_to_daemons(["aa1dc150d996"], 64)

profile = [
    triggers.Periodic(
        45,
Ejemplo n.º 6
0
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import signal

from gremlins import faults, metafaults, triggers

# Fault declarations for the Flume profile below: fn_* target FlumeNode and
# fm_* target FlumeMaster.
#
# Kill faults. The last argument is how long before reset (100 for the
# "long" variant, 3 for the "short" one).
# NOTE(review): presumably seconds -- confirm against
# gremlins.faults.kill_daemons.
fn_kill_long = faults.kill_daemons(["FlumeNode"], signal.SIGKILL, 100)
fn_kill_short = faults.kill_daemons(["FlumeNode"], signal.SIGKILL, 3)

fm_kill_long = faults.kill_daemons(["FlumeMaster"], signal.SIGKILL, 100)
fm_kill_short = faults.kill_daemons(["FlumeMaster"], signal.SIGKILL, 3)

# Pause faults. 62 is the zk timeout in hbase (value carried over from the
# HBase profile).
# Fixed: fn_pause previously targeted "FlumeName", which matches no daemon
# named anywhere else in this profile; the fn_* faults act on "FlumeNode".
fn_pause = faults.pause_daemons(["FlumeNode"], 62)
fm_pause = faults.pause_daemons(["FlumeMaster"], 20)

# This fault isn't that useful yet, since it only drops inbound packets
# while outbound packets (e.g., the ZK pings) keep going.
# Fixed: previously targeted "HRegionServer", a leftover from the HBase
# profile this file was copied from.
fn_drop_inbound_packets = faults.drop_packets_to_daemons(["FlumeNode"], 64)

profile = [
  triggers.Periodic(