# NOTE(review): the line below is a whitespace-collapsed, truncated excerpt of
# a gremlins fault profile; it is kept verbatim because it ends mid-expression.
# Visible content, in order:
#   * the tail of a license header;
#   * imports of `signal` and of `faults`, `metafaults`, `triggers` from
#     `gremlins`;
#   * SIGKILL faults built with faults.kill_daemons for "HRegionServer" (rs_*)
#     and "DataNode" (dn_*), with a third argument of 100 (*_long) or 3
#     (*_short);
#   * pause faults built with faults.pause_daemons: 62 for "HRegionServer",
#     20 for "DataNode";
#   * rs_drop_inbound_packets, built with faults.drop_packets_to_daemons for
#     "HRegionServer" with argument 64 (the inline comment notes it only drops
#     inbound packets);
#   * the start of `profile`: a triggers.Periodic(45, ...) wrapping
#     metafaults.pick_fault over (weight, fault) pairs -- 5 for rs_kill_long,
#     1 for dn_kill_long, 5 for rs_kill_short.
# The list is cut off after rs_kill_short, so any further entries are unknown.
# The units of the numeric arguments are not shown here -- presumably seconds;
# TODO confirm against the gremlins API.
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import signal from gremlins import faults, metafaults, triggers rs_kill_long = faults.kill_daemons(["HRegionServer"], signal.SIGKILL, 100) rs_kill_short = faults.kill_daemons(["HRegionServer"], signal.SIGKILL, 3) dn_kill_long = faults.kill_daemons(["DataNode"], signal.SIGKILL, 100) dn_kill_short = faults.kill_daemons(["DataNode"], signal.SIGKILL, 3) rs_pause = faults.pause_daemons(["HRegionServer"], 62) dn_pause = faults.pause_daemons(["DataNode"], 20) # This fault isn't that useful yet, since it only drops inbound packets # but outbound packets (eg, the ZK pings) keep going. rs_drop_inbound_packets = faults.drop_packets_to_daemons(["HRegionServer"], 64) profile = [ triggers.Periodic( 45, metafaults.pick_fault([ # kill -9s (5, rs_kill_long), (1, dn_kill_long), # fast kill -9s (5, rs_kill_short),
# NOTE(review): the line below is a whitespace-collapsed excerpt of a
# fault-injection script that starts and ends mid-file; it is kept verbatim.
# Visible steps, in order:
#   1. procutils.run invokes lb-tool.sh with --put_service "service-2",
#      --cluster "cluster-1", --path "/service-2", --balancer "degrader" and
#      --store "zk://localhost:2181/echo/lb/services".
#   2. procutils.start_daemon is called for 'LoadBalancerEchoServer' and then
#      'LoadBalancerEchoClient'.
#   3. SIGKILL faults are declared with faults.kill_daemons for
#      "QuorumPeerMain", "LoadBalancerEchoServer" and "LoadBalancerEchoClient",
#      with a third argument of 5 (kill_short_*) or 60 (kill_long_*); pause
#      faults are declared with faults.pause_daemons(..., 60) for the same
#      three daemons.
#   4. random_fault() begins a metafaults.pick_fault call listing the three
#      kill_short_* faults with weight 1; the kill_long_* and pause_* entries
#      are commented out. The rest of the function is cut off in this view.
# NOTE(review): the lb-tool.sh location is an absolute path under a single
# user's home directory, so the script is tied to that machine -- consider
# making it configurable.
procutils.run(["/Users/criccomi/svn/pegasus/trunk/d2/scripts/lb-tool.sh", "--put_service", "service-2", "--cluster", "cluster-1", "--path", "/service-2", "--balancer", "degrader", "--store", "zk://localhost:2181/echo/lb/services"]) # start server and client procutils.start_daemon('LoadBalancerEchoServer'); procutils.start_daemon('LoadBalancerEchoClient'); # declare faults kill_short_zk = faults.kill_daemons(["QuorumPeerMain"], signal.SIGKILL, 5) kill_short_server = faults.kill_daemons(["LoadBalancerEchoServer"], signal.SIGKILL, 5) kill_short_client = faults.kill_daemons(["LoadBalancerEchoClient"], signal.SIGKILL, 5) kill_long_zk = faults.kill_daemons(["QuorumPeerMain"], signal.SIGKILL, 60) kill_long_server = faults.kill_daemons(["LoadBalancerEchoServer"], signal.SIGKILL, 60) kill_long_client = faults.kill_daemons(["LoadBalancerEchoClient"], signal.SIGKILL, 60) pause_zk = faults.pause_daemons(["QuorumPeerMain"], 60) pause_server = faults.pause_daemons(["LoadBalancerEchoServer"], 60) pause_client = faults.pause_daemons(["LoadBalancerEchoClient"], 60) def random_fault(): metafaults.pick_fault([ (1, kill_short_zk), (1, kill_short_server), (1, kill_short_client), #(1, kill_long_zk), #(1, kill_long_server), #(1, kill_long_client), #(1, pause_zk), #(1, pause_server),
# NOTE(review): the line below is a whitespace-collapsed, truncated excerpt of
# a gremlins fault profile; it is kept verbatim because it ends mid-expression.
# Visible content, in order:
#   * the tail of a license header and imports of `signal` and of `faults`,
#     `metafaults`, `triggers` from `gremlins`;
#   * rs_kill_long / rs_kill_short: faults.kill_daemons with SIGKILL for the
#     daemon name "aa1dc150d996", third argument 100 / 3;
#   * dn_kill_long / dn_kill_short: the same for "DataNode";
#   * rs_pause: faults.pause_daemons for "aa1dc150d996" with 120;
#     dn_pause: faults.pause_daemons for "DataNode" with 20;
#   * rs_drop_inbound_packets: faults.drop_packets_to_daemons for
#     "aa1dc150d996" with 64 (the inline comment notes it only drops inbound
#     packets);
#   * the start of `profile`: triggers.Periodic(45, ...) wrapping
#     metafaults.pick_fault over (weight, fault) pairs -- 5 for rs_kill_long,
#     1 for dn_kill_long, 5 for rs_kill_short -- cut off after that entry.
# NOTE(review): "aa1dc150d996" is an opaque hex-looking string rather than a
# readable daemon name; confirm it matches the process the rs_* faults are
# meant to target.
# The units of the numeric arguments are not shown here -- presumably seconds;
# TODO confirm against the gremlins API.
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import signal from gremlins import faults, metafaults, triggers rs_kill_long = faults.kill_daemons(["aa1dc150d996"], signal.SIGKILL, 100) rs_kill_short = faults.kill_daemons(["aa1dc150d996"], signal.SIGKILL, 3) dn_kill_long = faults.kill_daemons(["DataNode"], signal.SIGKILL, 100) dn_kill_short = faults.kill_daemons(["DataNode"], signal.SIGKILL, 3) rs_pause = faults.pause_daemons(["aa1dc150d996"], 120) dn_pause = faults.pause_daemons(["DataNode"], 20) # This fault isn't that useful yet, since it only drops inbound packets # but outbound packets (eg, the ZK pings) keep going. rs_drop_inbound_packets = faults.drop_packets_to_daemons(["aa1dc150d996"], 64) profile = [ triggers.Periodic( 45, metafaults.pick_fault([ # kill -9s (5, rs_kill_long), (1, dn_kill_long), # fast kill -9s (5, rs_kill_short),
# NOTE(review): the line below is a whitespace-collapsed, truncated excerpt of
# a gremlins fault profile for Flume; it is kept on one line because it ends
# mid-expression. Visible content, in order:
#   * the tail of a license header and imports of `signal` and of `faults`,
#     `metafaults`, `triggers` from `gremlins`;
#   * fn_* faults target the "FlumeNode" daemon, fm_* faults target
#     "FlumeMaster";
#   * kill faults use SIGKILL with a third argument of 100 (*_long) or 3
#     (*_short), described by the inline comment as "how long before reset";
#   * pause faults use 62 (fn_pause) and 20 (fm_pause);
#   * fn_drop_inbound_packets drops packets to "FlumeNode" with argument 64;
#   * the start of `profile`: triggers.Periodic(45, ...) wrapping
#     metafaults.pick_fault over (weight, fault) pairs -- 5 for fn_kill_long,
#     1 for fm_kill_long, 5 for fn_kill_short -- cut off after that entry.
# fn_pause and fn_drop_inbound_packets name "FlumeNode" so that they act on the
# same daemon as the other fn_* faults.
# See the License for the specific language governing permissions and # limitations under the License. import signal from gremlins import faults, metafaults, triggers # how long before reset fn_kill_long = faults.kill_daemons(["FlumeNode"], signal.SIGKILL, 100) fn_kill_short = faults.kill_daemons(["FlumeNode"], signal.SIGKILL, 3) fm_kill_long = faults.kill_daemons(["FlumeMaster"], signal.SIGKILL, 100) fm_kill_short = faults.kill_daemons(["FlumeMaster"], signal.SIGKILL, 3) # 62 is zk timeout in hbase fn_pause = faults.pause_daemons(["FlumeNode"], 62) fm_pause = faults.pause_daemons(["FlumeMaster"], 20) # This fault isn't that useful yet, since it only drops inbound packets # but outbound packets (eg, the ZK pings) keep going. fn_drop_inbound_packets = faults.drop_packets_to_daemons(["FlumeNode"], 64) profile = [ triggers.Periodic( 45, # every 45 seconds metafaults.pick_fault([ # (weight, fault to fire) # kill -9s (5, fn_kill_long), (1, fm_kill_long), # fast kill -9s (5, fn_kill_short),